mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 21:53:44 +00:00
- HW support updates:
- Add uncore support for Intel Comet Lake - Add RAPL support for Hygon Fam18h - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs, which enumerates per-device performance counters via sysfs and enables the perf stat --iiostat functionality - Add support for Intel "Architectural LBRs", which generalize the model-specific LBR hardware tracing feature into a model-independent, architected performance monitoring feature. Usage is mostly seamless to tooling, as the pre-existing LBR features are kept, but there are a couple of advantages under the hood, such as faster context-switching, faster LBR reads, cleaner exposure of LBR features to guest kernels, etc. (Since architectural LBRs are supported via XSAVE, there are related changes to the x86 FPU code as well.) - ftrace/perf updates: Add support to add a text poke event to record changes to kernel text (i.e. self-modifying code) in order to support tracers like Intel PT decoding through jump labels, kprobes and ftrace trampolines. - Misc cleanups, smaller fixes.
Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl8oAgcRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1gcSA/9EwKeLF03jkEXwzF/a/YhCxZXODH/klz/ 5D/Li+0HJy9TTVQWaSOxu31VcnWyAPER97aRjHohNMrAFKpAC4GwzxF2fjKUzzKJ eoWIgXvtlMM+nQb93UTB2+9Z3eHBEpKsqP8oc6qeXa74b2p3WfmvFRPBWFuzmOlH nb26F/Cu46HTEUfWvggU9flS0HpkdZ8X2Rt14sRwq5Gi2Wa/5+ygaksD+5nwRlGM r7jBrZBDTOGhy7HjrjpDPif056YU31giKmMQ/j17h1NaT3ciyXYSi0FuKEghDCII 2OFyH0wZ1vsp63GISosIKFLFoBmOd4He4/sKjdtOtnosan250t3/ZDH/7tw6Rq2V tf1o/dMbDmV9v0lAVBZO76Z74ZQbk3+TvFxyDwtBSQYBe2eVfNz0VY4YjSRlRIp0 1nIbJqiMLa7uquL2K4zZKapt7qsMaVqLO4YUVTzYPvv3luAqFLvC83a2+hapz4cs w4nET8lpWanUBK0hidQe1J6NPM4v1mnsvuZfM0p/QwKN9uvV5KoT6YJhRqfTy51g je+G80q0XqOH0H8x9iWuLiJe0G72UyhRqzSTxg+Cjj9cAhnsFPFLCNMWSVHqioLP JXGQiTp+6SQM6JDXkj5F8InsyT4KfzqizMSnAaH+6bsv9iQKDL4AbD7r92g6nbN9 PP43QQh23Fg= =4pKU -----END PGP SIGNATURE----- Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull perf event updates from Ingo Molnar: "HW support updates: - Add uncore support for Intel Comet Lake - Add RAPL support for Hygon Fam18h - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs, which enumerates per device performance counters via sysfs and enables the perf stat --iiostat functionality - Add support for Intel "Architectural LBRs", which generalized the model specific LBR hardware tracing feature into a model-independent, architected performance monitoring feature. Usage is mostly seamless to tooling, as the pre-existing LBR features are kept, but there's a couple of advantages under the hood, such as faster context-switching, faster LBR reads, cleaner exposure of LBR features to guest kernels, etc. ( Since architectural LBRs are supported via XSAVE, there's related changes to the x86 FPU code as well. ) ftrace/perf updates: - Add support to add a text poke event to record changes to kernel text (i.e. 
self-modifying code) in order to support tracers like Intel PT decoding through jump labels, kprobes and ftrace trampolines. Misc cleanups, smaller fixes..." * tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits) perf/x86/rapl: Add Hygon Fam18h RAPL support kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer() x86/perf: Fix a typo perf: <linux/perf_event.h>: drop a duplicated word perf/x86/intel/lbr: Support XSAVES for arch LBR read perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature x86/fpu/xstate: Support dynamic supervisor feature for LBR x86/fpu: Use proper mask to replace full instruction mask perf/x86: Remove task_ctx_size perf/x86/intel/lbr: Create kmem_cache for the LBR context data perf/core: Use kmem_cache to allocate the PMU specific data perf/core: Factor out functions to allocate/free the task_ctx_data perf/x86/intel/lbr: Support Architectural LBR perf/x86/intel/lbr: Factor out intel_pmu_store_lbr perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all() perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline perf/x86/intel/lbr: Unify the stored format of LBR information perf/x86/intel/lbr: Support LBR_CTL perf/x86: Expose CPUID enumeration bits for arch LBR ...
This commit is contained in:
commit
b34133fec8
33
Documentation/ABI/testing/sysfs-devices-mapping
Normal file
33
Documentation/ABI/testing/sysfs-devices-mapping
Normal file
@ -0,0 +1,33 @@
|
||||
What: /sys/devices/uncore_iio_x/dieX
|
||||
Date: February 2020
|
||||
Contact: Roman Sudarikov <roman.sudarikov@linux.intel.com>
|
||||
Description:
|
||||
Each IIO stack (PCIe root port) has its own IIO PMON block, so
|
||||
each dieX file (where X is die number) holds "Segment:Root Bus"
|
||||
for PCIe root port, which can be monitored by that IIO PMON
|
||||
block.
|
||||
For example, on 4-die Xeon platform with up to 6 IIO stacks per
|
||||
die and, therefore, 6 IIO PMON blocks per die, the mapping of
|
||||
IIO PMON block 0 is exposed as follows:
|
||||
|
||||
$ ls /sys/devices/uncore_iio_0/die*
|
||||
-r--r--r-- /sys/devices/uncore_iio_0/die0
|
||||
-r--r--r-- /sys/devices/uncore_iio_0/die1
|
||||
-r--r--r-- /sys/devices/uncore_iio_0/die2
|
||||
-r--r--r-- /sys/devices/uncore_iio_0/die3
|
||||
|
||||
$ tail /sys/devices/uncore_iio_0/die*
|
||||
==> /sys/devices/uncore_iio_0/die0 <==
|
||||
0000:00
|
||||
==> /sys/devices/uncore_iio_0/die1 <==
|
||||
0000:40
|
||||
==> /sys/devices/uncore_iio_0/die2 <==
|
||||
0000:80
|
||||
==> /sys/devices/uncore_iio_0/die3 <==
|
||||
0000:c0
|
||||
|
||||
Which means:
|
||||
IIO PMU 0 on die 0 belongs to PCI RP on bus 0x00, domain 0x0000
|
||||
IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
|
||||
IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
|
||||
IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
|
@ -71,10 +71,9 @@ u64 x86_perf_event_update(struct perf_event *event)
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int shift = 64 - x86_pmu.cntval_bits;
|
||||
u64 prev_raw_count, new_raw_count;
|
||||
int idx = hwc->idx;
|
||||
u64 delta;
|
||||
|
||||
if (idx == INTEL_PMC_IDX_FIXED_BTS)
|
||||
if (unlikely(!hwc->event_base))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@ -359,6 +358,7 @@ void x86_release_hardware(void)
|
||||
if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
|
||||
release_pmc_hardware();
|
||||
release_ds_buffers();
|
||||
release_lbr_buffers();
|
||||
mutex_unlock(&pmc_reserve_mutex);
|
||||
}
|
||||
}
|
||||
@ -1097,22 +1097,31 @@ static inline void x86_assign_hw_event(struct perf_event *event,
|
||||
struct cpu_hw_events *cpuc, int i)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx;
|
||||
|
||||
hwc->idx = cpuc->assign[i];
|
||||
idx = hwc->idx = cpuc->assign[i];
|
||||
hwc->last_cpu = smp_processor_id();
|
||||
hwc->last_tag = ++cpuc->tags[i];
|
||||
|
||||
if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
|
||||
switch (hwc->idx) {
|
||||
case INTEL_PMC_IDX_FIXED_BTS:
|
||||
case INTEL_PMC_IDX_FIXED_VLBR:
|
||||
hwc->config_base = 0;
|
||||
hwc->event_base = 0;
|
||||
} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
|
||||
break;
|
||||
|
||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
|
||||
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
|
||||
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
|
||||
hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
|
||||
} else {
|
||||
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
|
||||
(idx - INTEL_PMC_IDX_FIXED);
|
||||
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
|
||||
break;
|
||||
|
||||
default:
|
||||
hwc->config_base = x86_pmu_config_addr(hwc->idx);
|
||||
hwc->event_base = x86_pmu_event_addr(hwc->idx);
|
||||
hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1233,7 +1242,7 @@ int x86_perf_event_set_period(struct perf_event *event)
|
||||
s64 period = hwc->sample_period;
|
||||
int ret = 0, idx = hwc->idx;
|
||||
|
||||
if (idx == INTEL_PMC_IDX_FIXED_BTS)
|
||||
if (unlikely(!hwc->event_base))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@ -2363,7 +2372,6 @@ static struct pmu pmu = {
|
||||
|
||||
.event_idx = x86_pmu_event_idx,
|
||||
.sched_task = x86_pmu_sched_task,
|
||||
.task_ctx_size = sizeof(struct x86_perf_task_context),
|
||||
.swap_task_ctx = x86_pmu_swap_task_ctx,
|
||||
.check_period = x86_pmu_check_period,
|
||||
|
||||
|
@ -2136,8 +2136,35 @@ static inline void intel_pmu_ack_status(u64 ack)
|
||||
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
|
||||
}
|
||||
|
||||
static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
|
||||
static inline bool event_is_checkpointed(struct perf_event *event)
|
||||
{
|
||||
return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
|
||||
}
|
||||
|
||||
static inline void intel_set_masks(struct perf_event *event, int idx)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (event->attr.exclude_host)
|
||||
__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
|
||||
if (event->attr.exclude_guest)
|
||||
__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
|
||||
if (event_is_checkpointed(event))
|
||||
__set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
|
||||
}
|
||||
|
||||
static inline void intel_clear_masks(struct perf_event *event, int idx)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
|
||||
__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
|
||||
__clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
|
||||
}
|
||||
|
||||
static void intel_pmu_disable_fixed(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
|
||||
u64 ctrl_val, mask;
|
||||
|
||||
@ -2148,30 +2175,22 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
|
||||
wrmsrl(hwc->config_base, ctrl_val);
|
||||
}
|
||||
|
||||
static inline bool event_is_checkpointed(struct perf_event *event)
|
||||
{
|
||||
return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
|
||||
}
|
||||
|
||||
static void intel_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
|
||||
if (idx < INTEL_PMC_IDX_FIXED) {
|
||||
intel_clear_masks(event, idx);
|
||||
x86_pmu_disable_event(event);
|
||||
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
|
||||
intel_clear_masks(event, idx);
|
||||
intel_pmu_disable_fixed(event);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
|
||||
intel_pmu_disable_bts();
|
||||
intel_pmu_drain_bts_buffer();
|
||||
return;
|
||||
}
|
||||
|
||||
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
|
||||
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
|
||||
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
|
||||
|
||||
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
|
||||
intel_pmu_disable_fixed(hwc);
|
||||
else
|
||||
x86_pmu_disable_event(event);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
|
||||
intel_clear_masks(event, idx);
|
||||
|
||||
/*
|
||||
* Needs to be called after x86_pmu_disable_event,
|
||||
@ -2238,33 +2257,23 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
|
||||
static void intel_pmu_enable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
|
||||
if (!__this_cpu_read(cpu_hw_events.enabled))
|
||||
return;
|
||||
|
||||
intel_pmu_enable_bts(hwc->config);
|
||||
return;
|
||||
}
|
||||
|
||||
if (event->attr.exclude_host)
|
||||
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
|
||||
if (event->attr.exclude_guest)
|
||||
cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
|
||||
|
||||
if (unlikely(event_is_checkpointed(event)))
|
||||
cpuc->intel_cp_status |= (1ull << hwc->idx);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (unlikely(event->attr.precise_ip))
|
||||
intel_pmu_pebs_enable(event);
|
||||
|
||||
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
|
||||
if (idx < INTEL_PMC_IDX_FIXED) {
|
||||
intel_set_masks(event, idx);
|
||||
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
|
||||
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
|
||||
intel_set_masks(event, idx);
|
||||
intel_pmu_enable_fixed(event);
|
||||
return;
|
||||
}
|
||||
|
||||
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
|
||||
if (!__this_cpu_read(cpu_hw_events.enabled))
|
||||
return;
|
||||
intel_pmu_enable_bts(hwc->config);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
|
||||
intel_set_masks(event, idx);
|
||||
}
|
||||
|
||||
static void intel_pmu_add_event(struct perf_event *event)
|
||||
@ -2614,6 +2623,20 @@ intel_bts_constraints(struct perf_event *event)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: matches a fake event, like Fixed2.
|
||||
*/
|
||||
static struct event_constraint *
|
||||
intel_vlbr_constraints(struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c = &vlbr_constraint;
|
||||
|
||||
if (unlikely(constraint_match(c, event->hw.config)))
|
||||
return c;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int intel_alt_er(int idx, u64 config)
|
||||
{
|
||||
int alt_idx = idx;
|
||||
@ -2804,6 +2827,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
{
|
||||
struct event_constraint *c;
|
||||
|
||||
c = intel_vlbr_constraints(event);
|
||||
if (c)
|
||||
return c;
|
||||
|
||||
c = intel_bts_constraints(event);
|
||||
if (c)
|
||||
return c;
|
||||
@ -3951,6 +3978,11 @@ static __initconst const struct x86_pmu core_pmu = {
|
||||
.cpu_dead = intel_pmu_cpu_dead,
|
||||
|
||||
.check_period = intel_pmu_check_period,
|
||||
|
||||
.lbr_reset = intel_pmu_lbr_reset_64,
|
||||
.lbr_read = intel_pmu_lbr_read_64,
|
||||
.lbr_save = intel_pmu_lbr_save,
|
||||
.lbr_restore = intel_pmu_lbr_restore,
|
||||
};
|
||||
|
||||
static __initconst const struct x86_pmu intel_pmu = {
|
||||
@ -3996,6 +4028,11 @@ static __initconst const struct x86_pmu intel_pmu = {
|
||||
.check_period = intel_pmu_check_period,
|
||||
|
||||
.aux_output_match = intel_pmu_aux_output_match,
|
||||
|
||||
.lbr_reset = intel_pmu_lbr_reset_64,
|
||||
.lbr_read = intel_pmu_lbr_read_64,
|
||||
.lbr_save = intel_pmu_lbr_save,
|
||||
.lbr_restore = intel_pmu_lbr_restore,
|
||||
};
|
||||
|
||||
static __init void intel_clovertown_quirk(void)
|
||||
@ -4622,6 +4659,14 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.intel_cap.capabilities = capabilities;
|
||||
}
|
||||
|
||||
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
|
||||
x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
|
||||
x86_pmu.lbr_read = intel_pmu_lbr_read_32;
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
|
||||
intel_pmu_arch_lbr_init();
|
||||
|
||||
intel_ds_init();
|
||||
|
||||
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
|
||||
|
@ -954,7 +954,7 @@ static void adaptive_pebs_record_size_update(void)
|
||||
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
|
||||
sz += sizeof(struct pebs_xmm);
|
||||
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
|
||||
sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
|
||||
sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
|
||||
|
||||
cpuc->pebs_record_size = sz;
|
||||
}
|
||||
@ -1595,10 +1595,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_LBRS) {
|
||||
struct pebs_lbr *lbr = next_record;
|
||||
struct lbr_entry *lbr = next_record;
|
||||
int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
|
||||
& 0xff) + 1;
|
||||
next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
|
||||
next_record = next_record + num_lbr * sizeof(struct lbr_entry);
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
intel_pmu_store_pebs_lbrs(lbr);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -16,7 +16,7 @@ struct pci_driver *uncore_pci_driver;
|
||||
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
|
||||
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
|
||||
struct pci_extra_dev *uncore_extra_pci_dev;
|
||||
static int max_dies;
|
||||
int __uncore_max_dies;
|
||||
|
||||
/* mask of cpus that collect uncore events */
|
||||
static cpumask_t uncore_cpu_mask;
|
||||
@ -108,7 +108,7 @@ struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu
|
||||
* The unsigned check also catches the '-1' return value for non
|
||||
* existent mappings in the topology map.
|
||||
*/
|
||||
return dieid < max_dies ? pmu->boxes[dieid] : NULL;
|
||||
return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
|
||||
}
|
||||
|
||||
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
|
||||
@ -132,6 +132,9 @@ u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
|
||||
if (!box->io_addr)
|
||||
return 0;
|
||||
|
||||
if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
|
||||
return 0;
|
||||
|
||||
return readq(box->io_addr + event->hw.event_base);
|
||||
}
|
||||
|
||||
@ -843,10 +846,12 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
|
||||
.read = uncore_pmu_event_read,
|
||||
.module = THIS_MODULE,
|
||||
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
|
||||
.attr_update = pmu->type->attr_update,
|
||||
};
|
||||
} else {
|
||||
pmu->pmu = *pmu->type->pmu;
|
||||
pmu->pmu.attr_groups = pmu->type->attr_groups;
|
||||
pmu->pmu.attr_update = pmu->type->attr_update;
|
||||
}
|
||||
|
||||
if (pmu->type->num_boxes == 1) {
|
||||
@ -877,7 +882,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
|
||||
{
|
||||
int die;
|
||||
|
||||
for (die = 0; die < max_dies; die++)
|
||||
for (die = 0; die < uncore_max_dies(); die++)
|
||||
kfree(pmu->boxes[die]);
|
||||
kfree(pmu->boxes);
|
||||
}
|
||||
@ -887,6 +892,9 @@ static void uncore_type_exit(struct intel_uncore_type *type)
|
||||
struct intel_uncore_pmu *pmu = type->pmus;
|
||||
int i;
|
||||
|
||||
if (type->cleanup_mapping)
|
||||
type->cleanup_mapping(type);
|
||||
|
||||
if (pmu) {
|
||||
for (i = 0; i < type->num_boxes; i++, pmu++) {
|
||||
uncore_pmu_unregister(pmu);
|
||||
@ -915,7 +923,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
|
||||
if (!pmus)
|
||||
return -ENOMEM;
|
||||
|
||||
size = max_dies * sizeof(struct intel_uncore_box *);
|
||||
size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
|
||||
|
||||
for (i = 0; i < type->num_boxes; i++) {
|
||||
pmus[i].func_id = setid ? i : -1;
|
||||
@ -954,6 +962,9 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
|
||||
|
||||
type->pmu_group = &uncore_pmu_attr_group;
|
||||
|
||||
if (type->set_mapping)
|
||||
type->set_mapping(type);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
@ -1112,7 +1123,7 @@ static int __init uncore_pci_init(void)
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
size = max_dies * sizeof(struct pci_extra_dev);
|
||||
size = uncore_max_dies() * sizeof(struct pci_extra_dev);
|
||||
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
|
||||
if (!uncore_extra_pci_dev) {
|
||||
ret = -ENOMEM;
|
||||
@ -1514,6 +1525,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init),
|
||||
@ -1539,7 +1552,8 @@ static int __init intel_uncore_init(void)
|
||||
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return -ENODEV;
|
||||
|
||||
max_dies = topology_max_packages() * topology_max_die_per_package();
|
||||
__uncore_max_dies =
|
||||
topology_max_packages() * topology_max_die_per_package();
|
||||
|
||||
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
|
||||
if (uncore_init->pci_init) {
|
||||
|
@ -61,6 +61,7 @@ struct intel_uncore_type {
|
||||
unsigned msr_offset;
|
||||
unsigned mmio_offset;
|
||||
};
|
||||
unsigned mmio_map_size;
|
||||
unsigned num_shared_regs:8;
|
||||
unsigned single_fixed:1;
|
||||
unsigned pair_ctr_ctl:1;
|
||||
@ -72,7 +73,19 @@ struct intel_uncore_type {
|
||||
struct uncore_event_desc *event_descs;
|
||||
struct freerunning_counters *freerunning;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
const struct attribute_group **attr_update;
|
||||
struct pmu *pmu; /* for custom pmu ops */
|
||||
/*
|
||||
* Uncore PMU would store relevant platform topology configuration here
|
||||
* to identify which platform component each PMON block of that type is
|
||||
* supposed to monitor.
|
||||
*/
|
||||
u64 *topology;
|
||||
/*
|
||||
* Optional callbacks for managing mapping of Uncore units to PMONs
|
||||
*/
|
||||
int (*set_mapping)(struct intel_uncore_type *type);
|
||||
void (*cleanup_mapping)(struct intel_uncore_type *type);
|
||||
};
|
||||
|
||||
#define pmu_group attr_groups[0]
|
||||
@ -169,6 +182,18 @@ int uncore_pcibus_to_physid(struct pci_bus *bus);
|
||||
ssize_t uncore_event_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
|
||||
static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev)
|
||||
{
|
||||
return container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);
|
||||
}
|
||||
|
||||
#define to_device_attribute(n) container_of(n, struct device_attribute, attr)
|
||||
#define to_dev_ext_attribute(n) container_of(n, struct dev_ext_attribute, attr)
|
||||
#define attr_to_ext_attr(n) to_dev_ext_attribute(to_device_attribute(n))
|
||||
|
||||
extern int __uncore_max_dies;
|
||||
#define uncore_max_dies() (__uncore_max_dies)
|
||||
|
||||
#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
|
||||
@ -196,6 +221,18 @@ static inline bool uncore_pmc_freerunning(int idx)
|
||||
return idx == UNCORE_PMC_IDX_FREERUNNING;
|
||||
}
|
||||
|
||||
static inline bool uncore_mmio_is_valid_offset(struct intel_uncore_box *box,
|
||||
unsigned long offset)
|
||||
{
|
||||
if (offset < box->pmu->type->mmio_map_size)
|
||||
return true;
|
||||
|
||||
pr_warn_once("perf uncore: Invalid offset 0x%lx exceeds mapped area of %s.\n",
|
||||
offset, box->pmu->type->name);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
|
@ -42,6 +42,17 @@
|
||||
#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0
|
||||
#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34
|
||||
#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35
|
||||
#define PCI_DEVICE_ID_INTEL_CML_H1_IMC 0x9b44
|
||||
#define PCI_DEVICE_ID_INTEL_CML_H2_IMC 0x9b54
|
||||
#define PCI_DEVICE_ID_INTEL_CML_H3_IMC 0x9b64
|
||||
#define PCI_DEVICE_ID_INTEL_CML_U1_IMC 0x9b51
|
||||
#define PCI_DEVICE_ID_INTEL_CML_U2_IMC 0x9b61
|
||||
#define PCI_DEVICE_ID_INTEL_CML_U3_IMC 0x9b71
|
||||
#define PCI_DEVICE_ID_INTEL_CML_S1_IMC 0x9b33
|
||||
#define PCI_DEVICE_ID_INTEL_CML_S2_IMC 0x9b43
|
||||
#define PCI_DEVICE_ID_INTEL_CML_S3_IMC 0x9b53
|
||||
#define PCI_DEVICE_ID_INTEL_CML_S4_IMC 0x9b63
|
||||
#define PCI_DEVICE_ID_INTEL_CML_S5_IMC 0x9b73
|
||||
#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
|
||||
#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
|
||||
#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02
|
||||
@ -415,6 +426,7 @@ static const struct attribute_group snb_uncore_imc_format_group = {
|
||||
|
||||
static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
struct intel_uncore_type *type = box->pmu->type;
|
||||
struct pci_dev *pdev = box->pci_dev;
|
||||
int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
|
||||
resource_size_t addr;
|
||||
@ -430,7 +442,10 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
|
||||
|
||||
addr &= ~(PAGE_SIZE - 1);
|
||||
|
||||
box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
|
||||
box->io_addr = ioremap(addr, type->mmio_map_size);
|
||||
if (!box->io_addr)
|
||||
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
|
||||
|
||||
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
|
||||
}
|
||||
|
||||
@ -586,6 +601,7 @@ static struct intel_uncore_type snb_uncore_imc = {
|
||||
.num_counters = 2,
|
||||
.num_boxes = 1,
|
||||
.num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
|
||||
.mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE,
|
||||
.freerunning = snb_uncore_imc_freerunning,
|
||||
.event_descs = snb_uncore_imc_events,
|
||||
.format_group = &snb_uncore_imc_format_group,
|
||||
@ -771,6 +787,50 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H1_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H2_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_H3_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U1_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U2_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_U3_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S1_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S2_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S3_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S4_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CML_S5_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
@ -863,6 +923,17 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
|
||||
IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */
|
||||
IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */
|
||||
IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */
|
||||
IMC_DEV(CML_H1_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_H2_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_H3_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_U1_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_U2_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_U3_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_S1_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_S2_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_S3_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_S4_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(CML_S5_IMC, &skl_uncore_pci_driver),
|
||||
IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
|
||||
IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
|
||||
{ /* end marker */ }
|
||||
@ -1085,11 +1156,13 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
|
||||
}
|
||||
|
||||
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
|
||||
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
|
||||
|
||||
static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
|
||||
struct intel_uncore_pmu *pmu = box->pmu;
|
||||
struct intel_uncore_type *type = pmu->type;
|
||||
resource_size_t addr;
|
||||
u32 mch_bar;
|
||||
|
||||
@ -1112,7 +1185,9 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
|
||||
addr |= ((resource_size_t)mch_bar << 32);
|
||||
#endif
|
||||
|
||||
box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
|
||||
box->io_addr = ioremap(addr, type->mmio_map_size);
|
||||
if (!box->io_addr)
|
||||
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
|
||||
@ -1138,6 +1213,7 @@ static struct intel_uncore_type tgl_uncore_imc_free_running = {
|
||||
.num_counters = 3,
|
||||
.num_boxes = 2,
|
||||
.num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
|
||||
.mmio_map_size = TGL_UNCORE_PCI_IMC_MAP_SIZE,
|
||||
.freerunning = tgl_uncore_imc_freerunning,
|
||||
.ops = &tgl_uncore_imc_freerunning_ops,
|
||||
.event_descs = tgl_uncore_imc_events,
|
||||
|
@ -273,6 +273,30 @@
|
||||
#define SKX_CPUNODEID 0xc0
|
||||
#define SKX_GIDNIDMAP 0xd4
|
||||
|
||||
/*
|
||||
* The CPU_BUS_NUMBER MSR returns the values of the respective CPUBUSNO CSR
|
||||
* that BIOS programmed. MSR has package scope.
|
||||
* | Bit | Default | Description
|
||||
* | [63] | 00h | VALID - When set, indicates the CPU bus
|
||||
* numbers have been initialized. (RO)
|
||||
* |[62:48]| --- | Reserved
|
||||
* |[47:40]| 00h | BUS_NUM_5 — Return the bus number BIOS assigned
|
||||
* CPUBUSNO(5). (RO)
|
||||
* |[39:32]| 00h | BUS_NUM_4 — Return the bus number BIOS assigned
|
||||
* CPUBUSNO(4). (RO)
|
||||
* |[31:24]| 00h | BUS_NUM_3 — Return the bus number BIOS assigned
|
||||
* CPUBUSNO(3). (RO)
|
||||
* |[23:16]| 00h | BUS_NUM_2 — Return the bus number BIOS assigned
|
||||
* CPUBUSNO(2). (RO)
|
||||
* |[15:8] | 00h | BUS_NUM_1 — Return the bus number BIOS assigned
|
||||
* CPUBUSNO(1). (RO)
|
||||
* | [7:0] | 00h | BUS_NUM_0 — Return the bus number BIOS assigned
|
||||
* CPUBUSNO(0). (RO)
|
||||
*/
|
||||
#define SKX_MSR_CPU_BUS_NUMBER 0x300
|
||||
#define SKX_MSR_CPU_BUS_VALID_BIT (1ULL << 63)
|
||||
#define BUS_NUM_STRIDE 8
|
||||
|
||||
/* SKX CHA */
|
||||
#define SKX_CHA_MSR_PMON_BOX_FILTER_TID (0x1ffULL << 0)
|
||||
#define SKX_CHA_MSR_PMON_BOX_FILTER_LINK (0xfULL << 9)
|
||||
@ -3612,6 +3636,170 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die)
|
||||
{
|
||||
return pmu->type->topology[die] >> (pmu->pmu_idx * BUS_NUM_STRIDE);
|
||||
}
|
||||
|
||||
static umode_t
|
||||
skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
|
||||
{
|
||||
struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
|
||||
|
||||
/* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */
|
||||
return (!skx_iio_stack(pmu, die) && pmu->pmu_idx) ? 0 : attr->mode;
|
||||
}
|
||||
|
||||
static ssize_t skx_iio_mapping_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct pci_bus *bus = pci_find_next_bus(NULL);
|
||||
struct intel_uncore_pmu *uncore_pmu = dev_to_uncore_pmu(dev);
|
||||
struct dev_ext_attribute *ea = to_dev_ext_attribute(attr);
|
||||
long die = (long)ea->var;
|
||||
|
||||
/*
|
||||
* Current implementation is for single segment configuration hence it's
|
||||
* safe to take the segment value from the first available root bus.
|
||||
*/
|
||||
return sprintf(buf, "%04x:%02x\n", pci_domain_nr(bus),
|
||||
skx_iio_stack(uncore_pmu, die));
|
||||
}
|
||||
|
||||
/*
 * Read SKX_MSR_CPU_BUS_NUMBER on @cpu and store the raw value in *@topology.
 *
 * Returns 0 on success, or -ENXIO if the MSR read fails or the VALID bit is
 * not set in the value read. *@topology is left untouched on failure.
 */
static int skx_msr_cpu_bus_read(int cpu, u64 *topology)
{
	u64 bus_numbers;

	if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &bus_numbers))
		return -ENXIO;

	if (!(bus_numbers & SKX_MSR_CPU_BUS_VALID_BIT))
		return -ENXIO;

	*topology = bus_numbers;

	return 0;
}
|
||||
|
||||
static int die_to_cpu(int die)
|
||||
{
|
||||
int res = 0, cpu, current_die;
|
||||
/*
|
||||
* Using cpus_read_lock() to ensure cpu is not going down between
|
||||
* looking at cpu_online_mask.
|
||||
*/
|
||||
cpus_read_lock();
|
||||
for_each_online_cpu(cpu) {
|
||||
current_die = topology_logical_die_id(cpu);
|
||||
if (current_die == die) {
|
||||
res = cpu;
|
||||
break;
|
||||
}
|
||||
}
|
||||
cpus_read_unlock();
|
||||
return res;
|
||||
}
|
||||
|
||||
static int skx_iio_get_topology(struct intel_uncore_type *type)
|
||||
{
|
||||
int i, ret;
|
||||
struct pci_bus *bus = NULL;
|
||||
|
||||
/*
|
||||
* Verified single-segment environments only; disabled for multiple
|
||||
* segment topologies for now except VMD domains.
|
||||
* VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
|
||||
*/
|
||||
while ((bus = pci_find_next_bus(bus))
|
||||
&& (!pci_domain_nr(bus) || pci_domain_nr(bus) > 0xffff))
|
||||
;
|
||||
if (bus)
|
||||
return -EPERM;
|
||||
|
||||
type->topology = kcalloc(uncore_max_dies(), sizeof(u64), GFP_KERNEL);
|
||||
if (!type->topology)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < uncore_max_dies(); i++) {
|
||||
ret = skx_msr_cpu_bus_read(die_to_cpu(i), &type->topology[i]);
|
||||
if (ret) {
|
||||
kfree(type->topology);
|
||||
type->topology = NULL;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Attribute group holding the per-die "dieN" mapping attributes.
 * .attrs is not set statically: skx_iio_set_mapping() assigns it on success
 * and skx_iio_cleanup_mapping() frees it and resets it to NULL.
 */
static struct attribute_group skx_iio_mapping_group = {
|
||||
.is_visible = skx_iio_mapping_visible,
|
||||
};
|
||||
|
||||
/*
 * NULL-terminated list of groups installed as skx_uncore_iio.attr_update.
 */
static const struct attribute_group *skx_iio_attr_update[] = {
|
||||
&skx_iio_mapping_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int skx_iio_set_mapping(struct intel_uncore_type *type)
|
||||
{
|
||||
char buf[64];
|
||||
int ret;
|
||||
long die = -1;
|
||||
struct attribute **attrs = NULL;
|
||||
struct dev_ext_attribute *eas = NULL;
|
||||
|
||||
ret = skx_iio_get_topology(type);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* One more for NULL. */
|
||||
attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
|
||||
if (!attrs)
|
||||
goto err;
|
||||
|
||||
eas = kcalloc(uncore_max_dies(), sizeof(*eas), GFP_KERNEL);
|
||||
if (!eas)
|
||||
goto err;
|
||||
|
||||
for (die = 0; die < uncore_max_dies(); die++) {
|
||||
sprintf(buf, "die%ld", die);
|
||||
sysfs_attr_init(&eas[die].attr.attr);
|
||||
eas[die].attr.attr.name = kstrdup(buf, GFP_KERNEL);
|
||||
if (!eas[die].attr.attr.name)
|
||||
goto err;
|
||||
eas[die].attr.attr.mode = 0444;
|
||||
eas[die].attr.show = skx_iio_mapping_show;
|
||||
eas[die].attr.store = NULL;
|
||||
eas[die].var = (void *)die;
|
||||
attrs[die] = &eas[die].attr.attr;
|
||||
}
|
||||
skx_iio_mapping_group.attrs = attrs;
|
||||
|
||||
return 0;
|
||||
err:
|
||||
for (; die >= 0; die--)
|
||||
kfree(eas[die].attr.attr.name);
|
||||
kfree(eas);
|
||||
kfree(attrs);
|
||||
kfree(type->topology);
|
||||
type->attr_update = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
|
||||
{
|
||||
struct attribute **attr = skx_iio_mapping_group.attrs;
|
||||
|
||||
if (!attr)
|
||||
return;
|
||||
|
||||
for (; *attr; attr++)
|
||||
kfree((*attr)->name);
|
||||
kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs));
|
||||
kfree(skx_iio_mapping_group.attrs);
|
||||
skx_iio_mapping_group.attrs = NULL;
|
||||
kfree(type->topology);
|
||||
}
|
||||
|
||||
static struct intel_uncore_type skx_uncore_iio = {
|
||||
.name = "iio",
|
||||
.num_counters = 4,
|
||||
@ -3626,6 +3814,9 @@ static struct intel_uncore_type skx_uncore_iio = {
|
||||
.constraints = skx_uncore_iio_constraints,
|
||||
.ops = &skx_uncore_iio_ops,
|
||||
.format_group = &skx_uncore_iio_format_group,
|
||||
.attr_update = skx_iio_attr_update,
|
||||
.set_mapping = skx_iio_set_mapping,
|
||||
.cleanup_mapping = skx_iio_cleanup_mapping,
|
||||
};
|
||||
|
||||
enum perf_uncore_iio_freerunning_type_id {
|
||||
@ -4421,6 +4612,7 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
|
||||
unsigned int box_ctl, int mem_offset)
|
||||
{
|
||||
struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
|
||||
struct intel_uncore_type *type = box->pmu->type;
|
||||
resource_size_t addr;
|
||||
u32 pci_dword;
|
||||
|
||||
@ -4435,9 +4627,11 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
|
||||
|
||||
addr += box_ctl;
|
||||
|
||||
box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE);
|
||||
if (!box->io_addr)
|
||||
box->io_addr = ioremap(addr, type->mmio_map_size);
|
||||
if (!box->io_addr) {
|
||||
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
|
||||
return;
|
||||
}
|
||||
|
||||
writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
|
||||
}
|
||||
@ -4480,6 +4674,9 @@ static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box,
|
||||
if (!box->io_addr)
|
||||
return;
|
||||
|
||||
if (!uncore_mmio_is_valid_offset(box, hwc->config_base))
|
||||
return;
|
||||
|
||||
writel(hwc->config | SNBEP_PMON_CTL_EN,
|
||||
box->io_addr + hwc->config_base);
|
||||
}
|
||||
@ -4492,6 +4689,9 @@ static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box,
|
||||
if (!box->io_addr)
|
||||
return;
|
||||
|
||||
if (!uncore_mmio_is_valid_offset(box, hwc->config_base))
|
||||
return;
|
||||
|
||||
writel(hwc->config, box->io_addr + hwc->config_base);
|
||||
}
|
||||
|
||||
@ -4530,6 +4730,7 @@ static struct intel_uncore_type snr_uncore_imc = {
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL,
|
||||
.mmio_offset = SNR_IMC_MMIO_OFFSET,
|
||||
.mmio_map_size = SNR_IMC_MMIO_SIZE,
|
||||
.ops = &snr_uncore_mmio_ops,
|
||||
.format_group = &skx_uncore_format_group,
|
||||
};
|
||||
@ -4570,6 +4771,7 @@ static struct intel_uncore_type snr_uncore_imc_free_running = {
|
||||
.num_counters = 3,
|
||||
.num_boxes = 1,
|
||||
.num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX,
|
||||
.mmio_map_size = SNR_IMC_MMIO_SIZE,
|
||||
.freerunning = snr_imc_freerunning,
|
||||
.ops = &snr_uncore_imc_freerunning_ops,
|
||||
.event_descs = snr_uncore_imc_freerunning_events,
|
||||
@ -4987,6 +5189,7 @@ static struct intel_uncore_type icx_uncore_imc = {
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL,
|
||||
.mmio_offset = SNR_IMC_MMIO_OFFSET,
|
||||
.mmio_map_size = SNR_IMC_MMIO_SIZE,
|
||||
.ops = &icx_uncore_mmio_ops,
|
||||
.format_group = &skx_uncore_format_group,
|
||||
};
|
||||
@ -5044,6 +5247,7 @@ static struct intel_uncore_type icx_uncore_imc_free_running = {
|
||||
.num_counters = 5,
|
||||
.num_boxes = 4,
|
||||
.num_freerunning_types = ICX_IMC_FREERUNNING_TYPE_MAX,
|
||||
.mmio_map_size = SNR_IMC_MMIO_SIZE,
|
||||
.freerunning = icx_imc_freerunning,
|
||||
.ops = &icx_uncore_imc_freerunning_ops,
|
||||
.event_descs = icx_uncore_imc_freerunning_events,
|
||||
|
@ -78,6 +78,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
|
||||
#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
|
||||
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
|
||||
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
|
||||
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
|
||||
|
||||
struct amd_nb {
|
||||
int nb_id; /* NorthBridge id */
|
||||
@ -178,6 +179,17 @@ struct intel_excl_cntrs {
|
||||
struct x86_perf_task_context;
|
||||
#define MAX_LBR_ENTRIES 32
|
||||
|
||||
enum {
|
||||
LBR_FORMAT_32 = 0x00,
|
||||
LBR_FORMAT_LIP = 0x01,
|
||||
LBR_FORMAT_EIP = 0x02,
|
||||
LBR_FORMAT_EIP_FLAGS = 0x03,
|
||||
LBR_FORMAT_EIP_FLAGS2 = 0x04,
|
||||
LBR_FORMAT_INFO = 0x05,
|
||||
LBR_FORMAT_TIME = 0x06,
|
||||
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
|
||||
};
|
||||
|
||||
enum {
|
||||
X86_PERF_KFREE_SHARED = 0,
|
||||
X86_PERF_KFREE_EXCL = 1,
|
||||
@ -233,10 +245,15 @@ struct cpu_hw_events {
|
||||
int lbr_pebs_users;
|
||||
struct perf_branch_stack lbr_stack;
|
||||
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
|
||||
struct er_account *lbr_sel;
|
||||
union {
|
||||
struct er_account *lbr_sel;
|
||||
struct er_account *lbr_ctl;
|
||||
};
|
||||
u64 br_sel;
|
||||
struct x86_perf_task_context *last_task_ctx;
|
||||
void *last_task_ctx;
|
||||
int last_log_id;
|
||||
int lbr_select;
|
||||
void *lbr_xsave;
|
||||
|
||||
/*
|
||||
* Intel host/guest exclude bits
|
||||
@ -673,13 +690,37 @@ struct x86_pmu {
|
||||
/*
|
||||
* Intel LBR
|
||||
*/
|
||||
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
|
||||
int lbr_nr; /* hardware stack size */
|
||||
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
|
||||
const int *lbr_sel_map; /* lbr_select mappings */
|
||||
unsigned int lbr_tos, lbr_from, lbr_to,
|
||||
lbr_info, lbr_nr; /* LBR base regs and size */
|
||||
union {
|
||||
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
|
||||
u64 lbr_ctl_mask; /* LBR_CTL valid bits */
|
||||
};
|
||||
union {
|
||||
const int *lbr_sel_map; /* lbr_select mappings */
|
||||
int *lbr_ctl_map; /* LBR_CTL mappings */
|
||||
};
|
||||
bool lbr_double_abort; /* duplicated lbr aborts */
|
||||
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
|
||||
|
||||
/*
|
||||
* Intel Architectural LBR CPUID Enumeration
|
||||
*/
|
||||
unsigned int lbr_depth_mask:8;
|
||||
unsigned int lbr_deep_c_reset:1;
|
||||
unsigned int lbr_lip:1;
|
||||
unsigned int lbr_cpl:1;
|
||||
unsigned int lbr_filter:1;
|
||||
unsigned int lbr_call_stack:1;
|
||||
unsigned int lbr_mispred:1;
|
||||
unsigned int lbr_timed_lbr:1;
|
||||
unsigned int lbr_br_type:1;
|
||||
|
||||
void (*lbr_reset)(void);
|
||||
void (*lbr_read)(struct cpu_hw_events *cpuc);
|
||||
void (*lbr_save)(void *ctx);
|
||||
void (*lbr_restore)(void *ctx);
|
||||
|
||||
/*
|
||||
* Intel PT/LBR/BTS are exclusive
|
||||
*/
|
||||
@ -718,17 +759,46 @@ struct x86_pmu {
|
||||
int (*aux_output_match) (struct perf_event *event);
|
||||
};
|
||||
|
||||
struct x86_perf_task_context {
|
||||
u64 lbr_from[MAX_LBR_ENTRIES];
|
||||
u64 lbr_to[MAX_LBR_ENTRIES];
|
||||
u64 lbr_info[MAX_LBR_ENTRIES];
|
||||
int tos;
|
||||
int valid_lbrs;
|
||||
struct x86_perf_task_context_opt {
|
||||
int lbr_callstack_users;
|
||||
int lbr_stack_state;
|
||||
int log_id;
|
||||
};
|
||||
|
||||
struct x86_perf_task_context {
|
||||
u64 lbr_sel;
|
||||
int tos;
|
||||
int valid_lbrs;
|
||||
struct x86_perf_task_context_opt opt;
|
||||
struct lbr_entry lbr[MAX_LBR_ENTRIES];
|
||||
};
|
||||
|
||||
struct x86_perf_task_context_arch_lbr {
|
||||
struct x86_perf_task_context_opt opt;
|
||||
struct lbr_entry entries[];
|
||||
};
|
||||
|
||||
/*
|
||||
* Add padding to guarantee the 64-byte alignment of the state buffer.
|
||||
*
|
||||
* The structure is dynamically allocated. The size of the LBR state may vary
|
||||
* based on the number of LBR registers.
|
||||
*
|
||||
* Do not put anything after the LBR state.
|
||||
*/
|
||||
struct x86_perf_task_context_arch_lbr_xsave {
|
||||
struct x86_perf_task_context_opt opt;
|
||||
|
||||
union {
|
||||
struct xregs_state xsave;
|
||||
struct {
|
||||
struct fxregs_state i387;
|
||||
struct xstate_header header;
|
||||
struct arch_lbr_state lbr;
|
||||
} __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
|
||||
};
|
||||
};
|
||||
|
||||
#define x86_add_quirk(func_) \
|
||||
do { \
|
||||
static struct x86_pmu_quirk __quirk __initdata = { \
|
||||
@ -777,6 +847,14 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \
|
||||
struct pmu *x86_get_pmu(void);
|
||||
extern struct x86_pmu x86_pmu __read_mostly;
|
||||
|
||||
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
|
||||
return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt;
|
||||
|
||||
return &((struct x86_perf_task_context *)ctx)->opt;
|
||||
}
|
||||
|
||||
static inline bool x86_pmu_has_lbr_callstack(void)
|
||||
{
|
||||
return x86_pmu.lbr_sel_map &&
|
||||
@ -989,7 +1067,10 @@ void release_ds_buffers(void);
|
||||
|
||||
void reserve_ds_buffers(void);
|
||||
|
||||
void release_lbr_buffers(void);
|
||||
|
||||
extern struct event_constraint bts_constraint;
|
||||
extern struct event_constraint vlbr_constraint;
|
||||
|
||||
void intel_pmu_enable_bts(u64 config);
|
||||
|
||||
@ -1041,7 +1122,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
|
||||
|
||||
void intel_pmu_auto_reload_read(struct perf_event *event);
|
||||
|
||||
void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
|
||||
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
|
||||
|
||||
void intel_ds_init(void);
|
||||
|
||||
@ -1054,6 +1135,10 @@ u64 lbr_from_signext_quirk_wr(u64 val);
|
||||
|
||||
void intel_pmu_lbr_reset(void);
|
||||
|
||||
void intel_pmu_lbr_reset_32(void);
|
||||
|
||||
void intel_pmu_lbr_reset_64(void);
|
||||
|
||||
void intel_pmu_lbr_add(struct perf_event *event);
|
||||
|
||||
void intel_pmu_lbr_del(struct perf_event *event);
|
||||
@ -1064,6 +1149,14 @@ void intel_pmu_lbr_disable_all(void);
|
||||
|
||||
void intel_pmu_lbr_read(void);
|
||||
|
||||
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc);
|
||||
|
||||
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc);
|
||||
|
||||
void intel_pmu_lbr_save(void *ctx);
|
||||
|
||||
void intel_pmu_lbr_restore(void *ctx);
|
||||
|
||||
void intel_pmu_lbr_init_core(void);
|
||||
|
||||
void intel_pmu_lbr_init_nhm(void);
|
||||
@ -1080,6 +1173,8 @@ void intel_pmu_lbr_init_skl(void);
|
||||
|
||||
void intel_pmu_lbr_init_knl(void);
|
||||
|
||||
void intel_pmu_arch_lbr_init(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_nhm(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_skl(bool pmem);
|
||||
@ -1115,6 +1210,10 @@ static inline void release_ds_buffers(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void release_lbr_buffers(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int intel_pmu_init(void)
|
||||
{
|
||||
return 0;
|
||||
|
@ -787,7 +787,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
|
||||
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
|
||||
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
|
||||
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
|
||||
{},
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
|
||||
|
@ -1,6 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Zhoaxin PMU; like Intel Architectural PerfMon-v2
|
||||
* Zhaoxin PMU; like Intel Architectural PerfMon-v2
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
@ -366,6 +366,7 @@
|
||||
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
|
||||
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
|
||||
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
|
||||
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
|
||||
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
|
||||
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
|
||||
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
|
||||
|
@ -274,7 +274,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
|
||||
*/
|
||||
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
|
||||
{
|
||||
u64 mask = -1;
|
||||
u64 mask = xfeatures_mask_all;
|
||||
u32 lmask = mask;
|
||||
u32 hmask = mask >> 32;
|
||||
int err;
|
||||
@ -320,7 +320,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
|
||||
*/
|
||||
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
|
||||
{
|
||||
u64 mask = -1;
|
||||
u64 mask = xfeatures_mask_all;
|
||||
u32 lmask = mask;
|
||||
u32 hmask = mask >> 32;
|
||||
int err;
|
||||
@ -356,6 +356,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
|
||||
*/
|
||||
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
|
||||
{
|
||||
u64 mask = xfeatures_mask_user();
|
||||
u32 lmask = mask;
|
||||
u32 hmask = mask >> 32;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -367,7 +370,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf)
|
||||
return -EFAULT;
|
||||
|
||||
stac();
|
||||
XSTATE_OP(XSAVE, buf, -1, -1, err);
|
||||
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
|
||||
clac();
|
||||
|
||||
return err;
|
||||
@ -408,43 +411,7 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* These must be called with preempt disabled. Returns
|
||||
* 'true' if the FPU state is still intact and we can
|
||||
* keep registers active.
|
||||
*
|
||||
* The legacy FNSAVE instruction cleared all FPU state
|
||||
* unconditionally, so registers are essentially destroyed.
|
||||
* Modern FPU state can be kept in registers, if there are
|
||||
* no pending FP exceptions.
|
||||
*/
|
||||
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
|
||||
{
|
||||
if (likely(use_xsave())) {
|
||||
copy_xregs_to_kernel(&fpu->state.xsave);
|
||||
|
||||
/*
|
||||
* AVX512 state is tracked here because its use is
|
||||
* known to slow the max clock speed of the core.
|
||||
*/
|
||||
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
|
||||
fpu->avx512_timestamp = jiffies;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (likely(use_fxsr())) {
|
||||
copy_fxregs_to_kernel(fpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Legacy FPU register saving, FNSAVE always clears FPU registers,
|
||||
* so we have to mark them inactive:
|
||||
*/
|
||||
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
|
||||
|
||||
return 0;
|
||||
}
|
||||
extern int copy_fpregs_to_fpstate(struct fpu *fpu);
|
||||
|
||||
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
|
||||
{
|
||||
|
@ -114,6 +114,12 @@ enum xfeature {
|
||||
XFEATURE_Hi16_ZMM,
|
||||
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
|
||||
XFEATURE_PKRU,
|
||||
XFEATURE_RSRVD_COMP_10,
|
||||
XFEATURE_RSRVD_COMP_11,
|
||||
XFEATURE_RSRVD_COMP_12,
|
||||
XFEATURE_RSRVD_COMP_13,
|
||||
XFEATURE_RSRVD_COMP_14,
|
||||
XFEATURE_LBR,
|
||||
|
||||
XFEATURE_MAX,
|
||||
};
|
||||
@ -128,6 +134,7 @@ enum xfeature {
|
||||
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
|
||||
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
|
||||
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
|
||||
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
|
||||
|
||||
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
|
||||
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
|
||||
@ -229,6 +236,26 @@ struct pkru_state {
|
||||
u32 pad;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 15: Architectural LBR configuration state.
|
||||
* The size of Arch LBR state depends on the number of LBRs (lbr_depth).
|
||||
*/
|
||||
|
||||
struct lbr_entry {
|
||||
u64 from;
|
||||
u64 to;
|
||||
u64 info;
|
||||
};
|
||||
|
||||
struct arch_lbr_state {
|
||||
u64 lbr_ctl;
|
||||
u64 lbr_depth;
|
||||
u64 ler_from;
|
||||
u64 ler_to;
|
||||
u64 ler_info;
|
||||
struct lbr_entry entries[];
|
||||
} __packed;
|
||||
|
||||
struct xstate_header {
|
||||
u64 xfeatures;
|
||||
u64 xcomp_bv;
|
||||
|
@ -21,6 +21,8 @@
|
||||
#define XSAVE_YMM_SIZE 256
|
||||
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
|
||||
|
||||
#define XSAVE_ALIGNMENT 64
|
||||
|
||||
/* All currently supported user features */
|
||||
#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
|
||||
XFEATURE_MASK_SSE | \
|
||||
@ -35,6 +37,27 @@
|
||||
/* All currently supported supervisor features */
|
||||
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
|
||||
|
||||
/*
|
||||
* A supervisor state component may not always contain valuable information,
|
||||
* and its size may be huge. Saving/restoring such supervisor state components
|
||||
* at each context switch can cause high CPU and space overhead, which should
|
||||
* be avoided. Such supervisor state components should only be saved/restored
|
||||
* on demand. The on-demand dynamic supervisor features are set in this mask.
|
||||
*
|
||||
* Unlike the existing supported supervisor features, a dynamic supervisor
|
||||
* feature does not allocate a buffer in task->fpu, and the corresponding
|
||||
* supervisor state component cannot be saved/restored at each context switch.
|
||||
*
|
||||
* To support a dynamic supervisor feature, a developer should follow the
|
||||
* dos and don'ts as below:
|
||||
* - Do dynamically allocate a buffer for the supervisor state component.
|
||||
* - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
|
||||
* state component to/from the buffer.
|
||||
* - Don't set the bit corresponding to the dynamic supervisor feature in
|
||||
* IA32_XSS at run time, since it has been set at boot time.
|
||||
*/
|
||||
#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR)
|
||||
|
||||
/*
|
||||
* Unsupported supervisor features. When a supervisor feature in this mask is
|
||||
* supported in the future, move it to the supported supervisor feature mask.
|
||||
@ -43,6 +66,7 @@
|
||||
|
||||
/* All supervisor states including supported and unsupported states. */
|
||||
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
|
||||
XFEATURE_MASK_DYNAMIC | \
|
||||
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -63,6 +87,14 @@ static inline u64 xfeatures_mask_user(void)
|
||||
return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
|
||||
}
|
||||
|
||||
static inline u64 xfeatures_mask_dynamic(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
|
||||
return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR;
|
||||
|
||||
return XFEATURE_MASK_DYNAMIC;
|
||||
}
|
||||
|
||||
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
|
||||
|
||||
extern void __init update_regset_xstate_info(unsigned int size,
|
||||
@ -71,11 +103,15 @@ extern void __init update_regset_xstate_info(unsigned int size,
|
||||
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
|
||||
const void *get_xsave_field_ptr(int xfeature_nr);
|
||||
int using_compacted_format(void);
|
||||
int xfeature_size(int xfeature_nr);
|
||||
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
|
||||
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
|
||||
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
|
||||
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
|
||||
void copy_supervisor_to_kernel(struct xregs_state *xsave);
|
||||
void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
|
||||
void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
|
||||
|
||||
|
||||
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
|
||||
int validate_user_xstate_header(const struct xstate_header *hdr);
|
||||
|
@ -66,6 +66,8 @@ struct arch_specific_insn {
|
||||
*/
|
||||
bool boostable;
|
||||
bool if_modifier;
|
||||
/* Number of bytes of text poked */
|
||||
int tp_len;
|
||||
};
|
||||
|
||||
struct arch_optimized_insn {
|
||||
|
@ -158,7 +158,23 @@
|
||||
#define LBR_INFO_MISPRED BIT_ULL(63)
|
||||
#define LBR_INFO_IN_TX BIT_ULL(62)
|
||||
#define LBR_INFO_ABORT BIT_ULL(61)
|
||||
#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60)
|
||||
#define LBR_INFO_CYCLES 0xffff
|
||||
#define LBR_INFO_BR_TYPE_OFFSET 56
|
||||
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
|
||||
|
||||
#define MSR_ARCH_LBR_CTL 0x000014ce
|
||||
#define ARCH_LBR_CTL_LBREN BIT(0)
|
||||
#define ARCH_LBR_CTL_CPL_OFFSET 1
|
||||
#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
|
||||
#define ARCH_LBR_CTL_STACK_OFFSET 3
|
||||
#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
|
||||
#define ARCH_LBR_CTL_FILTER_OFFSET 16
|
||||
#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
|
||||
#define MSR_ARCH_LBR_DEPTH 0x000014cf
|
||||
#define MSR_ARCH_LBR_FROM_0 0x00001500
|
||||
#define MSR_ARCH_LBR_TO_0 0x00001600
|
||||
#define MSR_ARCH_LBR_INFO_0 0x00001200
|
||||
|
||||
#define MSR_IA32_PEBS_ENABLE 0x000003f1
|
||||
#define MSR_PEBS_DATA_CFG 0x000003f2
|
||||
|
@ -142,6 +142,46 @@ union cpuid10_edx {
|
||||
unsigned int full;
|
||||
};
|
||||
|
||||
/*
|
||||
* Intel Architectural LBR CPUID detection/enumeration details:
|
||||
*/
|
||||
union cpuid28_eax {
|
||||
struct {
|
||||
/* Supported LBR depth values */
|
||||
unsigned int lbr_depth_mask:8;
|
||||
unsigned int reserved:22;
|
||||
/* Deep C-state Reset */
|
||||
unsigned int lbr_deep_c_reset:1;
|
||||
/* IP values contain LIP */
|
||||
unsigned int lbr_lip:1;
|
||||
} split;
|
||||
unsigned int full;
|
||||
};
|
||||
|
||||
union cpuid28_ebx {
|
||||
struct {
|
||||
/* CPL Filtering Supported */
|
||||
unsigned int lbr_cpl:1;
|
||||
/* Branch Filtering Supported */
|
||||
unsigned int lbr_filter:1;
|
||||
/* Call-stack Mode Supported */
|
||||
unsigned int lbr_call_stack:1;
|
||||
} split;
|
||||
unsigned int full;
|
||||
};
|
||||
|
||||
union cpuid28_ecx {
|
||||
struct {
|
||||
/* Mispredict Bit Supported */
|
||||
unsigned int lbr_mispred:1;
|
||||
/* Timed LBRs Supported */
|
||||
unsigned int lbr_timed_lbr:1;
|
||||
/* Branch Type Field Supported */
|
||||
unsigned int lbr_br_type:1;
|
||||
} split;
|
||||
unsigned int full;
|
||||
};
|
||||
|
||||
struct x86_pmu_capability {
|
||||
int version;
|
||||
int num_counters_gp;
|
||||
@ -192,9 +232,29 @@ struct x86_pmu_capability {
|
||||
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
|
||||
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
|
||||
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
|
||||
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
|
||||
#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
|
||||
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
|
||||
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
|
||||
|
||||
/*
|
||||
* We model guest LBR event tracing as another fixed-mode PMC like BTS.
|
||||
*
|
||||
* We choose bit 58 because it's used to indicate LBR stack frozen state
|
||||
* for architectural perfmon v4, also we unconditionally mask that bit in
|
||||
* the handle_pmi_common(), so it'll never be set in the overflow handling.
|
||||
*
|
||||
* With this fake counter assigned, the guest LBR event user (such as KVM),
|
||||
* can program the LBR registers on its own, and we don't actually do anything
|
||||
* with them in the host context.
|
||||
*/
|
||||
#define INTEL_PMC_IDX_FIXED_VLBR (GLOBAL_STATUS_LBRS_FROZEN_BIT)
|
||||
|
||||
/*
|
||||
* Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b,
|
||||
* since it would claim bit 58 which is effectively Fixed26.
|
||||
*/
|
||||
#define INTEL_FIXED_VLBR_EVENT 0x1b00
|
||||
|
||||
/*
|
||||
* Adaptive PEBS v4
|
||||
*/
|
||||
@ -222,14 +282,6 @@ struct pebs_xmm {
|
||||
u64 xmm[16*2]; /* two entries for each register */
|
||||
};
|
||||
|
||||
struct pebs_lbr_entry {
|
||||
u64 from, to, info;
|
||||
};
|
||||
|
||||
struct pebs_lbr {
|
||||
struct pebs_lbr_entry lbr[0]; /* Variable length */
|
||||
};
|
||||
|
||||
/*
|
||||
* IBS cpuid feature detection
|
||||
*/
|
||||
@ -333,6 +385,13 @@ struct perf_guest_switch_msr {
|
||||
u64 host, guest;
|
||||
};
|
||||
|
||||
struct x86_pmu_lbr {
|
||||
unsigned int nr;
|
||||
unsigned int from;
|
||||
unsigned int to;
|
||||
unsigned int info;
|
||||
};
|
||||
|
||||
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
|
||||
extern void perf_check_microcode(void);
|
||||
extern int x86_perf_rdpmc_index(struct perf_event *event);
|
||||
@ -348,12 +407,17 @@ static inline void perf_check_microcode(void) { }
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
|
||||
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
|
||||
extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
|
||||
#else
|
||||
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
|
||||
{
|
||||
*nr = 0;
|
||||
return NULL;
|
||||
}
|
||||
static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/stringify.h>
|
||||
@ -1001,6 +1002,7 @@ struct text_poke_loc {
|
||||
s32 rel32;
|
||||
u8 opcode;
|
||||
const u8 text[POKE_MAX_OPCODE_SIZE];
|
||||
u8 old;
|
||||
};
|
||||
|
||||
struct bp_patching_desc {
|
||||
@ -1168,8 +1170,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
|
||||
/*
|
||||
* First step: add an int3 trap to the address that will be patched.
|
||||
*/
|
||||
for (i = 0; i < nr_entries; i++)
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
|
||||
text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
|
||||
}
|
||||
|
||||
text_poke_sync();
|
||||
|
||||
@ -1177,14 +1181,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
|
||||
* Second step: update all but the first byte of the patched range.
|
||||
*/
|
||||
for (do_sync = 0, i = 0; i < nr_entries; i++) {
|
||||
u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
|
||||
int len = text_opcode_size(tp[i].opcode);
|
||||
|
||||
if (len - INT3_INSN_SIZE > 0) {
|
||||
memcpy(old + INT3_INSN_SIZE,
|
||||
text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
|
||||
len - INT3_INSN_SIZE);
|
||||
text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
|
||||
(const char *)tp[i].text + INT3_INSN_SIZE,
|
||||
len - INT3_INSN_SIZE);
|
||||
do_sync++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit a perf event to record the text poke, primarily to
|
||||
* support Intel PT decoding which must walk the executable code
|
||||
* to reconstruct the trace. The flow up to here is:
|
||||
* - write INT3 byte
|
||||
* - IPI-SYNC
|
||||
* - write instruction tail
|
||||
* At this point the actual control flow will be through the
|
||||
* INT3 and handler and not hit the old or new instruction.
|
||||
* Intel PT outputs FUP/TIP packets for the INT3, so the flow
|
||||
* can still be decoded. Subsequently:
|
||||
* - emit RECORD_TEXT_POKE with the new instruction
|
||||
* - IPI-SYNC
|
||||
* - write first byte
|
||||
* - IPI-SYNC
|
||||
* So before the text poke event timestamp, the decoder will see
|
||||
* either the old instruction flow or FUP/TIP of INT3. After the
|
||||
* text poke event timestamp, the decoder will see either the
|
||||
* new instruction flow or FUP/TIP of INT3. Thus decoders can
|
||||
* use the timestamp as the point at which to modify the
|
||||
* executable code.
|
||||
* The old instruction is recorded so that the event can be
|
||||
* processed forwards or backwards.
|
||||
*/
|
||||
perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
|
||||
tp[i].text, len);
|
||||
}
|
||||
|
||||
if (do_sync) {
|
||||
|
@ -82,6 +82,45 @@ bool irq_fpu_usable(void)
|
||||
}
|
||||
EXPORT_SYMBOL(irq_fpu_usable);
|
||||
|
||||
/*
|
||||
* These must be called with preempt disabled. Returns
|
||||
* 'true' if the FPU state is still intact and we can
|
||||
* keep registers active.
|
||||
*
|
||||
* The legacy FNSAVE instruction cleared all FPU state
|
||||
* unconditionally, so registers are essentially destroyed.
|
||||
* Modern FPU state can be kept in registers, if there are
|
||||
* no pending FP exceptions.
|
||||
*/
|
||||
int copy_fpregs_to_fpstate(struct fpu *fpu)
|
||||
{
|
||||
if (likely(use_xsave())) {
|
||||
copy_xregs_to_kernel(&fpu->state.xsave);
|
||||
|
||||
/*
|
||||
* AVX512 state is tracked here because its use is
|
||||
* known to slow the max clock speed of the core.
|
||||
*/
|
||||
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
|
||||
fpu->avx512_timestamp = jiffies;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (likely(use_fxsr())) {
|
||||
copy_fxregs_to_kernel(fpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Legacy FPU register saving, FNSAVE always clears FPU registers,
|
||||
* so we have to mark them inactive:
|
||||
*/
|
||||
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(copy_fpregs_to_fpstate);
|
||||
|
||||
void kernel_fpu_begin(void)
|
||||
{
|
||||
preempt_disable();
|
||||
|
@ -233,8 +233,10 @@ void fpu__init_cpu_xstate(void)
|
||||
/*
|
||||
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES)) {
|
||||
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
|
||||
xfeatures_mask_dynamic());
|
||||
}
|
||||
}
|
||||
|
||||
static bool xfeature_enabled(enum xfeature xfeature)
|
||||
@ -486,7 +488,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
|
||||
return ebx;
|
||||
}
|
||||
|
||||
static int xfeature_size(int xfeature_nr)
|
||||
int xfeature_size(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
@ -598,7 +600,8 @@ static void check_xstate_against_struct(int nr)
|
||||
*/
|
||||
if ((nr < XFEATURE_YMM) ||
|
||||
(nr >= XFEATURE_MAX) ||
|
||||
(nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
|
||||
(nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
|
||||
((nr >= XFEATURE_RSRVD_COMP_10) && (nr <= XFEATURE_LBR))) {
|
||||
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
|
||||
XSTATE_WARN_ON(1);
|
||||
}
|
||||
@ -847,8 +850,10 @@ void fpu__resume_cpu(void)
|
||||
* Restore IA32_XSS. The same CPUID bit enumerates support
|
||||
* of XSAVES and MSR_IA32_XSS.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES)) {
|
||||
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
|
||||
xfeatures_mask_dynamic());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1356,6 +1361,78 @@ void copy_supervisor_to_kernel(struct xregs_state *xstate)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to
|
||||
* an xsave area
|
||||
* @xstate: A pointer to an xsave area
|
||||
* @mask: Represent the dynamic supervisor features saved into the xsave area
|
||||
*
|
||||
* Only the dynamic supervisor states sets in the mask are saved into the xsave
|
||||
* area (See the comment in XFEATURE_MASK_DYNAMIC for the details of dynamic
|
||||
* supervisor feature). Besides the dynamic supervisor states, the legacy
|
||||
* region and XSAVE header are also saved into the xsave area. The supervisor
|
||||
* features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
|
||||
* XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved.
|
||||
*
|
||||
* The xsave area must be 64-bytes aligned.
|
||||
*/
|
||||
void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask)
|
||||
{
|
||||
u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
|
||||
u32 lmask, hmask;
|
||||
int err;
|
||||
|
||||
if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
|
||||
return;
|
||||
|
||||
if (WARN_ON_FPU(!dynamic_mask))
|
||||
return;
|
||||
|
||||
lmask = dynamic_mask;
|
||||
hmask = dynamic_mask >> 32;
|
||||
|
||||
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
|
||||
|
||||
/* Should never fault when copying to a kernel buffer */
|
||||
WARN_ON_FPU(err);
|
||||
}
|
||||
|
||||
/**
|
||||
* copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from
|
||||
* an xsave area
|
||||
* @xstate: A pointer to an xsave area
|
||||
* @mask: Represent the dynamic supervisor features restored from the xsave area
|
||||
*
|
||||
* Only the dynamic supervisor states sets in the mask are restored from the
|
||||
* xsave area (See the comment in XFEATURE_MASK_DYNAMIC for the details of
|
||||
* dynamic supervisor feature). Besides the dynamic supervisor states, the
|
||||
* legacy region and XSAVE header are also restored from the xsave area. The
|
||||
* supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
|
||||
* XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored.
|
||||
*
|
||||
* The xsave area must be 64-bytes aligned.
|
||||
*/
|
||||
void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask)
|
||||
{
|
||||
u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
|
||||
u32 lmask, hmask;
|
||||
int err;
|
||||
|
||||
if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
|
||||
return;
|
||||
|
||||
if (WARN_ON_FPU(!dynamic_mask))
|
||||
return;
|
||||
|
||||
lmask = dynamic_mask;
|
||||
hmask = dynamic_mask >> 32;
|
||||
|
||||
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
|
||||
|
||||
/* Should never fault when copying from a kernel buffer */
|
||||
WARN_ON_FPU(err);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
/*
|
||||
* Report the amount of time elapsed in milliseconds since last AVX512
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/extable.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/kallsyms.h>
|
||||
@ -472,6 +473,9 @@ static int arch_copy_kprobe(struct kprobe *p)
|
||||
/* Also, displacement change doesn't affect the first byte */
|
||||
p->opcode = buf[0];
|
||||
|
||||
p->ainsn.tp_len = len;
|
||||
perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);
|
||||
|
||||
/* OK, write back the instruction(s) into ROX insn buffer */
|
||||
text_poke(p->ainsn.insn, buf, len);
|
||||
|
||||
@ -503,12 +507,18 @@ int arch_prepare_kprobe(struct kprobe *p)
|
||||
|
||||
void arch_arm_kprobe(struct kprobe *p)
|
||||
{
|
||||
text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
|
||||
u8 int3 = INT3_INSN_OPCODE;
|
||||
|
||||
text_poke(p->addr, &int3, 1);
|
||||
text_poke_sync();
|
||||
perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
|
||||
}
|
||||
|
||||
void arch_disarm_kprobe(struct kprobe *p)
|
||||
{
|
||||
u8 int3 = INT3_INSN_OPCODE;
|
||||
|
||||
perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
|
||||
text_poke(p->addr, &p->opcode, 1);
|
||||
text_poke_sync();
|
||||
}
|
||||
@ -516,6 +526,9 @@ void arch_disarm_kprobe(struct kprobe *p)
|
||||
void arch_remove_kprobe(struct kprobe *p)
|
||||
{
|
||||
if (p->ainsn.insn) {
|
||||
/* Record the perf event before freeing the slot */
|
||||
perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
|
||||
p->ainsn.tp_len, NULL, 0);
|
||||
free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
|
||||
p->ainsn.insn = NULL;
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
* Copyright (C) Hitachi Ltd., 2012
|
||||
*/
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/slab.h>
|
||||
@ -352,8 +353,15 @@ int arch_within_optimized_kprobe(struct optimized_kprobe *op,
|
||||
static
|
||||
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
|
||||
{
|
||||
if (op->optinsn.insn) {
|
||||
free_optinsn_slot(op->optinsn.insn, dirty);
|
||||
u8 *slot = op->optinsn.insn;
|
||||
if (slot) {
|
||||
int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
|
||||
|
||||
/* Record the perf event before freeing the slot */
|
||||
if (dirty)
|
||||
perf_event_text_poke(slot, slot, len, NULL, 0);
|
||||
|
||||
free_optinsn_slot(slot, dirty);
|
||||
op->optinsn.insn = NULL;
|
||||
op->optinsn.size = 0;
|
||||
}
|
||||
@ -424,8 +432,15 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
|
||||
(u8 *)op->kp.addr + op->optinsn.size);
|
||||
len += JMP32_INSN_SIZE;
|
||||
|
||||
/*
|
||||
* Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
|
||||
* used in __arch_remove_optimized_kprobe().
|
||||
*/
|
||||
|
||||
/* We have to use text_poke() for instruction buffer because it is RO */
|
||||
perf_event_text_poke(slot, NULL, 0, buf, len);
|
||||
text_poke(slot, buf, len);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(buf);
|
||||
@ -477,10 +492,23 @@ void arch_optimize_kprobes(struct list_head *oplist)
|
||||
*/
|
||||
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
|
||||
{
|
||||
arch_arm_kprobe(&op->kp);
|
||||
text_poke(op->kp.addr + INT3_INSN_SIZE,
|
||||
op->optinsn.copied_insn, DISP32_SIZE);
|
||||
u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
|
||||
u8 old[JMP32_INSN_SIZE];
|
||||
u8 *addr = op->kp.addr;
|
||||
|
||||
memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
|
||||
memcpy(new + INT3_INSN_SIZE,
|
||||
op->optinsn.copied_insn,
|
||||
JMP32_INSN_SIZE - INT3_INSN_SIZE);
|
||||
|
||||
text_poke(addr, new, INT3_INSN_SIZE);
|
||||
text_poke_sync();
|
||||
text_poke(addr + INT3_INSN_SIZE,
|
||||
new + INT3_INSN_SIZE,
|
||||
JMP32_INSN_SIZE - INT3_INSN_SIZE);
|
||||
text_poke_sync();
|
||||
|
||||
perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -58,9 +58,6 @@ struct ftrace_direct_func;
|
||||
const char *
|
||||
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
|
||||
unsigned long *off, char **modname, char *sym);
|
||||
int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
|
||||
char *type, char *name,
|
||||
char *module_name, int *exported);
|
||||
#else
|
||||
static inline const char *
|
||||
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
|
||||
@ -68,6 +65,13 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
|
||||
int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
|
||||
char *type, char *name,
|
||||
char *module_name, int *exported);
|
||||
#else
|
||||
static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
|
||||
char *type, char *name,
|
||||
char *module_name, int *exported)
|
||||
@ -76,7 +80,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef CONFIG_FUNCTION_TRACER
|
||||
|
||||
extern int ftrace_enabled;
|
||||
@ -207,6 +210,7 @@ struct ftrace_ops {
|
||||
struct ftrace_ops_hash old_hash;
|
||||
unsigned long trampoline;
|
||||
unsigned long trampoline_size;
|
||||
struct list_head list;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -242,6 +242,7 @@ struct kprobe_insn_cache {
|
||||
struct mutex mutex;
|
||||
void *(*alloc)(void); /* allocate insn page */
|
||||
void (*free)(void *); /* free insn page */
|
||||
const char *sym; /* symbol for insn pages */
|
||||
struct list_head pages; /* list of kprobe_insn_page */
|
||||
size_t insn_size; /* size of instruction slot */
|
||||
int nr_garbage;
|
||||
@ -272,6 +273,10 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \
|
||||
{ \
|
||||
return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \
|
||||
}
|
||||
#define KPROBE_INSN_PAGE_SYM "kprobe_insn_page"
|
||||
#define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page"
|
||||
int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
|
||||
unsigned long *value, char *type, char *sym);
|
||||
#else /* __ARCH_WANT_KPROBES_INSN_SLOT */
|
||||
#define DEFINE_INSN_CACHE_OPS(__name) \
|
||||
static inline bool is_kprobe_##__name##_slot(unsigned long addr) \
|
||||
@ -377,6 +382,11 @@ void dump_kprobe(struct kprobe *kp);
|
||||
void *alloc_insn_page(void);
|
||||
void free_insn_page(void *page);
|
||||
|
||||
int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
||||
char *sym);
|
||||
|
||||
int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
|
||||
char *type, char *sym);
|
||||
#else /* !CONFIG_KPROBES: */
|
||||
|
||||
static inline int kprobes_built_in(void)
|
||||
@ -439,6 +449,11 @@ static inline bool within_kprobe_blacklist(unsigned long addr)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value,
|
||||
char *type, char *sym)
|
||||
{
|
||||
return -ERANGE;
|
||||
}
|
||||
#endif /* CONFIG_KPROBES */
|
||||
static inline int disable_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
|
@ -366,7 +366,7 @@ struct pmu {
|
||||
* ->stop() with PERF_EF_UPDATE will read the counter and update
|
||||
* period/count values like ->read() would.
|
||||
*
|
||||
* ->start() with PERF_EF_RELOAD will reprogram the the counter
|
||||
* ->start() with PERF_EF_RELOAD will reprogram the counter
|
||||
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
|
||||
*/
|
||||
void (*start) (struct perf_event *event, int flags);
|
||||
@ -419,10 +419,11 @@ struct pmu {
|
||||
*/
|
||||
void (*sched_task) (struct perf_event_context *ctx,
|
||||
bool sched_in);
|
||||
|
||||
/*
|
||||
* PMU specific data size
|
||||
* Kmem cache of PMU specific data
|
||||
*/
|
||||
size_t task_ctx_size;
|
||||
struct kmem_cache *task_ctx_cache;
|
||||
|
||||
/*
|
||||
* PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
|
||||
@ -1232,6 +1233,9 @@ extern void perf_event_exec(void);
|
||||
extern void perf_event_comm(struct task_struct *tsk, bool exec);
|
||||
extern void perf_event_namespaces(struct task_struct *tsk);
|
||||
extern void perf_event_fork(struct task_struct *tsk);
|
||||
extern void perf_event_text_poke(const void *addr,
|
||||
const void *old_bytes, size_t old_len,
|
||||
const void *new_bytes, size_t new_len);
|
||||
|
||||
/* Callchains */
|
||||
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
|
||||
@ -1479,6 +1483,11 @@ static inline void perf_event_exec(void) { }
|
||||
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
|
||||
static inline void perf_event_namespaces(struct task_struct *tsk) { }
|
||||
static inline void perf_event_fork(struct task_struct *tsk) { }
|
||||
static inline void perf_event_text_poke(const void *addr,
|
||||
const void *old_bytes,
|
||||
size_t old_len,
|
||||
const void *new_bytes,
|
||||
size_t new_len) { }
|
||||
static inline void perf_event_init(void) { }
|
||||
static inline int perf_swevent_get_recursion_context(void) { return -1; }
|
||||
static inline void perf_swevent_put_recursion_context(int rctx) { }
|
||||
|
@ -383,7 +383,8 @@ struct perf_event_attr {
|
||||
bpf_event : 1, /* include bpf events */
|
||||
aux_output : 1, /* generate AUX records instead of events */
|
||||
cgroup : 1, /* include cgroup events */
|
||||
__reserved_1 : 31;
|
||||
text_poke : 1, /* include text poke events */
|
||||
__reserved_1 : 30;
|
||||
|
||||
union {
|
||||
__u32 wakeup_events; /* wakeup every n events */
|
||||
@ -1041,12 +1042,35 @@ enum perf_event_type {
|
||||
*/
|
||||
PERF_RECORD_CGROUP = 19,
|
||||
|
||||
/*
|
||||
* Records changes to kernel text i.e. self-modified code. 'old_len' is
|
||||
* the number of old bytes, 'new_len' is the number of new bytes. Either
|
||||
* 'old_len' or 'new_len' may be zero to indicate, for example, the
|
||||
* addition or removal of a trampoline. 'bytes' contains the old bytes
|
||||
* followed immediately by the new bytes.
|
||||
*
|
||||
* struct {
|
||||
* struct perf_event_header header;
|
||||
* u64 addr;
|
||||
* u16 old_len;
|
||||
* u16 new_len;
|
||||
* u8 bytes[];
|
||||
* struct sample_id sample_id;
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_TEXT_POKE = 20,
|
||||
|
||||
PERF_RECORD_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
enum perf_record_ksymbol_type {
|
||||
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
|
||||
PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
|
||||
/*
|
||||
* Out of line code such as kprobe-replaced instructions or optimized
|
||||
* kprobes or ftrace trampolines.
|
||||
*/
|
||||
PERF_RECORD_KSYMBOL_TYPE_OOL = 2,
|
||||
PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
|
||||
};
|
||||
|
||||
|
@ -394,6 +394,7 @@ static atomic_t nr_switch_events __read_mostly;
|
||||
static atomic_t nr_ksymbol_events __read_mostly;
|
||||
static atomic_t nr_bpf_events __read_mostly;
|
||||
static atomic_t nr_cgroup_events __read_mostly;
|
||||
static atomic_t nr_text_poke_events __read_mostly;
|
||||
|
||||
static LIST_HEAD(pmus);
|
||||
static DEFINE_MUTEX(pmus_lock);
|
||||
@ -1237,12 +1238,26 @@ static void get_ctx(struct perf_event_context *ctx)
|
||||
refcount_inc(&ctx->refcount);
|
||||
}
|
||||
|
||||
static void *alloc_task_ctx_data(struct pmu *pmu)
|
||||
{
|
||||
if (pmu->task_ctx_cache)
|
||||
return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
|
||||
{
|
||||
if (pmu->task_ctx_cache && task_ctx_data)
|
||||
kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
|
||||
}
|
||||
|
||||
static void free_ctx(struct rcu_head *head)
|
||||
{
|
||||
struct perf_event_context *ctx;
|
||||
|
||||
ctx = container_of(head, struct perf_event_context, rcu_head);
|
||||
kfree(ctx->task_ctx_data);
|
||||
free_task_ctx_data(ctx->pmu, ctx->task_ctx_data);
|
||||
kfree(ctx);
|
||||
}
|
||||
|
||||
@ -4470,7 +4485,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
|
||||
goto errout;
|
||||
|
||||
if (event->attach_state & PERF_ATTACH_TASK_DATA) {
|
||||
task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
|
||||
task_ctx_data = alloc_task_ctx_data(pmu);
|
||||
if (!task_ctx_data) {
|
||||
err = -ENOMEM;
|
||||
goto errout;
|
||||
@ -4528,11 +4543,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
|
||||
}
|
||||
}
|
||||
|
||||
kfree(task_ctx_data);
|
||||
free_task_ctx_data(pmu, task_ctx_data);
|
||||
return ctx;
|
||||
|
||||
errout:
|
||||
kfree(task_ctx_data);
|
||||
free_task_ctx_data(pmu, task_ctx_data);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@ -4575,7 +4590,7 @@ static bool is_sb_event(struct perf_event *event)
|
||||
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
|
||||
attr->comm || attr->comm_exec ||
|
||||
attr->task || attr->ksymbol ||
|
||||
attr->context_switch ||
|
||||
attr->context_switch || attr->text_poke ||
|
||||
attr->bpf_event)
|
||||
return true;
|
||||
return false;
|
||||
@ -4651,6 +4666,8 @@ static void unaccount_event(struct perf_event *event)
|
||||
atomic_dec(&nr_ksymbol_events);
|
||||
if (event->attr.bpf_event)
|
||||
atomic_dec(&nr_bpf_events);
|
||||
if (event->attr.text_poke)
|
||||
atomic_dec(&nr_text_poke_events);
|
||||
|
||||
if (dec) {
|
||||
if (!atomic_add_unless(&perf_sched_count, -1, 1))
|
||||
@ -8628,6 +8645,89 @@ void perf_event_bpf_event(struct bpf_prog *prog,
|
||||
perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
|
||||
}
|
||||
|
||||
/*
 * In-kernel description of one text poke record, filled in by
 * perf_event_text_poke() and consumed by perf_event_text_poke_output().
 * Only event_id, old_len, new_len, the bytes the two pointers refer to
 * and the trailing padding are written to the ring buffer; the pointers
 * and pad themselves are bookkeeping for the output routine.
 */
struct perf_text_poke_event {
|
||||
/* Source of the old_len bytes emitted as the "old" text (NULL is passed with a length of 0) */
const void *old_bytes;
|
||||
/* Source of the new_len bytes emitted as the "new" text (NULL is passed with a length of 0) */
const void *new_bytes;
|
||||
/* Number of zero bytes appended so the payload ends on a u64 boundary */
size_t pad;
|
||||
u16 old_len;
|
||||
u16 new_len;
|
||||
|
||||
/* Fixed-size leading part of the record, emitted as one unit */
struct {
|
||||
struct perf_event_header header;
|
||||
|
||||
/* Address of the poked text */
u64 addr;
|
||||
} event_id;
|
||||
};
|
||||
|
||||
static int perf_event_text_poke_match(struct perf_event *event)
|
||||
{
|
||||
return event->attr.text_poke;
|
||||
}
|
||||
|
||||
static void perf_event_text_poke_output(struct perf_event *event, void *data)
|
||||
{
|
||||
struct perf_text_poke_event *text_poke_event = data;
|
||||
struct perf_output_handle handle;
|
||||
struct perf_sample_data sample;
|
||||
u64 padding = 0;
|
||||
int ret;
|
||||
|
||||
if (!perf_event_text_poke_match(event))
|
||||
return;
|
||||
|
||||
perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
|
||||
|
||||
ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
perf_output_put(&handle, text_poke_event->event_id);
|
||||
perf_output_put(&handle, text_poke_event->old_len);
|
||||
perf_output_put(&handle, text_poke_event->new_len);
|
||||
|
||||
__output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len);
|
||||
__output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len);
|
||||
|
||||
if (text_poke_event->pad)
|
||||
__output_copy(&handle, &padding, text_poke_event->pad);
|
||||
|
||||
perf_event__output_id_sample(event, &handle, &sample);
|
||||
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
||||
/*
 * Emit a PERF_RECORD_TEXT_POKE record for a change to kernel text.
 *
 * @addr:      address of the modified text
 * @old_bytes: previous contents, @old_len bytes
 * @new_bytes: new contents, @new_len bytes
 *
 * Callers pass NULL with a length of 0 for the side that does not exist
 * (text being added or removed).  Returns immediately when no event has
 * requested text poke records.
 */
void perf_event_text_poke(const void *addr, const void *old_bytes,
			  size_t old_len, const void *new_bytes, size_t new_len)
{
	struct perf_text_poke_event rec;
	size_t payload, padded;

	if (!atomic_read(&nr_text_poke_events))
		return;

	/* Both length fields plus both byte strings ... */
	payload = sizeof(rec.old_len) + old_len +
		  sizeof(rec.new_len) + new_len;
	/* ... rounded up so the record ends on a u64 boundary. */
	padded = ALIGN(payload, sizeof(u64));

	rec = (struct perf_text_poke_event){
		.old_bytes	= old_bytes,
		.new_bytes	= new_bytes,
		.pad		= padded - payload,
		.old_len	= old_len,
		.new_len	= new_len,
		.event_id	= {
			.header = {
				.type = PERF_RECORD_TEXT_POKE,
				.misc = PERF_RECORD_MISC_KERNEL,
				.size = sizeof(rec.event_id) + padded,
			},
			.addr = (unsigned long)addr,
		},
	};

	perf_iterate_sb(perf_event_text_poke_output, &rec, NULL);
}
|
||||
|
||||
void perf_event_itrace_started(struct perf_event *event)
|
||||
{
|
||||
event->attach_state |= PERF_ATTACH_ITRACE;
|
||||
@ -10945,6 +11045,8 @@ static void account_event(struct perf_event *event)
|
||||
atomic_inc(&nr_ksymbol_events);
|
||||
if (event->attr.bpf_event)
|
||||
atomic_inc(&nr_bpf_events);
|
||||
if (event->attr.text_poke)
|
||||
atomic_inc(&nr_text_poke_events);
|
||||
|
||||
if (inc) {
|
||||
/*
|
||||
@ -12409,8 +12511,7 @@ inherit_event(struct perf_event *parent_event,
|
||||
!child_ctx->task_ctx_data) {
|
||||
struct pmu *pmu = child_event->pmu;
|
||||
|
||||
child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
|
||||
GFP_KERNEL);
|
||||
child_ctx->task_ctx_data = alloc_task_ctx_data(pmu);
|
||||
if (!child_ctx->task_ctx_data) {
|
||||
free_event(child_event);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
/*
|
||||
@ -437,6 +438,7 @@ struct kallsym_iter {
|
||||
loff_t pos_arch_end;
|
||||
loff_t pos_mod_end;
|
||||
loff_t pos_ftrace_mod_end;
|
||||
loff_t pos_bpf_end;
|
||||
unsigned long value;
|
||||
unsigned int nameoff; /* If iterating in core kernel symbols. */
|
||||
char type;
|
||||
@ -480,6 +482,11 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* ftrace_mod_get_kallsym() may also get symbols for pages allocated for ftrace
|
||||
* purposes. In that case "__builtin__ftrace" is used as a module name, even
|
||||
* though "__builtin__ftrace" is not a module.
|
||||
*/
|
||||
static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
|
||||
{
|
||||
int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end,
|
||||
@ -496,11 +503,33 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
|
||||
|
||||
static int get_ksymbol_bpf(struct kallsym_iter *iter)
|
||||
{
|
||||
int ret;
|
||||
|
||||
strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN);
|
||||
iter->exported = 0;
|
||||
return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
|
||||
&iter->value, &iter->type,
|
||||
iter->name) < 0 ? 0 : 1;
|
||||
ret = bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
|
||||
&iter->value, &iter->type,
|
||||
iter->name);
|
||||
if (ret < 0) {
|
||||
iter->pos_bpf_end = iter->pos;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This uses "__builtin__kprobes" as a module name for symbols for pages
|
||||
* allocated for kprobes' purposes, even though "__builtin__kprobes" is not a
|
||||
* module.
|
||||
*/
|
||||
static int get_ksymbol_kprobe(struct kallsym_iter *iter)
|
||||
{
|
||||
strlcpy(iter->module_name, "__builtin__kprobes", MODULE_NAME_LEN);
|
||||
iter->exported = 0;
|
||||
return kprobe_get_kallsym(iter->pos - iter->pos_bpf_end,
|
||||
&iter->value, &iter->type,
|
||||
iter->name) < 0 ? 0 : 1;
|
||||
}
|
||||
|
||||
/* Returns space to next name. */
|
||||
@ -527,6 +556,7 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
|
||||
iter->pos_arch_end = 0;
|
||||
iter->pos_mod_end = 0;
|
||||
iter->pos_ftrace_mod_end = 0;
|
||||
iter->pos_bpf_end = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -551,7 +581,11 @@ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
|
||||
get_ksymbol_ftrace_mod(iter))
|
||||
return 1;
|
||||
|
||||
return get_ksymbol_bpf(iter);
|
||||
if ((!iter->pos_bpf_end || iter->pos_bpf_end > pos) &&
|
||||
get_ksymbol_bpf(iter))
|
||||
return 1;
|
||||
|
||||
return get_ksymbol_kprobe(iter);
|
||||
}
|
||||
|
||||
/* Returns false if pos at or past end of file. */
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include <asm/sections.h>
|
||||
#include <asm/cacheflush.h>
|
||||
@ -123,6 +124,7 @@ struct kprobe_insn_cache kprobe_insn_slots = {
|
||||
.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
|
||||
.alloc = alloc_insn_page,
|
||||
.free = free_insn_page,
|
||||
.sym = KPROBE_INSN_PAGE_SYM,
|
||||
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
|
||||
.insn_size = MAX_INSN_SIZE,
|
||||
.nr_garbage = 0,
|
||||
@ -188,6 +190,10 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
|
||||
kip->cache = c;
|
||||
list_add_rcu(&kip->list, &c->pages);
|
||||
slot = kip->insns;
|
||||
|
||||
/* Record the perf ksymbol register event after adding the page */
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
|
||||
PAGE_SIZE, false, c->sym);
|
||||
out:
|
||||
mutex_unlock(&c->mutex);
|
||||
return slot;
|
||||
@ -206,6 +212,13 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
|
||||
* next time somebody inserts a probe.
|
||||
*/
|
||||
if (!list_is_singular(&kip->list)) {
|
||||
/*
|
||||
* Record perf ksymbol unregister event before removing
|
||||
* the page.
|
||||
*/
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
|
||||
(unsigned long)kip->insns, PAGE_SIZE, true,
|
||||
kip->cache->sym);
|
||||
list_del_rcu(&kip->list);
|
||||
synchronize_rcu();
|
||||
kip->cache->free(kip->insns);
|
||||
@ -295,12 +308,34 @@ bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
|
||||
unsigned long *value, char *type, char *sym)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
int ret = -ERANGE;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(kip, &c->pages, list) {
|
||||
if ((*symnum)--)
|
||||
continue;
|
||||
strlcpy(sym, c->sym, KSYM_NAME_LEN);
|
||||
*type = 't';
|
||||
*value = (unsigned long)kip->insns;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPTPROBES
|
||||
/* For optimized_kprobe buffer */
|
||||
struct kprobe_insn_cache kprobe_optinsn_slots = {
|
||||
.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
|
||||
.alloc = alloc_insn_page,
|
||||
.free = free_insn_page,
|
||||
.sym = KPROBE_OPTINSN_PAGE_SYM,
|
||||
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
|
||||
/* .insn_size is initialized later */
|
||||
.nr_garbage = 0,
|
||||
@ -563,8 +598,6 @@ static void kprobe_optimizer(struct work_struct *work)
|
||||
mutex_lock(&kprobe_mutex);
|
||||
cpus_read_lock();
|
||||
mutex_lock(&text_mutex);
|
||||
/* Lock modules while optimizing kprobes */
|
||||
mutex_lock(&module_mutex);
|
||||
|
||||
/*
|
||||
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
|
||||
@ -589,7 +622,6 @@ static void kprobe_optimizer(struct work_struct *work)
|
||||
/* Step 4: Free cleaned kprobes after quiescence period */
|
||||
do_free_cleaned_kprobes();
|
||||
|
||||
mutex_unlock(&module_mutex);
|
||||
mutex_unlock(&text_mutex);
|
||||
cpus_read_unlock();
|
||||
|
||||
@ -2232,6 +2264,28 @@ static void kprobe_remove_ksym_blacklist(unsigned long entry)
|
||||
kprobe_remove_area_blacklist(entry, entry + 1);
|
||||
}
|
||||
|
||||
/*
 * Weak default for architecture-specific kprobe kallsyms entries: it
 * reports no symbols by always returning -ERANGE.  An architecture can
 * supply its own definition to override this one.
 */
int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
|
||||
char *type, char *sym)
|
||||
{
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
||||
char *sym)
|
||||
{
|
||||
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
|
||||
if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
|
||||
return 0;
|
||||
#ifdef CONFIG_OPTPROBES
|
||||
if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
|
||||
return 0;
|
||||
#endif
|
||||
#endif
|
||||
if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
|
||||
return 0;
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
int __init __weak arch_populate_kprobe_blacklist(void)
|
||||
{
|
||||
return 0;
|
||||
|
@ -2764,6 +2764,50 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops)
|
||||
{
|
||||
}
|
||||
|
||||
/* List of trace_ops that have allocated trampolines */
|
||||
static LIST_HEAD(ftrace_ops_trampoline_list);
|
||||
|
||||
/*
 * Link @ops into ftrace_ops_trampoline_list, the list walked by
 * ftrace_get_trampoline_kallsym().  The caller must hold ftrace_lock
 * (asserted via lockdep).
 */
static void ftrace_add_trampoline_to_kallsyms(struct ftrace_ops *ops)
|
||||
{
|
||||
lockdep_assert_held(&ftrace_lock);
|
||||
list_add_rcu(&ops->list, &ftrace_ops_trampoline_list);
|
||||
}
|
||||
|
||||
/*
 * Unlink @ops from the trampoline list it was added to through its 'list'
 * member.  The caller must hold ftrace_lock (asserted via lockdep).
 */
static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops)
|
||||
{
|
||||
lockdep_assert_held(&ftrace_lock);
|
||||
list_del_rcu(&ops->list);
|
||||
}
|
||||
|
||||
/*
|
||||
* "__builtin__ftrace" is used as a module name in /proc/kallsyms for symbols
|
||||
* for pages allocated for ftrace purposes, even though "__builtin__ftrace" is
|
||||
* not a module.
|
||||
*/
|
||||
#define FTRACE_TRAMPOLINE_MOD "__builtin__ftrace"
|
||||
#define FTRACE_TRAMPOLINE_SYM "ftrace_trampoline"
|
||||
|
||||
static void ftrace_trampoline_free(struct ftrace_ops *ops)
|
||||
{
|
||||
if (ops && (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP) &&
|
||||
ops->trampoline) {
|
||||
/*
|
||||
* Record the text poke event before the ksymbol unregister
|
||||
* event.
|
||||
*/
|
||||
perf_event_text_poke((void *)ops->trampoline,
|
||||
(void *)ops->trampoline,
|
||||
ops->trampoline_size, NULL, 0);
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
|
||||
ops->trampoline, ops->trampoline_size,
|
||||
true, FTRACE_TRAMPOLINE_SYM);
|
||||
/* Remove from kallsyms after the perf events */
|
||||
ftrace_remove_trampoline_from_kallsyms(ops);
|
||||
}
|
||||
|
||||
arch_ftrace_trampoline_free(ops);
|
||||
}
|
||||
|
||||
static void ftrace_startup_enable(int command)
|
||||
{
|
||||
if (saved_ftrace_func != ftrace_trace_function) {
|
||||
@ -2934,7 +2978,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
|
||||
synchronize_rcu_tasks();
|
||||
|
||||
free_ops:
|
||||
arch_ftrace_trampoline_free(ops);
|
||||
ftrace_trampoline_free(ops);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -6178,6 +6222,27 @@ struct ftrace_mod_map {
|
||||
unsigned int num_funcs;
|
||||
};
|
||||
|
||||
static int ftrace_get_trampoline_kallsym(unsigned int symnum,
|
||||
unsigned long *value, char *type,
|
||||
char *name, char *module_name,
|
||||
int *exported)
|
||||
{
|
||||
struct ftrace_ops *op;
|
||||
|
||||
list_for_each_entry_rcu(op, &ftrace_ops_trampoline_list, list) {
|
||||
if (!op->trampoline || symnum--)
|
||||
continue;
|
||||
*value = op->trampoline;
|
||||
*type = 't';
|
||||
strlcpy(name, FTRACE_TRAMPOLINE_SYM, KSYM_NAME_LEN);
|
||||
strlcpy(module_name, FTRACE_TRAMPOLINE_MOD, MODULE_NAME_LEN);
|
||||
*exported = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
|
||||
/*
 * Given @p, a pointer to the 'next' member of a struct ftrace_page, return a
 * pointer to the enclosing struct ftrace_page.
 */
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
|
||||
@ -6514,6 +6579,7 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
|
||||
{
|
||||
struct ftrace_mod_map *mod_map;
|
||||
struct ftrace_mod_func *mod_func;
|
||||
int ret;
|
||||
|
||||
preempt_disable();
|
||||
list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) {
|
||||
@ -6540,8 +6606,10 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
|
||||
WARN_ON(1);
|
||||
break;
|
||||
}
|
||||
ret = ftrace_get_trampoline_kallsym(symnum, value, type, name,
|
||||
module_name, exported);
|
||||
preempt_enable();
|
||||
return -ERANGE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
@ -6553,6 +6621,18 @@ allocate_ftrace_mod_map(struct module *mod,
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
/*
 * Variant of ftrace_mod_get_kallsym() used when CONFIG_MODULES is not set:
 * the only symbols it can report are the ftrace trampolines.
 *
 * Fills @value, @type, @name, @module_name and @exported for the @symnum'th
 * trampoline and returns the result of ftrace_get_trampoline_kallsym()
 * (0 on success, -ERANGE when @symnum is past the last trampoline).
 */
int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
			   char *type, char *name, char *module_name,
			   int *exported)
{
	int err;

	/* The trampoline list is walked with RCU iteration; keep preemption off. */
	preempt_disable();
	err = ftrace_get_trampoline_kallsym(symnum, value, type, name,
					    module_name, exported);
	preempt_enable();

	return err;
}
|
||||
#endif /* CONFIG_MODULES */
|
||||
|
||||
struct ftrace_init_func {
|
||||
@ -6733,7 +6813,24 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
|
||||
|
||||
static void ftrace_update_trampoline(struct ftrace_ops *ops)
|
||||
{
|
||||
unsigned long trampoline = ops->trampoline;
|
||||
|
||||
arch_ftrace_update_trampoline(ops);
|
||||
if (ops->trampoline && ops->trampoline != trampoline &&
|
||||
(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) {
|
||||
/* Add to kallsyms before the perf events */
|
||||
ftrace_add_trampoline_to_kallsyms(ops);
|
||||
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
|
||||
ops->trampoline, ops->trampoline_size, false,
|
||||
FTRACE_TRAMPOLINE_SYM);
|
||||
/*
|
||||
* Record the perf text poke event after the ksymbol register
|
||||
* event.
|
||||
*/
|
||||
perf_event_text_poke((void *)ops->trampoline, NULL, 0,
|
||||
(void *)ops->trampoline,
|
||||
ops->trampoline_size);
|
||||
}
|
||||
}
|
||||
|
||||
void ftrace_init_trace_array(struct trace_array *tr)
|
||||
|
Loading…
Reference in New Issue
Block a user