Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git (synced 2025-01-06 05:06:29 +00:00)
KVM x86 PMU changes for 6.9:

 - Fix several bugs where KVM speciously prevents the guest from utilizing
   fixed counters and architectural event encodings based on whether or not
   guest CPUID reports support for the _architectural_ encoding.

 - Fix a variety of bugs in KVM's emulation of RDPMC, e.g. for "fast" reads,
   priority of VMX interception vs #GP, PMC types in architectural PMUs, etc.

 - Add a selftest to verify KVM correctly emulates RDPMC, counter
   availability, and a variety of other PMC-related behaviors that depend on
   guest CPUID, i.e. are difficult to validate via KVM-Unit-Tests.

 - Zero out PMU metadata on AMD if the virtual PMU is disabled to avoid
   wasting cycles, e.g. when checking if a PMC event needs to be synthesized
   when skipping an instruction.

 - Optimize triggering of emulated events, e.g. for "count instructions"
   events when skipping an instruction, which yields a ~10% performance
   improvement in VM-Exit microbenchmarks when a vPMU is exposed to the guest.

 - Tighten the check for "PMI in guest" to reduce false positives if an NMI
   arrives in the host while KVM is handling an IRQ VM-Exit.

Merge tag 'kvm-x86-pmu-6.9' of https://github.com/kvm-x86/linux into HEAD
Commit e9025cdd8c
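For quick reference, the "PMI in guest" tightening summarized above boils down to the following check, copied from the kvm_host.h hunk further down: a PMI is attributed to the guest only when the host is currently in the same context type (NMI vs. IRQ) that KVM recorded when it began handling the VM-Exit.

#define kvm_arch_pmi_in_guest(vcpu)					\
	((vcpu) && (vcpu)->arch.handling_intr_from_guest &&		\
	 (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))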
@ -12,11 +12,9 @@ BUILD_BUG_ON(1)
|
||||
* a NULL definition, for example if "static_call_cond()" will be used
|
||||
* at the call sites.
|
||||
*/
|
||||
KVM_X86_PMU_OP(hw_event_available)
|
||||
KVM_X86_PMU_OP(pmc_idx_to_pmc)
|
||||
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
|
||||
KVM_X86_PMU_OP(msr_idx_to_pmc)
|
||||
KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
|
||||
KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early)
|
||||
KVM_X86_PMU_OP(is_valid_msr)
|
||||
KVM_X86_PMU_OP(get_msr)
|
||||
KVM_X86_PMU_OP(set_msr)
|
||||
|
@ -536,6 +536,7 @@ struct kvm_pmc {
|
||||
#define KVM_PMC_MAX_FIXED 3
|
||||
#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
|
||||
#define KVM_AMD_PMC_MAX_GENERIC 6
|
||||
|
||||
struct kvm_pmu {
|
||||
u8 version;
|
||||
unsigned nr_arch_gp_counters;
|
||||
@ -1889,8 +1890,16 @@ static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
|
||||
}
|
||||
#endif /* CONFIG_HYPERV */
|
||||
|
||||
enum kvm_intr_type {
|
||||
/* Values are arbitrary, but must be non-zero. */
|
||||
KVM_HANDLING_IRQ = 1,
|
||||
KVM_HANDLING_NMI,
|
||||
};
|
||||
|
||||
/* Enable perf NMI and timer modes to work, and minimise false positives. */
|
||||
#define kvm_arch_pmi_in_guest(vcpu) \
|
||||
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
|
||||
((vcpu) && (vcpu)->arch.handling_intr_from_guest && \
|
||||
(!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))
|
||||
|
||||
void __init kvm_mmu_x86_module_init(void);
|
||||
int kvm_mmu_vendor_module_init(void);
|
||||
|
@ -3955,7 +3955,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
|
||||
* protected mode.
|
||||
*/
|
||||
if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
|
||||
ctxt->ops->check_pmc(ctxt, rcx))
|
||||
ctxt->ops->check_rdpmc_early(ctxt, rcx))
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
|
@ -208,7 +208,7 @@ struct x86_emulate_ops {
|
||||
int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
|
||||
int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
|
||||
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
|
||||
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
|
||||
int (*check_rdpmc_early)(struct x86_emulate_ctxt *ctxt, u32 pmc);
|
||||
int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
|
||||
void (*halt)(struct x86_emulate_ctxt *ctxt);
|
||||
void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
|
||||
|
@ -29,6 +29,9 @@
|
||||
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
|
||||
EXPORT_SYMBOL_GPL(kvm_pmu_cap);
|
||||
|
||||
struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
|
||||
EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
|
||||
|
||||
/* Precise Distribution of Instructions Retired (PDIR) */
|
||||
static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
|
||||
@ -67,7 +70,7 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
|
||||
* all perf counters (both gp and fixed). The mapping relationship
|
||||
* between pmc and perf counters is as the following:
|
||||
* * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
|
||||
* [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
|
||||
* [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
|
||||
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
|
||||
* and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
|
||||
*/
|
||||
@ -411,7 +414,7 @@ static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
|
||||
static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
|
||||
int idx)
|
||||
{
|
||||
int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
|
||||
int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX;
|
||||
|
||||
if (filter->action == KVM_PMU_EVENT_DENY &&
|
||||
test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
|
||||
@ -441,11 +444,10 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
|
||||
static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
|
||||
{
|
||||
return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
|
||||
static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
|
||||
check_pmu_event_filter(pmc);
|
||||
}
|
||||
|
||||
static void reprogram_counter(struct kvm_pmc *pmc)
|
||||
static int reprogram_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
u64 eventsel = pmc->eventsel;
|
||||
@ -456,7 +458,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
|
||||
emulate_overflow = pmc_pause_counter(pmc);
|
||||
|
||||
if (!pmc_event_is_allowed(pmc))
|
||||
goto reprogram_complete;
|
||||
return 0;
|
||||
|
||||
if (emulate_overflow)
|
||||
__kvm_perf_overflow(pmc, false);
|
||||
@ -466,7 +468,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
|
||||
|
||||
if (pmc_is_fixed(pmc)) {
|
||||
fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
|
||||
pmc->idx - INTEL_PMC_IDX_FIXED);
|
||||
pmc->idx - KVM_FIXED_PMC_BASE_IDX);
|
||||
if (fixed_ctr_ctrl & 0x1)
|
||||
eventsel |= ARCH_PERFMON_EVENTSEL_OS;
|
||||
if (fixed_ctr_ctrl & 0x2)
|
||||
@ -477,43 +479,45 @@ static void reprogram_counter(struct kvm_pmc *pmc)
|
||||
}
|
||||
|
||||
if (pmc->current_config == new_config && pmc_resume_counter(pmc))
|
||||
goto reprogram_complete;
|
||||
return 0;
|
||||
|
||||
pmc_release_perf_event(pmc);
|
||||
|
||||
pmc->current_config = new_config;
|
||||
|
||||
/*
|
||||
* If reprogramming fails, e.g. due to contention, leave the counter's
|
||||
* reprogram bit set, i.e. opportunistically try again on the next PMU
|
||||
* refresh. Don't make a new request as doing so can stall the guest
|
||||
* if reprogramming repeatedly fails.
|
||||
*/
|
||||
if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
|
||||
(eventsel & pmu->raw_event_mask),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
|
||||
eventsel & ARCH_PERFMON_EVENTSEL_INT))
|
||||
return;
|
||||
|
||||
reprogram_complete:
|
||||
clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
|
||||
return pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
|
||||
(eventsel & pmu->raw_event_mask),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
|
||||
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
|
||||
eventsel & ARCH_PERFMON_EVENTSEL_INT);
|
||||
}
|
||||
|
||||
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *pmc;
|
||||
int bit;
|
||||
|
||||
for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
|
||||
struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
|
||||
bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX);
|
||||
|
||||
if (unlikely(!pmc)) {
|
||||
clear_bit(bit, pmu->reprogram_pmi);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* The reprogramming bitmap can be written asynchronously by something
|
||||
* other than the task that holds vcpu->mutex, take care to clear only
|
||||
* the bits that will actually be processed.
|
||||
*/
|
||||
BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t));
|
||||
atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi);
|
||||
|
||||
reprogram_counter(pmc);
|
||||
kvm_for_each_pmc(pmu, pmc, bit, bitmap) {
|
||||
/*
|
||||
* If reprogramming fails, e.g. due to contention, re-set the
|
||||
* reprogram bit set, i.e. opportunistically try again on the
|
||||
* next PMU refresh. Don't make a new request as doing so can
|
||||
* stall the guest if reprogramming repeatedly fails.
|
||||
*/
|
||||
if (reprogram_counter(pmc))
|
||||
set_bit(pmc->idx, pmu->reprogram_pmi);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -525,10 +529,20 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
|
||||
kvm_pmu_cleanup(vcpu);
|
||||
}
|
||||
|
||||
/* check if idx is a valid index to access PMU */
|
||||
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
|
||||
/*
|
||||
* On Intel, VMX interception has priority over RDPMC exceptions that
|
||||
* aren't already handled by the emulator, i.e. there are no additional
|
||||
* checks needed for Intel PMUs.
|
||||
*
|
||||
* On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts,
|
||||
* i.e. an invalid PMC results in a #GP, not #VMEXIT.
|
||||
*/
|
||||
if (!kvm_pmu_ops.check_rdpmc_early)
|
||||
return 0;
|
||||
|
||||
return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx);
|
||||
}
|
||||
|
||||
bool is_vmware_backdoor_pmc(u32 pmc_idx)
|
||||
@ -567,10 +581,9 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
|
||||
|
||||
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
|
||||
{
|
||||
bool fast_mode = idx & (1u << 31);
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *pmc;
|
||||
u64 mask = fast_mode ? ~0u : ~0ull;
|
||||
u64 mask = ~0ull;
|
||||
|
||||
if (!pmu->version)
|
||||
return 1;
|
||||
@ -716,11 +729,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
|
||||
bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
|
||||
|
||||
for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
|
||||
pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
|
||||
if (!pmc)
|
||||
continue;
|
||||
|
||||
kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) {
|
||||
pmc_stop_counter(pmc);
|
||||
pmc->counter = 0;
|
||||
pmc->emulated_counter = 0;
|
||||
@ -741,6 +750,8 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
|
||||
return;
|
||||
|
||||
@ -750,8 +761,22 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
kvm_pmu_reset(vcpu);
|
||||
|
||||
bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
|
||||
static_call(kvm_x86_pmu_refresh)(vcpu);
|
||||
pmu->version = 0;
|
||||
pmu->nr_arch_gp_counters = 0;
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
|
||||
pmu->reserved_bits = 0xffffffff00200000ull;
|
||||
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
|
||||
pmu->global_ctrl_mask = ~0ull;
|
||||
pmu->global_status_mask = ~0ull;
|
||||
pmu->fixed_ctr_ctrl_mask = ~0ull;
|
||||
pmu->pebs_enable_mask = ~0ull;
|
||||
pmu->pebs_data_cfg_mask = ~0ull;
|
||||
bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
|
||||
|
||||
if (vcpu->kvm->arch.enable_pmu)
|
||||
static_call(kvm_x86_pmu_refresh)(vcpu);
|
||||
}
|
||||
|
||||
void kvm_pmu_init(struct kvm_vcpu *vcpu)
|
||||
@ -776,10 +801,8 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
|
||||
bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
|
||||
pmu->pmc_in_use, X86_PMC_IDX_MAX);
|
||||
|
||||
for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
|
||||
pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
|
||||
|
||||
if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
|
||||
kvm_for_each_pmc(pmu, pmc, i, bitmask) {
|
||||
if (pmc->perf_event && !pmc_speculative_in_use(pmc))
|
||||
pmc_stop_counter(pmc);
|
||||
}
|
||||
|
||||
@ -799,13 +822,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
|
||||
kvm_pmu_request_counter_reprogram(pmc);
|
||||
}
|
||||
|
||||
static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
|
||||
unsigned int perf_hw_id)
|
||||
{
|
||||
return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
|
||||
AMD64_RAW_EVENT_MASK_NB);
|
||||
}
|
||||
|
||||
static inline bool cpl_is_matched(struct kvm_pmc *pmc)
|
||||
{
|
||||
bool select_os, select_user;
|
||||
@ -817,29 +833,56 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
|
||||
select_user = config & ARCH_PERFMON_EVENTSEL_USR;
|
||||
} else {
|
||||
config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
|
||||
pmc->idx - INTEL_PMC_IDX_FIXED);
|
||||
pmc->idx - KVM_FIXED_PMC_BASE_IDX);
|
||||
select_os = config & 0x1;
|
||||
select_user = config & 0x2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip the CPL lookup, which isn't free on Intel, if the result will
|
||||
* be the same regardless of the CPL.
|
||||
*/
|
||||
if (select_os == select_user)
|
||||
return select_os;
|
||||
|
||||
return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
|
||||
}
|
||||
|
||||
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
|
||||
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
|
||||
{
|
||||
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct kvm_pmc *pmc;
|
||||
int i;
|
||||
|
||||
for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
|
||||
pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
|
||||
BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
|
||||
|
||||
if (!pmc || !pmc_event_is_allowed(pmc))
|
||||
if (!kvm_pmu_has_perf_global_ctrl(pmu))
|
||||
bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
|
||||
else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
|
||||
(unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
|
||||
return;
|
||||
|
||||
kvm_for_each_pmc(pmu, pmc, i, bitmap) {
|
||||
/*
|
||||
* Ignore checks for edge detect (all events currently emulated
|
||||
* but KVM are always rising edges), pin control (unsupported
|
||||
* by modern CPUs), and counter mask and its invert flag (KVM
|
||||
* doesn't emulate multiple events in a single clock cycle).
|
||||
*
|
||||
* Note, the uppermost nibble of AMD's mask overlaps Intel's
|
||||
* IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
|
||||
* bits (bits 35:34). Checking the "in HLE/RTM transaction"
|
||||
* flags is correct as the vCPU can't be in a transaction if
|
||||
* KVM is emulating an instruction. Checking the reserved bits
|
||||
* might be wrong if they are defined in the future, but so
|
||||
* could ignoring them, so do the simple thing for now.
|
||||
*/
|
||||
if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
|
||||
!pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
|
||||
continue;
|
||||
|
||||
/* Ignore checks for edge detect, pin control, invert and CMASK bits */
|
||||
if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
|
||||
kvm_pmu_incr_counter(pmc);
|
||||
kvm_pmu_incr_counter(pmc);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
|
||||
|
@ -4,6 +4,8 @@
|
||||
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
|
||||
#define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
|
||||
#define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
|
||||
@ -18,13 +20,18 @@
|
||||
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
|
||||
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
|
||||
|
||||
#define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED
|
||||
|
||||
struct kvm_pmu_emulated_event_selectors {
|
||||
u64 INSTRUCTIONS_RETIRED;
|
||||
u64 BRANCH_INSTRUCTIONS_RETIRED;
|
||||
};
|
||||
|
||||
struct kvm_pmu_ops {
|
||||
bool (*hw_event_available)(struct kvm_pmc *pmc);
|
||||
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
|
||||
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
|
||||
unsigned int idx, u64 *mask);
|
||||
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
int (*check_rdpmc_early)(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
@ -55,6 +62,38 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
|
||||
return pmu->version > 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM tracks all counters in 64-bit bitmaps, with general purpose counters
|
||||
* mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
|
||||
* is tracked internally via index 32. On Intel (AMD doesn't support fixed
|
||||
* counters), this mirrors how fixed counters are mapped to PERF_GLOBAL_CTRL
|
||||
* and similar MSRs, i.e. tracking fixed counters at base index 32 reduces the
|
||||
* amount of boilerplate needed to iterate over PMCs *and* simplifies common
|
||||
* enable/disable/reset operations.
|
||||
*
|
||||
* WARNING! This helper is only for lookups that are initiated by KVM, it is
|
||||
* NOT safe for guest lookups, e.g. will do the wrong thing if passed a raw
|
||||
* ECX value from RDPMC (fixed counters are accessed by setting bit 30 in ECX
|
||||
* for RDPMC, not by adding 32 to the fixed counter index).
|
||||
*/
|
||||
static inline struct kvm_pmc *kvm_pmc_idx_to_pmc(struct kvm_pmu *pmu, int idx)
|
||||
{
|
||||
if (idx < pmu->nr_arch_gp_counters)
|
||||
return &pmu->gp_counters[idx];
|
||||
|
||||
idx -= KVM_FIXED_PMC_BASE_IDX;
|
||||
if (idx >= 0 && idx < pmu->nr_arch_fixed_counters)
|
||||
return &pmu->fixed_counters[idx];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define kvm_for_each_pmc(pmu, pmc, i, bitmap) \
|
||||
for_each_set_bit(i, bitmap, X86_PMC_IDX_MAX) \
|
||||
if (!(pmc = kvm_pmc_idx_to_pmc(pmu, i))) \
|
||||
continue; \
|
||||
else \
|
||||
|
||||
static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
@ -131,12 +170,13 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
|
||||
|
||||
if (pmc_is_fixed(pmc))
|
||||
return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
|
||||
pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
|
||||
pmc->idx - KVM_FIXED_PMC_BASE_IDX) & 0x3;
|
||||
|
||||
return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
}
|
||||
|
||||
extern struct x86_pmu_capability kvm_pmu_cap;
|
||||
extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel;
|
||||
|
||||
static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
|
||||
{
|
||||
@ -178,6 +218,11 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
|
||||
pmu_ops->MAX_NR_GP_COUNTERS);
|
||||
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
|
||||
KVM_PMC_MAX_FIXED);
|
||||
|
||||
kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
|
||||
perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
|
||||
perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
|
||||
}
|
||||
|
||||
static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
|
||||
@ -216,7 +261,7 @@ static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
|
||||
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
|
||||
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
|
||||
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx);
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
|
||||
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
|
||||
@ -225,7 +270,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
|
||||
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
|
||||
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
|
||||
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id);
|
||||
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
|
||||
|
||||
bool is_vmware_backdoor_pmc(u32 pmc_idx);
|
||||
|
||||
|
@ -25,7 +25,7 @@ enum pmu_type {
|
||||
PMU_TYPE_EVNTSEL,
|
||||
};
|
||||
|
||||
static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
static struct kvm_pmc *amd_pmu_get_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
unsigned int num_counters = pmu->nr_arch_gp_counters;
|
||||
|
||||
@ -70,28 +70,24 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return amd_pmc_idx_to_pmc(pmu, idx);
|
||||
return amd_pmu_get_pmc(pmu, idx);
|
||||
}
|
||||
|
||||
static bool amd_hw_event_available(struct kvm_pmc *pmc)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
static int amd_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
if (idx >= pmu->nr_arch_gp_counters)
|
||||
return -EINVAL;
|
||||
|
||||
return idx < pmu->nr_arch_gp_counters;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* idx is the ECX register of RDPMC instruction */
|
||||
static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
|
||||
unsigned int idx, u64 *mask)
|
||||
{
|
||||
return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
|
||||
return amd_pmu_get_pmc(vcpu_to_pmu(vcpu), idx);
|
||||
}
|
||||
|
||||
static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
|
||||
@ -233,11 +229,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
struct kvm_pmu_ops amd_pmu_ops __initdata = {
|
||||
.hw_event_available = amd_hw_event_available,
|
||||
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
|
||||
.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
|
||||
.msr_idx_to_pmc = amd_msr_idx_to_pmc,
|
||||
.is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
|
||||
.check_rdpmc_early = amd_check_rdpmc_early,
|
||||
.is_valid_msr = amd_is_valid_msr,
|
||||
.get_msr = amd_pmu_get_msr,
|
||||
.set_msr = amd_pmu_set_msr,
|
||||
|
@ -3606,7 +3606,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
|
||||
return 1;
|
||||
}
|
||||
|
||||
kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
|
||||
kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
|
||||
|
||||
if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
|
||||
return nested_vmx_failInvalid(vcpu);
|
||||
|
@ -20,54 +20,20 @@
|
||||
#include "nested.h"
|
||||
#include "pmu.h"
|
||||
|
||||
/*
|
||||
* Perf's "BASE" is wildly misleading, architectural PMUs use bits 31:16 of ECX
|
||||
* to encode the "type" of counter to read, i.e. this is not a "base". And to
|
||||
* further confuse things, non-architectural PMUs use bit 31 as a flag for
|
||||
* "fast" reads, whereas the "type" is an explicit value.
|
||||
*/
|
||||
#define INTEL_RDPMC_GP 0
|
||||
#define INTEL_RDPMC_FIXED INTEL_PMC_FIXED_RDPMC_BASE
|
||||
|
||||
#define INTEL_RDPMC_TYPE_MASK GENMASK(31, 16)
|
||||
#define INTEL_RDPMC_INDEX_MASK GENMASK(15, 0)
|
||||
|
||||
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
|
||||
|
||||
enum intel_pmu_architectural_events {
|
||||
/*
|
||||
* The order of the architectural events matters as support for each
|
||||
* event is enumerated via CPUID using the index of the event.
|
||||
*/
|
||||
INTEL_ARCH_CPU_CYCLES,
|
||||
INTEL_ARCH_INSTRUCTIONS_RETIRED,
|
||||
INTEL_ARCH_REFERENCE_CYCLES,
|
||||
INTEL_ARCH_LLC_REFERENCES,
|
||||
INTEL_ARCH_LLC_MISSES,
|
||||
INTEL_ARCH_BRANCHES_RETIRED,
|
||||
INTEL_ARCH_BRANCHES_MISPREDICTED,
|
||||
|
||||
NR_REAL_INTEL_ARCH_EVENTS,
|
||||
|
||||
/*
|
||||
* Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
|
||||
* TSC reference cycles. The architectural reference cycles event may
|
||||
* or may not actually use the TSC as the reference, e.g. might use the
|
||||
* core crystal clock or the bus clock (yeah, "architectural").
|
||||
*/
|
||||
PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
|
||||
NR_INTEL_ARCH_EVENTS,
|
||||
};
|
||||
|
||||
static struct {
|
||||
u8 eventsel;
|
||||
u8 unit_mask;
|
||||
} const intel_arch_events[] = {
|
||||
[INTEL_ARCH_CPU_CYCLES] = { 0x3c, 0x00 },
|
||||
[INTEL_ARCH_INSTRUCTIONS_RETIRED] = { 0xc0, 0x00 },
|
||||
[INTEL_ARCH_REFERENCE_CYCLES] = { 0x3c, 0x01 },
|
||||
[INTEL_ARCH_LLC_REFERENCES] = { 0x2e, 0x4f },
|
||||
[INTEL_ARCH_LLC_MISSES] = { 0x2e, 0x41 },
|
||||
[INTEL_ARCH_BRANCHES_RETIRED] = { 0xc4, 0x00 },
|
||||
[INTEL_ARCH_BRANCHES_MISPREDICTED] = { 0xc5, 0x00 },
|
||||
[PSEUDO_ARCH_REFERENCE_CYCLES] = { 0x00, 0x03 },
|
||||
};
|
||||
|
||||
/* mapping between fixed pmc index and intel_arch_events array */
|
||||
static int fixed_pmc_events[] = {
|
||||
[0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
|
||||
[1] = INTEL_ARCH_CPU_CYCLES,
|
||||
[2] = PSEUDO_ARCH_REFERENCE_CYCLES,
|
||||
};
|
||||
|
||||
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
|
||||
{
|
||||
struct kvm_pmc *pmc;
|
||||
@ -84,77 +50,61 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
|
||||
|
||||
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
|
||||
|
||||
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
|
||||
__set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use);
|
||||
kvm_pmu_request_counter_reprogram(pmc);
|
||||
}
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
|
||||
{
|
||||
if (pmc_idx < INTEL_PMC_IDX_FIXED) {
|
||||
return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
|
||||
MSR_P6_EVNTSEL0);
|
||||
} else {
|
||||
u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
|
||||
|
||||
return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
|
||||
}
|
||||
}
|
||||
|
||||
static bool intel_hw_event_available(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||
u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
|
||||
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
|
||||
int i;
|
||||
|
||||
BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
|
||||
|
||||
/*
|
||||
* Disallow events reported as unavailable in guest CPUID. Note, this
|
||||
* doesn't apply to pseudo-architectural events.
|
||||
*/
|
||||
for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
|
||||
if (intel_arch_events[i].eventsel != event_select ||
|
||||
intel_arch_events[i].unit_mask != unit_mask)
|
||||
continue;
|
||||
|
||||
return pmu->available_event_types & BIT(i);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
bool fixed = idx & (1u << 30);
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
|
||||
return fixed ? idx < pmu->nr_arch_fixed_counters
|
||||
: idx < pmu->nr_arch_gp_counters;
|
||||
}
|
||||
|
||||
static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
|
||||
unsigned int idx, u64 *mask)
|
||||
{
|
||||
unsigned int type = idx & INTEL_RDPMC_TYPE_MASK;
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
bool fixed = idx & (1u << 30);
|
||||
struct kvm_pmc *counters;
|
||||
unsigned int num_counters;
|
||||
u64 bitmask;
|
||||
|
||||
idx &= ~(3u << 30);
|
||||
if (fixed) {
|
||||
/*
|
||||
* The encoding of ECX for RDPMC is different for architectural versus
|
||||
* non-architectural PMUs (PMUs with version '0'). For architectural
|
||||
* PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC
|
||||
* index. For non-architectural PMUs, bit 31 is a "fast" flag, and
|
||||
* bits 30:0 specify the PMC index.
|
||||
*
|
||||
* Yell and reject attempts to read PMCs for a non-architectural PMU,
|
||||
* as KVM doesn't support such PMUs.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!pmu->version))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs
|
||||
* are supported on all architectural PMUs, i.e. on all virtual PMUs
|
||||
* supported by KVM. Note, KVM only emulates fixed PMCs for PMU v2+,
|
||||
* but the type itself is still valid, i.e. let RDPMC fail due to
|
||||
* accessing a non-existent counter. Reject attempts to read all other
|
||||
* types, which are unknown/unsupported.
|
||||
*/
|
||||
switch (type) {
|
||||
case INTEL_RDPMC_FIXED:
|
||||
counters = pmu->fixed_counters;
|
||||
num_counters = pmu->nr_arch_fixed_counters;
|
||||
} else {
|
||||
bitmask = pmu->counter_bitmask[KVM_PMC_FIXED];
|
||||
break;
|
||||
case INTEL_RDPMC_GP:
|
||||
counters = pmu->gp_counters;
|
||||
num_counters = pmu->nr_arch_gp_counters;
|
||||
bitmask = pmu->counter_bitmask[KVM_PMC_GP];
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
idx &= INTEL_RDPMC_INDEX_MASK;
|
||||
if (idx >= num_counters)
|
||||
return NULL;
|
||||
*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
|
||||
|
||||
*mask &= bitmask;
|
||||
return &counters[array_index_nospec(idx, num_counters)];
|
||||
}
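From the guest's point of view, the ECX layout decoded above looks like the sketch below. The rdpmc_safe() helper and the INTEL_RDPMC_FIXED flag are taken from the selftest additions later in this diff; the GP counter "type" is simply 0, and the counter index goes in bits 15:0.

	uint64_t val;

	rdpmc_safe(0, &val);				/* GP counter 0 (type 0, index 0) */
	rdpmc_safe(INTEL_RDPMC_FIXED | 1, &val);	/* fixed counter 1 (type = bit 30) */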
|
||||
|
||||
@ -464,20 +414,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
|
||||
/*
|
||||
* Map fixed counter events to architectural general purpose event encodings.
|
||||
* Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
|
||||
* and so KVM instead programs the architectural event to effectively request
|
||||
* the fixed counter. Perf isn't guaranteed to use a fixed counter and may
|
||||
* instead program the encoding into a general purpose counter, e.g. if a
|
||||
* different perf_event is already utilizing the requested counter, but the end
|
||||
* result is the same (ignoring the fact that using a general purpose counter
|
||||
* will likely exacerbate counter contention).
|
||||
*
|
||||
* Forcibly inlined to allow asserting on @index at build time, and there should
|
||||
* never be more than one user.
|
||||
*/
|
||||
static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
|
||||
{
|
||||
int i;
|
||||
const enum perf_hw_id fixed_pmc_perf_ids[] = {
|
||||
[0] = PERF_COUNT_HW_INSTRUCTIONS,
|
||||
[1] = PERF_COUNT_HW_CPU_CYCLES,
|
||||
[2] = PERF_COUNT_HW_REF_CPU_CYCLES,
|
||||
};
|
||||
u64 eventsel;
|
||||
|
||||
BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
|
||||
BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_PMC_MAX_FIXED);
|
||||
BUILD_BUG_ON(index >= KVM_PMC_MAX_FIXED);
|
||||
|
||||
for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
|
||||
int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
|
||||
struct kvm_pmc *pmc = &pmu->fixed_counters[index];
|
||||
u32 event = fixed_pmc_events[index];
|
||||
|
||||
pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
|
||||
intel_arch_events[event].eventsel;
|
||||
}
|
||||
/*
|
||||
* Yell if perf reports support for a fixed counter but perf doesn't
|
||||
* have a known encoding for the associated general purpose event.
|
||||
*/
|
||||
eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
|
||||
WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
|
||||
return eventsel;
|
||||
}
|
||||
|
||||
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
@ -491,19 +459,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
u64 counter_mask;
|
||||
int i;
|
||||
|
||||
pmu->nr_arch_gp_counters = 0;
|
||||
pmu->nr_arch_fixed_counters = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_GP] = 0;
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
|
||||
pmu->version = 0;
|
||||
pmu->reserved_bits = 0xffffffff00200000ull;
|
||||
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
|
||||
pmu->global_ctrl_mask = ~0ull;
|
||||
pmu->global_status_mask = ~0ull;
|
||||
pmu->fixed_ctr_ctrl_mask = ~0ull;
|
||||
pmu->pebs_enable_mask = ~0ull;
|
||||
pmu->pebs_data_cfg_mask = ~0ull;
|
||||
|
||||
memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
|
||||
|
||||
/*
|
||||
@ -515,8 +470,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 0xa);
|
||||
if (!entry || !vcpu->kvm->arch.enable_pmu)
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
eax.full = entry->eax;
|
||||
edx.full = entry->edx;
|
||||
|
||||
@ -543,13 +499,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
kvm_pmu_cap.bit_width_fixed);
|
||||
pmu->counter_bitmask[KVM_PMC_FIXED] =
|
||||
((u64)1 << edx.split.bit_width_fixed) - 1;
|
||||
setup_fixed_pmc_eventsel(pmu);
|
||||
}
|
||||
|
||||
for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
|
||||
pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
|
||||
counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
|
||||
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
|
||||
(((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
|
||||
pmu->global_ctrl_mask = counter_mask;
|
||||
|
||||
/*
|
||||
@ -593,7 +548,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
|
||||
pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
|
||||
for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
|
||||
pmu->fixed_ctr_ctrl_mask &=
|
||||
~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
|
||||
~(1ULL << (KVM_FIXED_PMC_BASE_IDX + i * 4));
|
||||
}
|
||||
pmu->pebs_data_cfg_mask = ~0xff00000full;
|
||||
} else {
|
||||
@ -619,8 +574,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
|
||||
for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
|
||||
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
|
||||
pmu->fixed_counters[i].vcpu = vcpu;
|
||||
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
|
||||
pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
|
||||
pmu->fixed_counters[i].current_config = 0;
|
||||
pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
|
||||
}
|
||||
|
||||
lbr_desc->records.nr = 0;
|
||||
@ -748,11 +704,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
|
||||
struct kvm_pmc *pmc = NULL;
|
||||
int bit, hw_idx;
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
|
||||
X86_PMC_IDX_MAX) {
|
||||
pmc = intel_pmc_idx_to_pmc(pmu, bit);
|
||||
|
||||
if (!pmc || !pmc_speculative_in_use(pmc) ||
|
||||
kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
|
||||
if (!pmc_speculative_in_use(pmc) ||
|
||||
!pmc_is_globally_enabled(pmc) || !pmc->perf_event)
|
||||
continue;
|
||||
|
||||
@ -767,11 +720,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
|
||||
}
|
||||
|
||||
struct kvm_pmu_ops intel_pmu_ops __initdata = {
|
||||
.hw_event_available = intel_hw_event_available,
|
||||
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
|
||||
.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
|
||||
.msr_idx_to_pmc = intel_msr_idx_to_pmc,
|
||||
.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
|
||||
.is_valid_msr = intel_is_valid_msr,
|
||||
.get_msr = intel_pmu_get_msr,
|
||||
.set_msr = intel_pmu_set_msr,
|
||||
|
@ -8394,12 +8394,9 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
|
||||
return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
|
||||
}
|
||||
|
||||
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
|
||||
u32 pmc)
|
||||
static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc)
|
||||
{
|
||||
if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
return kvm_pmu_check_rdpmc_early(emul_to_vcpu(ctxt), pmc);
|
||||
}
|
||||
|
||||
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
|
||||
@ -8531,7 +8528,7 @@ static const struct x86_emulate_ops emulate_ops = {
|
||||
.set_msr_with_filter = emulator_set_msr_with_filter,
|
||||
.get_msr_with_filter = emulator_get_msr_with_filter,
|
||||
.get_msr = emulator_get_msr,
|
||||
.check_pmc = emulator_check_pmc,
|
||||
.check_rdpmc_early = emulator_check_rdpmc_early,
|
||||
.read_pmc = emulator_read_pmc,
|
||||
.halt = emulator_halt,
|
||||
.wbinvd = emulator_wbinvd,
|
||||
@ -8904,7 +8901,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
if (unlikely(!r))
|
||||
return 0;
|
||||
|
||||
kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
|
||||
kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
|
||||
|
||||
/*
|
||||
* rflags is the old, "raw" value of the flags. The new value has
|
||||
@ -9217,9 +9214,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
*/
|
||||
if (!ctxt->have_exception ||
|
||||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
|
||||
kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
|
||||
kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
|
||||
if (ctxt->is_branch)
|
||||
kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
|
||||
kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
|
||||
kvm_rip_write(vcpu, ctxt->eip);
|
||||
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
|
||||
r = kvm_vcpu_do_singlestep(vcpu);
|
||||
|
@ -431,12 +431,6 @@ static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
|
||||
return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
|
||||
}
|
||||
|
||||
enum kvm_intr_type {
|
||||
/* Values are arbitrary, but must be non-zero. */
|
||||
KVM_HANDLING_IRQ = 1,
|
||||
KVM_HANDLING_NMI,
|
||||
};
|
||||
|
||||
static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
|
||||
enum kvm_intr_type intr)
|
||||
{
|
||||
|
@ -36,6 +36,7 @@ LIBKVM_x86_64 += lib/x86_64/apic.c
|
||||
LIBKVM_x86_64 += lib/x86_64/handlers.S
|
||||
LIBKVM_x86_64 += lib/x86_64/hyperv.c
|
||||
LIBKVM_x86_64 += lib/x86_64/memstress.c
|
||||
LIBKVM_x86_64 += lib/x86_64/pmu.c
|
||||
LIBKVM_x86_64 += lib/x86_64/processor.c
|
||||
LIBKVM_x86_64 += lib/x86_64/sev.c
|
||||
LIBKVM_x86_64 += lib/x86_64/svm.c
|
||||
@ -82,6 +83,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
|
||||
|
@ -270,6 +270,10 @@ bool get_kvm_param_bool(const char *param);
|
||||
bool get_kvm_intel_param_bool(const char *param);
|
||||
bool get_kvm_amd_param_bool(const char *param);
|
||||
|
||||
int get_kvm_param_integer(const char *param);
|
||||
int get_kvm_intel_param_integer(const char *param);
|
||||
int get_kvm_amd_param_integer(const char *param);
|
||||
|
||||
unsigned int kvm_check_cap(long cap);
|
||||
|
||||
static inline bool kvm_has_cap(long cap)
|
||||
|
tools/testing/selftests/kvm/include/x86_64/pmu.h (new file, 97 lines)
@ -0,0 +1,97 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2023, Tencent, Inc.
|
||||
*/
|
||||
#ifndef SELFTEST_KVM_PMU_H
|
||||
#define SELFTEST_KVM_PMU_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
|
||||
|
||||
/*
|
||||
* Encode an eventsel+umask pair into event-select MSR format. Note, this is
|
||||
* technically AMD's format, as Intel's format only supports 8 bits for the
|
||||
* event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing
|
||||
* in '0' is a nop and won't clobber the CMASK.
|
||||
*/
|
||||
#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \
|
||||
((eventsel) & 0xff) | \
|
||||
((umask) & 0xff) << 8)
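A worked expansion of the macro, using the LLC-references encoding defined below (eventsel 0x2e, umask 0x4f): the umask lands in bits 15:8, the low byte of the eventsel in bits 7:0, and eventsel bits 11:8 (zero here) would land in bits 35:32 as AMD expects.

/*
 * RAW_EVENT(0x2e, 0x4f) == ((0x2e & 0xf00) << 24) | (0x2e & 0xff) | ((0x4f & 0xff) << 8)
 *                       == 0x0 | 0x2e | 0x4f00
 *                       == 0x4f2e
 */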
|
||||
|
||||
/*
|
||||
* These are technically Intel's definitions, but except for CMASK (see above),
|
||||
* AMD's layout is compatible with Intel's.
|
||||
*/
|
||||
#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0)
|
||||
#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8)
|
||||
#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16)
|
||||
#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17)
|
||||
#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
|
||||
#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
|
||||
#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
|
||||
#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
|
||||
#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
|
||||
#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
|
||||
#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
|
||||
|
||||
/* RDPMC control flags, Intel only. */
|
||||
#define INTEL_RDPMC_METRICS BIT_ULL(29)
|
||||
#define INTEL_RDPMC_FIXED BIT_ULL(30)
|
||||
#define INTEL_RDPMC_FAST BIT_ULL(31)
|
||||
|
||||
/* Fixed PMC controls, Intel only. */
|
||||
#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
|
||||
|
||||
#define FIXED_PMC_KERNEL BIT_ULL(0)
|
||||
#define FIXED_PMC_USER BIT_ULL(1)
|
||||
#define FIXED_PMC_ANYTHREAD BIT_ULL(2)
|
||||
#define FIXED_PMC_ENABLE_PMI BIT_ULL(3)
|
||||
#define FIXED_PMC_NR_BITS 4
|
||||
#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
|
||||
|
||||
#define PMU_CAP_FW_WRITES BIT_ULL(13)
|
||||
#define PMU_CAP_LBR_FMT 0x3f
|
||||
|
||||
#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00)
|
||||
#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
|
||||
#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01)
|
||||
#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f)
|
||||
#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41)
|
||||
#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
|
||||
#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
|
||||
#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
|
||||
|
||||
#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
|
||||
#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
|
||||
#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00)
|
||||
#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00)
|
||||
|
||||
/*
|
||||
* Note! The order and thus the index of the architectural events matters as
|
||||
* support for each event is enumerated via CPUID using the index of the event.
|
||||
*/
|
||||
enum intel_pmu_architectural_events {
|
||||
INTEL_ARCH_CPU_CYCLES_INDEX,
|
||||
INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
|
||||
INTEL_ARCH_REFERENCE_CYCLES_INDEX,
|
||||
INTEL_ARCH_LLC_REFERENCES_INDEX,
|
||||
INTEL_ARCH_LLC_MISSES_INDEX,
|
||||
INTEL_ARCH_BRANCHES_RETIRED_INDEX,
|
||||
INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
|
||||
INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
|
||||
NR_INTEL_ARCH_EVENTS,
|
||||
};
|
||||
|
||||
enum amd_pmu_zen_events {
|
||||
AMD_ZEN_CORE_CYCLES_INDEX,
|
||||
AMD_ZEN_INSTRUCTIONS_INDEX,
|
||||
AMD_ZEN_BRANCHES_INDEX,
|
||||
AMD_ZEN_BRANCH_MISSES_INDEX,
|
||||
NR_AMD_ZEN_EVENTS,
|
||||
};
|
||||
|
||||
extern const uint64_t intel_pmu_arch_events[];
|
||||
extern const uint64_t amd_pmu_zen_events[];
|
||||
|
||||
#endif /* SELFTEST_KVM_PMU_H */
|
@ -29,6 +29,9 @@ enum vm_guest_x86_subtype {
|
||||
VM_SUBTYPE_SEV_ES,
|
||||
};
|
||||
|
||||
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
|
||||
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
|
||||
|
||||
#define NMI_VECTOR 0x02
|
||||
|
||||
#define X86_EFLAGS_FIXED (1u << 1)
|
||||
@ -289,24 +292,41 @@ struct kvm_x86_cpu_property {
|
||||
* that indicates the feature is _not_ supported, and a property that states
|
||||
* the length of the bit mask of unsupported features. A feature is supported
|
||||
* if the size of the bit mask is larger than the "unavailable" bit, and said
|
||||
* bit is not set.
|
||||
* bit is not set. Fixed counters also have bizarre enumeration, but inverted from
|
||||
* arch events for general purpose counters. Fixed counters are supported if a
|
||||
* feature flag is set **OR** the total number of fixed counters is greater
|
||||
* than the index of the counter.
|
||||
*
|
||||
* Wrap the "unavailable" feature to simplify checking whether or not a given
|
||||
* architectural event is supported.
|
||||
* Wrap the events for general purpose and fixed counters to simplify checking
|
||||
* whether or not a given architectural event is supported.
|
||||
*/
|
||||
struct kvm_x86_pmu_feature {
|
||||
struct kvm_x86_cpu_feature anti_feature;
|
||||
struct kvm_x86_cpu_feature f;
|
||||
};
|
||||
#define KVM_X86_PMU_FEATURE(name, __bit) \
|
||||
({ \
|
||||
struct kvm_x86_pmu_feature feature = { \
|
||||
.anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \
|
||||
}; \
|
||||
\
|
||||
feature; \
|
||||
#define KVM_X86_PMU_FEATURE(__reg, __bit) \
|
||||
({ \
|
||||
struct kvm_x86_pmu_feature feature = { \
|
||||
.f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
|
||||
}; \
|
||||
\
|
||||
kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \
|
||||
KVM_CPUID_##__reg == KVM_CPUID_ECX); \
|
||||
feature; \
|
||||
})
|
||||
|
||||
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5)
|
||||
#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0)
|
||||
#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1)
|
||||
#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2)
|
||||
#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3)
|
||||
#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4)
|
||||
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
|
||||
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
|
||||
#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
|
||||
|
||||
#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
|
||||
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
|
||||
#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2)
|
||||
#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3)
|
||||
|
||||
static inline unsigned int x86_family(unsigned int eax)
|
||||
{
|
||||
@ -705,10 +725,16 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
|
||||
|
||||
static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
|
||||
{
|
||||
uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
|
||||
uint32_t nr_bits;
|
||||
|
||||
return nr_bits > feature.anti_feature.bit &&
|
||||
!this_cpu_has(feature.anti_feature);
|
||||
if (feature.f.reg == KVM_CPUID_EBX) {
|
||||
nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
|
||||
return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
|
||||
}
|
||||
|
||||
GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
|
||||
nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
|
||||
return nr_bits > feature.f.bit || this_cpu_has(feature.f);
|
||||
}
|
||||
|
||||
static __always_inline uint64_t this_cpu_supported_xcr0(void)
|
||||
@ -924,10 +950,16 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
|
||||
|
||||
static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
|
||||
{
|
||||
uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
|
||||
uint32_t nr_bits;
|
||||
|
||||
return nr_bits > feature.anti_feature.bit &&
|
||||
!kvm_cpu_has(feature.anti_feature);
|
||||
if (feature.f.reg == KVM_CPUID_EBX) {
|
||||
nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
|
||||
return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
|
||||
}
|
||||
|
||||
TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
|
||||
nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
|
||||
return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
|
||||
}
|
||||
|
||||
static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
|
||||
@ -1002,7 +1034,9 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
|
||||
}
|
||||
|
||||
void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
|
||||
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
|
||||
struct kvm_x86_cpu_property property,
|
||||
uint32_t value);
|
||||
|
||||
void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
|
||||
void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
|
||||
@ -1128,16 +1162,19 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
|
||||
* r9 = exception vector (non-zero)
|
||||
* r10 = error code
|
||||
*/
|
||||
#define KVM_ASM_SAFE(insn) \
|
||||
#define __KVM_ASM_SAFE(insn, fep) \
|
||||
"mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
|
||||
"lea 1f(%%rip), %%r10\n\t" \
|
||||
"lea 2f(%%rip), %%r11\n\t" \
|
||||
"1: " insn "\n\t" \
|
||||
fep "1: " insn "\n\t" \
|
||||
"xor %%r9, %%r9\n\t" \
|
||||
"2:\n\t" \
|
||||
"mov %%r9b, %[vector]\n\t" \
|
||||
"mov %%r10, %[error_code]\n\t"
|
||||
|
||||
#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
|
||||
#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
|
||||
|
||||
#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec)
|
||||
#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"
|
||||
|
||||
@ -1164,21 +1201,58 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
|
||||
vector; \
|
||||
})
|
||||
|
||||
static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val)
|
||||
{
|
||||
uint64_t error_code;
|
||||
uint8_t vector;
|
||||
uint32_t a, d;
|
||||
#define kvm_asm_safe_fep(insn, inputs...) \
|
||||
({ \
|
||||
uint64_t ign_error_code; \
|
||||
uint8_t vector; \
|
||||
\
|
||||
asm volatile(KVM_ASM_SAFE(insn) \
|
||||
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
|
||||
: inputs \
|
||||
: KVM_ASM_SAFE_CLOBBERS); \
|
||||
vector; \
|
||||
})
|
||||
|
||||
asm volatile(KVM_ASM_SAFE("rdmsr")
|
||||
: "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code)
|
||||
: "c"(msr)
|
||||
: KVM_ASM_SAFE_CLOBBERS);
|
||||
#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
|
||||
({ \
|
||||
uint8_t vector; \
|
||||
\
|
||||
asm volatile(KVM_ASM_SAFE_FEP(insn) \
|
||||
: KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
|
||||
: inputs \
|
||||
: KVM_ASM_SAFE_CLOBBERS); \
|
||||
vector; \
|
||||
})
|
||||
|
||||
*val = (uint64_t)a | ((uint64_t)d << 32);
|
||||
return vector;
|
||||
#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
|
||||
static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
|
||||
{ \
|
||||
uint64_t error_code; \
|
||||
uint8_t vector; \
|
||||
uint32_t a, d; \
|
||||
\
|
||||
asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
|
||||
: "=a"(a), "=d"(d), \
|
||||
KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
|
||||
: "c"(idx) \
|
||||
: KVM_ASM_SAFE_CLOBBERS); \
|
||||
\
|
||||
*val = (uint64_t)a | ((uint64_t)d << 32); \
|
||||
return vector; \
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
|
||||
* use ECX as an input index, and EDX:EAX as a 64-bit output.
|
||||
*/
|
||||
#define BUILD_READ_U64_SAFE_HELPERS(insn) \
|
||||
BUILD_READ_U64_SAFE_HELPER(insn, , ) \
|
||||
BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
|
||||
|
||||
BUILD_READ_U64_SAFE_HELPERS(rdmsr)
|
||||
BUILD_READ_U64_SAFE_HELPERS(rdpmc)
|
||||
BUILD_READ_U64_SAFE_HELPERS(xgetbv)
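A usage sketch for the generated helpers, assuming the selftest's usual GP_VECTOR and GUEST_ASSERT definitions and a deliberately bogus, purely illustrative MSR index: each helper returns the exception vector (0 on success) and writes the 64-bit result through the pointer.

	uint64_t val;
	uint8_t vector;

	vector = rdmsr_safe(0xdeadbeef, &val);		/* plain RDMSR */
	GUEST_ASSERT(vector == GP_VECTOR);

	vector = rdmsr_safe_fep(0xdeadbeef, &val);	/* forced through the emulator */
	GUEST_ASSERT(vector == GP_VECTOR);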
|
||||
|
||||
static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
|
||||
{
|
||||
return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
|
||||
@ -1194,6 +1268,16 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
|
||||
|
||||
bool kvm_is_tdp_enabled(void);
|
||||
|
||||
static inline bool kvm_is_pmu_enabled(void)
|
||||
{
|
||||
return get_kvm_param_bool("enable_pmu");
|
||||
}
|
||||
|
||||
static inline bool kvm_is_forced_emulation_enabled(void)
|
||||
{
|
||||
return !!get_kvm_param_integer("force_emulation_prefix");
|
||||
}
|
||||
|
||||
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
|
||||
int *level);
|
||||
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
|
||||
|
@ -52,13 +52,13 @@ int open_kvm_dev_path_or_exit(void)
|
||||
return _open_kvm_dev_path_or_exit(O_RDONLY);
|
||||
}
|
||||
|
||||
static bool get_module_param_bool(const char *module_name, const char *param)
|
||||
static ssize_t get_module_param(const char *module_name, const char *param,
|
||||
void *buffer, size_t buffer_size)
|
||||
{
|
||||
const int path_size = 128;
|
||||
char path[path_size];
|
||||
char value;
|
||||
ssize_t r;
|
||||
int fd;
|
||||
ssize_t bytes_read;
|
||||
int fd, r;
|
||||
|
||||
r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
|
||||
module_name, param);
|
||||
@ -67,11 +67,46 @@ static bool get_module_param_bool(const char *module_name, const char *param)
|
||||
|
||||
fd = open_path_or_exit(path, O_RDONLY);
|
||||
|
||||
r = read(fd, &value, 1);
|
||||
TEST_ASSERT(r == 1, "read(%s) failed", path);
|
||||
bytes_read = read(fd, buffer, buffer_size);
|
||||
TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
|
||||
path, bytes_read, buffer_size);
|
||||
|
||||
r = close(fd);
|
||||
TEST_ASSERT(!r, "close(%s) failed", path);
|
||||
return bytes_read;
|
||||
}
|
||||
|
||||
static int get_module_param_integer(const char *module_name, const char *param)
|
||||
{
|
||||
/*
|
||||
* 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
|
||||
* NUL char, and 1 byte because the kernel sucks and inserts a newline
|
||||
* at the end.
|
||||
*/
|
||||
char value[16 + 1 + 1];
|
||||
ssize_t r;
|
||||
|
||||
memset(value, '\0', sizeof(value));
|
||||
|
||||
r = get_module_param(module_name, param, value, sizeof(value));
|
||||
TEST_ASSERT(value[r - 1] == '\n',
|
||||
"Expected trailing newline, got char '%c'", value[r - 1]);
|
||||
|
||||
/*
|
||||
* Squash the newline, otherwise atoi_paranoid() will complain about
|
||||
* trailing non-NUL characters in the string.
|
||||
*/
|
||||
value[r - 1] = '\0';
|
||||
return atoi_paranoid(value);
|
||||
}
|
||||
|
||||
static bool get_module_param_bool(const char *module_name, const char *param)
|
||||
{
|
||||
char value;
|
||||
ssize_t r;
|
||||
|
||||
r = get_module_param(module_name, param, &value, sizeof(value));
|
||||
TEST_ASSERT_EQ(r, 1);
|
||||
|
||||
if (value == 'Y')
|
||||
return true;
|
||||
@ -96,6 +131,21 @@ bool get_kvm_amd_param_bool(const char *param)
|
||||
return get_module_param_bool("kvm_amd", param);
|
||||
}
|
||||
|
||||
int get_kvm_param_integer(const char *param)
|
||||
{
|
||||
return get_module_param_integer("kvm", param);
|
||||
}
|
||||
|
||||
int get_kvm_intel_param_integer(const char *param)
|
||||
{
|
||||
return get_module_param_integer("kvm_intel", param);
|
||||
}
|
||||
|
||||
int get_kvm_amd_param_integer(const char *param)
|
||||
{
|
||||
return get_module_param_integer("kvm_amd", param);
|
||||
}
|
||||
|
||||
/*
|
||||
* Capability
|
||||
*
|
||||
|
31
tools/testing/selftests/kvm/lib/x86_64/pmu.c
Normal file
31
tools/testing/selftests/kvm/lib/x86_64/pmu.c
Normal file
@ -0,0 +1,31 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2023, Tencent, Inc.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "pmu.h"
|
||||
|
||||
const uint64_t intel_pmu_arch_events[] = {
|
||||
INTEL_ARCH_CPU_CYCLES,
|
||||
INTEL_ARCH_INSTRUCTIONS_RETIRED,
|
||||
INTEL_ARCH_REFERENCE_CYCLES,
|
||||
INTEL_ARCH_LLC_REFERENCES,
|
||||
INTEL_ARCH_LLC_MISSES,
|
||||
INTEL_ARCH_BRANCHES_RETIRED,
|
||||
INTEL_ARCH_BRANCHES_MISPREDICTED,
|
||||
INTEL_ARCH_TOPDOWN_SLOTS,
|
||||
};
|
||||
kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
|
||||
|
||||
const uint64_t amd_pmu_zen_events[] = {
|
||||
AMD_ZEN_CORE_CYCLES,
|
||||
AMD_ZEN_INSTRUCTIONS_RETIRED,
|
||||
AMD_ZEN_BRANCHES_RETIRED,
|
||||
AMD_ZEN_BRANCHES_MISPREDICTED,
|
||||
};
|
||||
kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
|
@ -781,12 +781,21 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
|
||||
vcpu_set_cpuid(vcpu);
|
||||
}
|
||||
|
||||
void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr)
|
||||
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
|
||||
struct kvm_x86_cpu_property property,
|
||||
uint32_t value)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008);
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
|
||||
entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
|
||||
|
||||
(&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
|
||||
(&entry->eax)[property.reg] |= value << property.lo_bit;
|
||||
|
||||
entry->eax = (entry->eax & ~0xff) | maxphyaddr;
|
||||
vcpu_set_cpuid(vcpu);
|
||||
|
||||
/* Sanity check that @value doesn't exceed the bounds in any way. */
|
||||
TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
|
||||
}
|
||||
|
||||
void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
|
||||
|
620
tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
Normal file
620
tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
Normal file
@ -0,0 +1,620 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2023, Tencent, Inc.
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE /* for program_invocation_short_name */
|
||||
#include <x86intrin.h>
|
||||
|
||||
#include "pmu.h"
|
||||
#include "processor.h"
|
||||
|
||||
/* Number of LOOP instructions for the guest measurement payload. */
|
||||
#define NUM_BRANCHES 10
|
||||
/*
|
||||
* Number of "extra" instructions that will be counted, i.e. the number of
|
||||
* instructions that are needed to set up the loop and then disabled the
|
||||
* counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
|
||||
*/
|
||||
#define NUM_EXTRA_INSNS 7
|
||||
#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
|
||||
|
||||
static uint8_t kvm_pmu_version;
|
||||
static bool kvm_has_perf_caps;
|
||||
static bool is_forced_emulation_enabled;
|
||||
|
||||
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
|
||||
void *guest_code,
|
||||
uint8_t pmu_version,
|
||||
uint64_t perf_capabilities)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
|
||||
vm = vm_create_with_one_vcpu(vcpu, guest_code);
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(*vcpu);
|
||||
|
||||
sync_global_to_guest(vm, kvm_pmu_version);
|
||||
sync_global_to_guest(vm, is_forced_emulation_enabled);
|
||||
|
||||
/*
|
||||
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
|
||||
* features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
|
||||
*/
|
||||
if (kvm_has_perf_caps)
|
||||
vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
|
||||
|
||||
vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
|
||||
return vm;
|
||||
}
|
||||
|
||||
static void run_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct ucall uc;
|
||||
|
||||
do {
|
||||
vcpu_run(vcpu);
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_SYNC:
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT(uc);
|
||||
break;
|
||||
case UCALL_PRINTF:
|
||||
pr_info("%s", uc.buffer);
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
break;
|
||||
default:
|
||||
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
|
||||
}
|
||||
} while (uc.cmd != UCALL_DONE);
|
||||
}
|
||||
|
||||
static uint8_t guest_get_pmu_version(void)
|
||||
{
|
||||
/*
|
||||
* Return the effective PMU version, i.e. the minimum between what KVM
|
||||
* supports and what is enumerated to the guest. The host deliberately
|
||||
* advertises a PMU version to the guest beyond what is actually
|
||||
* supported by KVM to verify KVM doesn't freak out and do something
|
||||
* bizarre with an architecturally valid, but unsupported, version.
|
||||
*/
|
||||
return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
|
||||
}
|
||||
|
||||
/*
|
||||
* If an architectural event is supported and guaranteed to generate at least
|
||||
* one "hit, assert that its count is non-zero. If an event isn't supported or
|
||||
* the test can't guarantee the associated action will occur, then all bets are
|
||||
* off regarding the count, i.e. no checks can be done.
|
||||
*
|
||||
* Sanity check that in all cases, the event doesn't count when it's disabled,
|
||||
* and that KVM correctly emulates the write of an arbitrary value.
|
||||
*/
|
||||
static void guest_assert_event_count(uint8_t idx,
|
||||
struct kvm_x86_pmu_feature event,
|
||||
uint32_t pmc, uint32_t pmc_msr)
|
||||
{
|
||||
uint64_t count;
|
||||
|
||||
count = _rdpmc(pmc);
|
||||
if (!this_pmu_has(event))
|
||||
goto sanity_checks;
|
||||
|
||||
switch (idx) {
|
||||
case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
|
||||
GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
|
||||
break;
|
||||
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
|
||||
GUEST_ASSERT_EQ(count, NUM_BRANCHES);
|
||||
break;
|
||||
case INTEL_ARCH_LLC_REFERENCES_INDEX:
|
||||
case INTEL_ARCH_LLC_MISSES_INDEX:
|
||||
if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
|
||||
!this_cpu_has(X86_FEATURE_CLFLUSH))
|
||||
break;
|
||||
fallthrough;
|
||||
case INTEL_ARCH_CPU_CYCLES_INDEX:
|
||||
case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
|
||||
GUEST_ASSERT_NE(count, 0);
|
||||
break;
|
||||
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
|
||||
GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
sanity_checks:
|
||||
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
|
||||
GUEST_ASSERT_EQ(_rdpmc(pmc), count);
|
||||
|
||||
wrmsr(pmc_msr, 0xdead);
|
||||
GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable and disable the PMC in a monolithic asm blob to ensure that the
|
||||
* compiler can't insert _any_ code into the measured sequence. Note, ECX
|
||||
* doesn't need to be clobbered as the input value, @pmc_msr, is restored
|
||||
* before the end of the sequence.
|
||||
*
|
||||
* If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
|
||||
* start of the loop to force LLC references and misses, i.e. to allow testing
|
||||
* that those events actually count.
|
||||
*
|
||||
* If forced emulation is enabled (and specified), force emulation on a subset
|
||||
* of the measured code to verify that KVM correctly emulates instructions and
|
||||
* branches retired events in conjunction with hardware also counting said
|
||||
* events.
|
||||
*/
|
||||
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
|
||||
do { \
|
||||
__asm__ __volatile__("wrmsr\n\t" \
|
||||
clflush "\n\t" \
|
||||
"mfence\n\t" \
|
||||
"1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
|
||||
FEP "loop .\n\t" \
|
||||
FEP "mov %%edi, %%ecx\n\t" \
|
||||
FEP "xor %%eax, %%eax\n\t" \
|
||||
FEP "xor %%edx, %%edx\n\t" \
|
||||
"wrmsr\n\t" \
|
||||
:: "a"((uint32_t)_value), "d"(_value >> 32), \
|
||||
"c"(_msr), "D"(_msr) \
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
|
||||
do { \
|
||||
wrmsr(pmc_msr, 0); \
|
||||
\
|
||||
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
|
||||
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \
|
||||
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
|
||||
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \
|
||||
else \
|
||||
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
|
||||
\
|
||||
guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
|
||||
} while (0)
|
||||
|
||||
static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
|
||||
uint32_t pmc, uint32_t pmc_msr,
|
||||
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
|
||||
{
|
||||
GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
|
||||
|
||||
if (is_forced_emulation_enabled)
|
||||
GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
|
||||
}
|
||||
|
||||
#define X86_PMU_FEATURE_NULL \
|
||||
({ \
|
||||
struct kvm_x86_pmu_feature feature = {}; \
|
||||
\
|
||||
feature; \
|
||||
})
|
||||
|
||||
static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
|
||||
{
|
||||
return !(*(u64 *)&event);
|
||||
}
|
||||
|
||||
static void guest_test_arch_event(uint8_t idx)
|
||||
{
|
||||
const struct {
|
||||
struct kvm_x86_pmu_feature gp_event;
|
||||
struct kvm_x86_pmu_feature fixed_event;
|
||||
} intel_event_to_feature[] = {
|
||||
[INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
|
||||
[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
|
||||
/*
|
||||
* Note, the fixed counter for reference cycles is NOT the same
|
||||
* as the general purpose architectural event. The fixed counter
|
||||
* explicitly counts at the same frequency as the TSC, whereas
|
||||
* the GP event counts at a fixed, but uarch specific, frequency.
|
||||
* Bundle them here for simplicity.
|
||||
*/
|
||||
[INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
|
||||
[INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
|
||||
[INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
|
||||
[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
|
||||
[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
|
||||
[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
|
||||
};
|
||||
|
||||
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
|
||||
uint32_t pmu_version = guest_get_pmu_version();
|
||||
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
|
||||
bool guest_has_perf_global_ctrl = pmu_version >= 2;
|
||||
struct kvm_x86_pmu_feature gp_event, fixed_event;
|
||||
uint32_t base_pmc_msr;
|
||||
unsigned int i;
|
||||
|
||||
/* The host side shouldn't invoke this without a guest PMU. */
|
||||
GUEST_ASSERT(pmu_version);
|
||||
|
||||
if (this_cpu_has(X86_FEATURE_PDCM) &&
|
||||
rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
|
||||
base_pmc_msr = MSR_IA32_PMC0;
|
||||
else
|
||||
base_pmc_msr = MSR_IA32_PERFCTR0;
|
||||
|
||||
gp_event = intel_event_to_feature[idx].gp_event;
|
||||
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
|
||||
|
||||
GUEST_ASSERT(nr_gp_counters);
|
||||
|
||||
for (i = 0; i < nr_gp_counters; i++) {
|
||||
uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
|
||||
ARCH_PERFMON_EVENTSEL_ENABLE |
|
||||
intel_pmu_arch_events[idx];
|
||||
|
||||
wrmsr(MSR_P6_EVNTSEL0 + i, 0);
|
||||
if (guest_has_perf_global_ctrl)
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
|
||||
|
||||
__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
|
||||
MSR_P6_EVNTSEL0 + i, eventsel);
|
||||
}
|
||||
|
||||
if (!guest_has_perf_global_ctrl)
|
||||
return;
|
||||
|
||||
fixed_event = intel_event_to_feature[idx].fixed_event;
|
||||
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
|
||||
return;
|
||||
|
||||
i = fixed_event.f.bit;
|
||||
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
|
||||
|
||||
__guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
|
||||
MSR_CORE_PERF_FIXED_CTR0 + i,
|
||||
MSR_CORE_PERF_GLOBAL_CTRL,
|
||||
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
|
||||
}
|
||||
|
||||
static void guest_test_arch_events(void)
|
||||
{
|
||||
uint8_t i;
|
||||
|
||||
for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
|
||||
guest_test_arch_event(i);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
|
||||
uint8_t length, uint8_t unavailable_mask)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
/* Testing arch events requires a vPMU (there are no negative tests). */
|
||||
if (!pmu_version)
|
||||
return;
|
||||
|
||||
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
|
||||
pmu_version, perf_capabilities);
|
||||
|
||||
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
|
||||
length);
|
||||
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
|
||||
unavailable_mask);
|
||||
|
||||
run_vcpu(vcpu);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
|
||||
* that aren't defined counter MSRs *probably* don't exist, but there's no
|
||||
* guarantee that currently undefined MSR indices won't be used for something
|
||||
* other than PMCs in the future.
|
||||
*/
|
||||
#define MAX_NR_GP_COUNTERS 8
|
||||
#define MAX_NR_FIXED_COUNTERS 3
|
||||
|
||||
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
|
||||
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
|
||||
"Expected %s on " #insn "(0x%x), got vector %u", \
|
||||
expect_gp ? "#GP" : "no fault", msr, vector) \
|
||||
|
||||
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
|
||||
__GUEST_ASSERT(val == expected_val, \
|
||||
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
|
||||
msr, expected_val, val);
|
||||
|
||||
static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
|
||||
uint64_t expected_val)
|
||||
{
|
||||
uint8_t vector;
|
||||
uint64_t val;
|
||||
|
||||
vector = rdpmc_safe(rdpmc_idx, &val);
|
||||
GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
|
||||
if (expect_success)
|
||||
GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
|
||||
|
||||
if (!is_forced_emulation_enabled)
|
||||
return;
|
||||
|
||||
vector = rdpmc_safe_fep(rdpmc_idx, &val);
|
||||
GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
|
||||
if (expect_success)
|
||||
GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
|
||||
}
|
||||
|
||||
static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
|
||||
uint8_t nr_counters, uint32_t or_mask)
|
||||
{
|
||||
const bool pmu_has_fast_mode = !guest_get_pmu_version();
|
||||
uint8_t i;
|
||||
|
||||
for (i = 0; i < nr_possible_counters; i++) {
|
||||
/*
|
||||
* TODO: Test a value that validates full-width writes and the
|
||||
* width of the counters.
|
||||
*/
|
||||
const uint64_t test_val = 0xffff;
|
||||
const uint32_t msr = base_msr + i;
|
||||
|
||||
/*
|
||||
* Fixed counters are supported if the counter is less than the
|
||||
* number of enumerated contiguous counters *or* the counter is
|
||||
* explicitly enumerated in the supported counters mask.
|
||||
*/
|
||||
const bool expect_success = i < nr_counters || (or_mask & BIT(i));
|
||||
|
||||
/*
|
||||
* KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
|
||||
* unsupported, i.e. doesn't #GP and reads back '0'.
|
||||
*/
|
||||
const uint64_t expected_val = expect_success ? test_val : 0;
|
||||
const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
|
||||
msr != MSR_P6_PERFCTR1;
|
||||
uint32_t rdpmc_idx;
|
||||
uint8_t vector;
|
||||
uint64_t val;
|
||||
|
||||
vector = wrmsr_safe(msr, test_val);
|
||||
GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
|
||||
|
||||
vector = rdmsr_safe(msr, &val);
|
||||
GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
|
||||
|
||||
/* On #GP, the result of RDMSR is undefined. */
|
||||
if (!expect_gp)
|
||||
GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
|
||||
|
||||
/*
|
||||
* Redo the read tests with RDPMC, which has different indexing
|
||||
* semantics and additional capabilities.
|
||||
*/
|
||||
rdpmc_idx = i;
|
||||
if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
|
||||
rdpmc_idx |= INTEL_RDPMC_FIXED;
|
||||
|
||||
guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
|
||||
|
||||
/*
|
||||
* KVM doesn't support non-architectural PMUs, i.e. it should
|
||||
* impossible to have fast mode RDPMC. Verify that attempting
|
||||
* to use fast RDPMC always #GPs.
|
||||
*/
|
||||
GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
|
||||
rdpmc_idx |= INTEL_RDPMC_FAST;
|
||||
guest_test_rdpmc(rdpmc_idx, false, -1ull);
|
||||
|
||||
vector = wrmsr_safe(msr, 0);
|
||||
GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
|
||||
}
|
||||
}
|
||||
|
||||
static void guest_test_gp_counters(void)
|
||||
{
|
||||
uint8_t nr_gp_counters = 0;
|
||||
uint32_t base_msr;
|
||||
|
||||
if (guest_get_pmu_version())
|
||||
nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
|
||||
|
||||
if (this_cpu_has(X86_FEATURE_PDCM) &&
|
||||
rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
|
||||
base_msr = MSR_IA32_PMC0;
|
||||
else
|
||||
base_msr = MSR_IA32_PERFCTR0;
|
||||
|
||||
guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
|
||||
uint8_t nr_gp_counters)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
|
||||
pmu_version, perf_capabilities);
|
||||
|
||||
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
|
||||
nr_gp_counters);
|
||||
|
||||
run_vcpu(vcpu);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void guest_test_fixed_counters(void)
|
||||
{
|
||||
uint64_t supported_bitmask = 0;
|
||||
uint8_t nr_fixed_counters = 0;
|
||||
uint8_t i;
|
||||
|
||||
/* Fixed counters require Architectural vPMU Version 2+. */
|
||||
if (guest_get_pmu_version() >= 2)
|
||||
nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
|
||||
|
||||
/*
|
||||
* The supported bitmask for fixed counters was introduced in PMU
|
||||
* version 5.
|
||||
*/
|
||||
if (guest_get_pmu_version() >= 5)
|
||||
supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
|
||||
|
||||
guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
|
||||
nr_fixed_counters, supported_bitmask);
|
||||
|
||||
for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
|
||||
uint8_t vector;
|
||||
uint64_t val;
|
||||
|
||||
if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
|
||||
vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
|
||||
FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
|
||||
__GUEST_ASSERT(vector == GP_VECTOR,
|
||||
"Expected #GP for counter %u in FIXED_CTR_CTRL", i);
|
||||
|
||||
vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
|
||||
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
|
||||
__GUEST_ASSERT(vector == GP_VECTOR,
|
||||
"Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
|
||||
continue;
|
||||
}
|
||||
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
|
||||
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
|
||||
val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
|
||||
|
||||
GUEST_ASSERT_NE(val, 0);
|
||||
}
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
|
||||
uint8_t nr_fixed_counters,
|
||||
uint32_t supported_bitmask)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
|
||||
pmu_version, perf_capabilities);
|
||||
|
||||
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
|
||||
supported_bitmask);
|
||||
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
|
||||
nr_fixed_counters);
|
||||
|
||||
run_vcpu(vcpu);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void test_intel_counters(void)
|
||||
{
|
||||
uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
|
||||
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
|
||||
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
|
||||
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
|
||||
unsigned int i;
|
||||
uint8_t v, j;
|
||||
uint32_t k;
|
||||
|
||||
const uint64_t perf_caps[] = {
|
||||
0,
|
||||
PMU_CAP_FW_WRITES,
|
||||
};
|
||||
|
||||
/*
|
||||
* Test up to PMU v5, which is the current maximum version defined by
|
||||
* Intel, i.e. is the last version that is guaranteed to be backwards
|
||||
* compatible with KVM's existing behavior.
|
||||
*/
|
||||
uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
|
||||
|
||||
/*
|
||||
* Detect the existence of events that aren't supported by selftests.
|
||||
* This will (obviously) fail any time the kernel adds support for a
|
||||
* new event, but it's worth paying that price to keep the test fresh.
|
||||
*/
|
||||
TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
|
||||
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
|
||||
nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
|
||||
|
||||
/*
|
||||
* Force iterating over known arch events regardless of whether or not
|
||||
* KVM/hardware supports a given event.
|
||||
*/
|
||||
nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
|
||||
|
||||
for (v = 0; v <= max_pmu_version; v++) {
|
||||
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
|
||||
if (!kvm_has_perf_caps && perf_caps[i])
|
||||
continue;
|
||||
|
||||
pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
|
||||
v, perf_caps[i]);
|
||||
/*
|
||||
* To keep the total runtime reasonable, test every
|
||||
* possible non-zero, non-reserved bitmap combination
|
||||
* only with the native PMU version and the full bit
|
||||
* vector length.
|
||||
*/
|
||||
if (v == pmu_version) {
|
||||
for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
|
||||
test_arch_events(v, perf_caps[i], nr_arch_events, k);
|
||||
}
|
||||
/*
|
||||
* Test single bits for all PMU version and lengths up
|
||||
* the number of events +1 (to verify KVM doesn't do
|
||||
* weird things if the guest length is greater than the
|
||||
* host length). Explicitly test a mask of '0' and all
|
||||
* ones i.e. all events being available and unavailable.
|
||||
*/
|
||||
for (j = 0; j <= nr_arch_events + 1; j++) {
|
||||
test_arch_events(v, perf_caps[i], j, 0);
|
||||
test_arch_events(v, perf_caps[i], j, 0xff);
|
||||
|
||||
for (k = 0; k < nr_arch_events; k++)
|
||||
test_arch_events(v, perf_caps[i], j, BIT(k));
|
||||
}
|
||||
|
||||
pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
|
||||
v, perf_caps[i]);
|
||||
for (j = 0; j <= nr_gp_counters; j++)
|
||||
test_gp_counters(v, perf_caps[i], j);
|
||||
|
||||
pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
|
||||
v, perf_caps[i]);
|
||||
for (j = 0; j <= nr_fixed_counters; j++) {
|
||||
for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
|
||||
test_fixed_counters(v, perf_caps[i], j, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
TEST_REQUIRE(kvm_is_pmu_enabled());
|
||||
|
||||
TEST_REQUIRE(host_cpu_is_intel);
|
||||
TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
|
||||
TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
|
||||
|
||||
kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
|
||||
kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
|
||||
is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
|
||||
|
||||
test_intel_counters();
|
||||
|
||||
return 0;
|
||||
}
|
@ -11,72 +11,18 @@
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE /* for program_invocation_short_name */
|
||||
#include "test_util.h"
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "pmu.h"
|
||||
#include "processor.h"
|
||||
|
||||
/*
|
||||
* In lieu of copying perf_event.h into tools...
|
||||
*/
|
||||
#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
|
||||
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
|
||||
|
||||
/* End of stuff taken from perf_event.h. */
|
||||
|
||||
/* Oddly, this isn't in perf_event.h. */
|
||||
#define ARCH_PERFMON_BRANCHES_RETIRED 5
|
||||
#include "test_util.h"
|
||||
|
||||
#define NUM_BRANCHES 42
|
||||
#define INTEL_PMC_IDX_FIXED 32
|
||||
|
||||
/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
|
||||
#define MAX_FILTER_EVENTS 300
|
||||
#define MAX_TEST_EVENTS 10
|
||||
|
||||
#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
|
||||
#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
|
||||
#define PMU_EVENT_FILTER_INVALID_NEVENTS (MAX_FILTER_EVENTS + 1)
|
||||
|
||||
/*
|
||||
* This is how the event selector and unit mask are stored in an AMD
|
||||
* core performance event-select register. Intel's format is similar,
|
||||
* but the event selector is only 8 bits.
|
||||
*/
|
||||
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
|
||||
(umask & 0xff) << 8)
|
||||
|
||||
/*
|
||||
* "Branch instructions retired", from the Intel SDM, volume 3,
|
||||
* "Pre-defined Architectural Performance Events."
|
||||
*/
|
||||
|
||||
#define INTEL_BR_RETIRED EVENT(0xc4, 0)
|
||||
|
||||
/*
|
||||
* "Retired branch instructions", from Processor Programming Reference
|
||||
* (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
|
||||
* Preliminary Processor Programming Reference (PPR) for AMD Family
|
||||
* 17h Model 31h, Revision B0 Processors, and Preliminary Processor
|
||||
* Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
|
||||
* B1 Processors Volume 1 of 2.
|
||||
*/
|
||||
|
||||
#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
|
||||
|
||||
|
||||
/*
|
||||
* "Retired instructions", from Processor Programming Reference
|
||||
* (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
|
||||
* Preliminary Processor Programming Reference (PPR) for AMD Family
|
||||
* 17h Model 31h, Revision B0 Processors, and Preliminary Processor
|
||||
* Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
|
||||
* B1 Processors Volume 1 of 2.
|
||||
* --- and ---
|
||||
* "Instructions retired", from the Intel SDM, volume 3,
|
||||
* "Pre-defined Architectural Performance Events."
|
||||
*/
|
||||
|
||||
#define INST_RETIRED EVENT(0xc0, 0)
|
||||
#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
|
||||
|
||||
struct __kvm_pmu_event_filter {
|
||||
__u32 action;
|
||||
@ -84,26 +30,28 @@ struct __kvm_pmu_event_filter {
|
||||
__u32 fixed_counter_bitmap;
|
||||
__u32 flags;
|
||||
__u32 pad[4];
|
||||
__u64 events[MAX_FILTER_EVENTS];
|
||||
__u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
|
||||
};
|
||||
|
||||
/*
|
||||
* This event list comprises Intel's eight architectural events plus
|
||||
* AMD's "retired branch instructions" for Zen[123] (and possibly
|
||||
* other AMD CPUs).
|
||||
* This event list comprises Intel's known architectural events, plus AMD's
|
||||
* "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs).
|
||||
* Note, AMD and Intel use the same encoding for instructions retired.
|
||||
*/
|
||||
kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
|
||||
|
||||
static const struct __kvm_pmu_event_filter base_event_filter = {
|
||||
.nevents = ARRAY_SIZE(base_event_filter.events),
|
||||
.events = {
|
||||
EVENT(0x3c, 0),
|
||||
INST_RETIRED,
|
||||
EVENT(0x3c, 1),
|
||||
EVENT(0x2e, 0x4f),
|
||||
EVENT(0x2e, 0x41),
|
||||
EVENT(0xc4, 0),
|
||||
EVENT(0xc5, 0),
|
||||
EVENT(0xa4, 1),
|
||||
AMD_ZEN_BR_RETIRED,
|
||||
INTEL_ARCH_CPU_CYCLES,
|
||||
INTEL_ARCH_INSTRUCTIONS_RETIRED,
|
||||
INTEL_ARCH_REFERENCE_CYCLES,
|
||||
INTEL_ARCH_LLC_REFERENCES,
|
||||
INTEL_ARCH_LLC_MISSES,
|
||||
INTEL_ARCH_BRANCHES_RETIRED,
|
||||
INTEL_ARCH_BRANCHES_MISPREDICTED,
|
||||
INTEL_ARCH_TOPDOWN_SLOTS,
|
||||
AMD_ZEN_BRANCHES_RETIRED,
|
||||
},
|
||||
};
|
||||
|
||||
@ -165,9 +113,9 @@ static void intel_guest_code(void)
|
||||
for (;;) {
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
|
||||
wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
|
||||
ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
|
||||
ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
|
||||
wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
|
||||
ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
|
||||
ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
|
||||
|
||||
run_and_measure_loop(MSR_IA32_PMC0);
|
||||
@ -189,9 +137,9 @@ static void amd_guest_code(void)
|
||||
for (;;) {
|
||||
wrmsr(MSR_K7_EVNTSEL0, 0);
|
||||
wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
|
||||
ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
|
||||
ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
|
||||
wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
|
||||
ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
|
||||
ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
|
||||
|
||||
run_and_measure_loop(MSR_K7_PERFCTR0);
|
||||
GUEST_SYNC(0);
|
||||
@ -312,7 +260,7 @@ static void test_amd_deny_list(struct kvm_vcpu *vcpu)
|
||||
.action = KVM_PMU_EVENT_DENY,
|
||||
.nevents = 1,
|
||||
.events = {
|
||||
EVENT(0x1C2, 0),
|
||||
RAW_EVENT(0x1C2, 0),
|
||||
},
|
||||
};
|
||||
|
||||
@ -347,9 +295,9 @@ static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
|
||||
|
||||
f.action = KVM_PMU_EVENT_DENY;
|
||||
|
||||
remove_event(&f, INST_RETIRED);
|
||||
remove_event(&f, INTEL_BR_RETIRED);
|
||||
remove_event(&f, AMD_ZEN_BR_RETIRED);
|
||||
remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
|
||||
remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
|
||||
remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
|
||||
test_with_filter(vcpu, &f);
|
||||
|
||||
ASSERT_PMC_COUNTING_INSTRUCTIONS();
|
||||
@ -361,9 +309,9 @@ static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
|
||||
|
||||
f.action = KVM_PMU_EVENT_ALLOW;
|
||||
|
||||
remove_event(&f, INST_RETIRED);
|
||||
remove_event(&f, INTEL_BR_RETIRED);
|
||||
remove_event(&f, AMD_ZEN_BR_RETIRED);
|
||||
remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
|
||||
remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
|
||||
remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
|
||||
test_with_filter(vcpu, &f);
|
||||
|
||||
ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
|
||||
@ -452,9 +400,9 @@ static bool use_amd_pmu(void)
|
||||
* - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
|
||||
*/
|
||||
#define MEM_INST_RETIRED 0xD0
|
||||
#define MEM_INST_RETIRED_LOAD EVENT(MEM_INST_RETIRED, 0x81)
|
||||
#define MEM_INST_RETIRED_STORE EVENT(MEM_INST_RETIRED, 0x82)
|
||||
#define MEM_INST_RETIRED_LOAD_STORE EVENT(MEM_INST_RETIRED, 0x83)
|
||||
#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81)
|
||||
#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82)
|
||||
#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83)
|
||||
|
||||
static bool supports_event_mem_inst_retired(void)
|
||||
{
|
||||
@ -486,9 +434,9 @@ static bool supports_event_mem_inst_retired(void)
|
||||
* B1 Processors Volume 1 of 2.
|
||||
*/
|
||||
#define LS_DISPATCH 0x29
|
||||
#define LS_DISPATCH_LOAD EVENT(LS_DISPATCH, BIT(0))
|
||||
#define LS_DISPATCH_STORE EVENT(LS_DISPATCH, BIT(1))
|
||||
#define LS_DISPATCH_LOAD_STORE EVENT(LS_DISPATCH, BIT(2))
|
||||
#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0))
|
||||
#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1))
|
||||
#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
|
||||
|
||||
#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
|
||||
KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
|
||||
@ -729,14 +677,14 @@ static void add_dummy_events(uint64_t *events, int nevents)
|
||||
|
||||
static void test_masked_events(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int nevents = MAX_FILTER_EVENTS - MAX_TEST_EVENTS;
|
||||
uint64_t events[MAX_FILTER_EVENTS];
|
||||
int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
|
||||
uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
|
||||
|
||||
/* Run the test cases against a sparse PMU event filter. */
|
||||
run_masked_events_tests(vcpu, events, 0);
|
||||
|
||||
/* Run the test cases against a dense PMU event filter. */
|
||||
add_dummy_events(events, MAX_FILTER_EVENTS);
|
||||
add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
|
||||
run_masked_events_tests(vcpu, events, nevents);
|
||||
}
|
||||
|
||||
@ -809,20 +757,19 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
|
||||
TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
|
||||
}
|
||||
|
||||
static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
|
||||
static void intel_run_fixed_counter_guest_code(uint8_t idx)
|
||||
{
|
||||
for (;;) {
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
|
||||
|
||||
/* Only OS_EN bit is enabled for fixed counter[idx]. */
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
|
||||
BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
|
||||
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
|
||||
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
|
||||
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
|
||||
|
||||
GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
|
||||
GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
|
||||
}
|
||||
}
|
||||
|
||||
@ -920,7 +867,7 @@ int main(int argc, char *argv[])
|
||||
struct kvm_vcpu *vcpu, *vcpu2 = NULL;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
|
||||
TEST_REQUIRE(kvm_is_pmu_enabled());
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
|
||||
|
||||
|
@ -63,7 +63,7 @@ int main(int argc, char *argv[])
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(vcpu);
|
||||
|
||||
vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR);
|
||||
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
|
||||
|
||||
rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
|
||||
TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
|
||||
|
@ -13,10 +13,7 @@
|
||||
#include "kvm_util.h"
|
||||
#include "vmx.h"
|
||||
|
||||
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
|
||||
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
|
||||
#define KVM_FEP_LENGTH 5
|
||||
static int fep_available = 1;
|
||||
static bool fep_available;
|
||||
|
||||
#define MSR_NON_EXISTENT 0x474f4f00
|
||||
|
||||
@ -261,13 +258,6 @@ static void guest_code_filter_allow(void)
|
||||
GUEST_ASSERT(data == 2);
|
||||
GUEST_ASSERT(guest_exception_count == 0);
|
||||
|
||||
/*
|
||||
* Test to see if the instruction emulator is available (ie: the module
|
||||
* parameter 'kvm.force_emulation_prefix=1' is set). This instruction
|
||||
* will #UD if it isn't available.
|
||||
*/
|
||||
__asm__ __volatile__(KVM_FEP "nop");
|
||||
|
||||
if (fep_available) {
|
||||
/* Let userspace know we aren't done. */
|
||||
GUEST_SYNC(0);
|
||||
@ -389,12 +379,6 @@ static void guest_fep_gp_handler(struct ex_regs *regs)
|
||||
&em_wrmsr_start, &em_wrmsr_end);
|
||||
}
|
||||
|
||||
static void guest_ud_handler(struct ex_regs *regs)
|
||||
{
|
||||
fep_available = 0;
|
||||
regs->rip += KVM_FEP_LENGTH;
|
||||
}
|
||||
|
||||
static void check_for_guest_assert(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct ucall uc;
|
||||
@ -533,8 +517,11 @@ KVM_ONE_VCPU_TEST_SUITE(user_msr);
|
||||
KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
|
||||
{
|
||||
struct kvm_vm *vm = vcpu->vm;
|
||||
uint64_t cmd;
|
||||
int rc;
|
||||
|
||||
sync_global_to_guest(vm, fep_available);
|
||||
|
||||
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
|
||||
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
|
||||
vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
|
||||
@ -561,11 +548,11 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
|
||||
run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
|
||||
run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
|
||||
|
||||
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
|
||||
vcpu_run(vcpu);
|
||||
vm_install_exception_handler(vm, UD_VECTOR, NULL);
|
||||
cmd = process_ucall(vcpu);
|
||||
|
||||
if (process_ucall(vcpu) != UCALL_DONE) {
|
||||
if (fep_available) {
|
||||
TEST_ASSERT_EQ(cmd, UCALL_SYNC);
|
||||
vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
|
||||
|
||||
/* Process emulated rdmsr and wrmsr instructions. */
|
||||
@ -583,6 +570,7 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
|
||||
/* Confirm the guest completed without issues. */
|
||||
run_guest_then_process_ucall_done(vcpu);
|
||||
} else {
|
||||
TEST_ASSERT_EQ(cmd, UCALL_DONE);
|
||||
printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
|
||||
}
|
||||
}
|
||||
@ -786,5 +774,7 @@ KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
fep_available = kvm_is_forced_emulation_enabled();
|
||||
|
||||
return test_harness_run(argc, argv);
|
||||
}
|
||||
|
@ -213,7 +213,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
|
||||
TEST_REQUIRE(kvm_is_pmu_enabled());
|
||||
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
|
||||
|
||||
TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
|
||||
|
Loading…
Reference in New Issue
Block a user