This commit is contained in:
Stephen Rothwell 2024-12-20 13:58:02 +11:00
commit 4942c1f9b5
44 changed files with 1398 additions and 857 deletions

View File

@ -1825,15 +1825,18 @@ emulate them efficiently. The fields in each entry are defined as follows:
the values returned by the cpuid instruction for
this function/index combination
The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
support. Instead it is reported via::
x2APIC (CPUID leaf 1, ecx[21) and TSC deadline timer (CPUID leaf 1, ecx[24])
may be returned as true, but they depend on KVM_CREATE_IRQCHIP for in-kernel
emulation of the local APIC. TSC deadline timer support is also reported via::
ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
Enabling x2APIC in KVM_SET_CPUID2 requires KVM_CREATE_IRQCHIP as KVM doesn't
support forwarding x2APIC MSR accesses to userspace, i.e. KVM does not support
emulating x2APIC in userspace.
4.47 KVM_PPC_GET_PVINFO
-----------------------
@ -7670,6 +7673,7 @@ branch to guests' 0x200 interrupt vector.
:Architectures: x86
:Parameters: args[0] defines which exits are disabled
:Returns: 0 on success, -EINVAL when args[0] contains invalid exits
or if any vCPUs have already been created
Valid bits in args[0] are::

View File

@ -454,6 +454,7 @@
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */

View File

@ -83,7 +83,6 @@ KVM_X86_OP(enable_nmi_window)
KVM_X86_OP(enable_irq_window)
KVM_X86_OP_OPTIONAL(update_cr8_intercept)
KVM_X86_OP(refresh_apicv_exec_ctrl)
KVM_X86_OP_OPTIONAL(hwapic_irr_update)
KVM_X86_OP_OPTIONAL(hwapic_isr_update)
KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
@ -100,6 +99,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)

View File

@ -739,6 +739,23 @@ struct kvm_queued_exception {
bool has_payload;
};
/*
* Hardware-defined CPUID leafs that are either scattered by the kernel or are
* unknown to the kernel, but need to be directly used by KVM. Note, these
* word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
*/
enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
CPUID_7_1_EDX,
CPUID_8000_0007_EDX,
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@ -854,27 +871,23 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
bool is_amd_compatible;
/*
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
* when "struct kvm_vcpu_arch" is no longer defined in an
* arch/x86/include/asm header. The max is mostly arbitrary, i.e.
* can be increased as necessary.
* cpu_caps holds the effective guest capabilities, i.e. the features
* the vCPU is allowed to use. Typically, but not always, features can
* be used by the guest if and only if both KVM and userspace want to
* expose the feature to the guest.
*
* A common exception is for virtualization holes, i.e. when KVM can't
* prevent the guest from using a feature, in which case the vCPU "has"
* the feature regardless of what KVM or userspace desires.
*
* Note, features that don't require KVM involvement in any way are
* NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
* guest CPUID provided by userspace.
*/
#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
/*
* Track whether or not the guest is allowed to use features that are
* governed by KVM, where "governed" means KVM needs to manage state
* and/or explicitly enable the feature in hardware. Typically, but
* not always, governed features can be used by the guest if and only
* if both KVM and userspace want to expose the feature to the guest.
*/
struct {
DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
} governed_features;
u32 cpu_caps[NR_KVM_CPU_CAPS];
u64 reserved_gpa_bits;
int maxphyaddr;
@ -1734,8 +1747,7 @@ struct kvm_x86_ops {
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(int isr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
@ -1757,12 +1769,15 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
* Retrieve somewhat arbitrary exit information. Intended to
* Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
u32 *exit_int_info, u32 *exit_int_info_err_code);
u32 *intr_info, u32 *error_code);
void (*get_entry_info)(struct kvm_vcpu *vcpu,
u32 *intr_info, u32 *error_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
@ -2019,8 +2034,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
*
* EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
* case the CR2/GPA value pass on the stack is valid.
* EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
* the CR2/GPA value pass on the stack is valid.
*
* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
* state and inject single-step #DBs after skipping
@ -2055,6 +2070,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
static inline bool kvm_can_emulate_event_vectoring(int emul_type)
{
return !(emul_type & EMULTYPE_PF);
}
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
@ -2062,6 +2082,8 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
u64 *data, u8 ndata);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,8 @@
extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
void kvm_set_cpu_caps(void);
void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
u32 function, u32 index);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@ -67,41 +67,40 @@ static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry,
*reg = kvm_cpu_caps[leaf];
}
static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
struct kvm_cpuid_entry2 *entry;
u32 *reg;
/*
* XSAVES is a special snowflake. Due to lack of a dedicated intercept
* on SVM, KVM must assume that XSAVES (and thus XRSTORS) is usable by
* the guest if the host supports XSAVES and *XSAVE* is exposed to the
* guest. Because the guest can execute XSAVES and XRSTORS, i.e. can
* indirectly consume XSS, KVM must ensure XSS is zeroed when running
* the guest, i.e. must set XSAVES in vCPU capabilities. But to reject
* direct XSS reads and writes (to minimize the virtualization hole and
* honor userspace's CPUID), KVM needs to check the raw guest CPUID,
* not KVM's view of guest capabilities.
*
* For all other features, guest capabilities are accurate. Expand
* this allowlist with extreme vigilance.
*/
BUILD_BUG_ON(x86_feature != X86_FEATURE_XSAVES);
entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
if (!entry)
return NULL;
return __cpuid_entry_get_reg(entry, cpuid.reg);
}
static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
u32 *reg;
reg = guest_cpuid_get_register(vcpu, x86_feature);
reg = __cpuid_entry_get_reg(entry, cpuid.reg);
if (!reg)
return false;
return *reg & __feature_bit(x86_feature);
}
static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
u32 *reg;
reg = guest_cpuid_get_register(vcpu, x86_feature);
if (reg)
*reg &= ~__feature_bit(x86_feature);
}
static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
return vcpu->arch.is_amd_compatible;
@ -150,21 +149,6 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
return x86_stepping(best->eax);
}
static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
{
return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD));
}
static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
{
return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB) ||
guest_cpuid_has(vcpu, X86_FEATURE_SBPB));
}
static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
{
return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
@ -180,7 +164,6 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
reverse_cpuid_check(x86_leaf);
kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}
@ -188,7 +171,6 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
reverse_cpuid_check(x86_leaf);
kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
}
@ -196,7 +178,6 @@ static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
reverse_cpuid_check(x86_leaf);
return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}
@ -220,58 +201,61 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}
enum kvm_governed_features {
#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
#include "governed_features.h"
KVM_NR_GOVERNED_FEATURES
};
static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
switch (x86_feature) {
#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
#include "governed_features.h"
default:
return -1;
}
unsigned int x86_leaf = __feature_leaf(x86_feature);
vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
}
static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
return kvm_governed_feature_index(x86_feature) >= 0;
unsigned int x86_leaf = __feature_leaf(x86_feature);
vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}
static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
unsigned int x86_feature,
bool guest_has_cap)
{
BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
__set_bit(kvm_governed_feature_index(x86_feature),
vcpu->arch.governed_features.enabled);
if (guest_has_cap)
guest_cpu_cap_set(vcpu, x86_feature);
else
guest_cpu_cap_clear(vcpu, x86_feature);
}
static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
kvm_governed_feature_set(vcpu, x86_feature);
}
unsigned int x86_leaf = __feature_leaf(x86_feature);
static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
unsigned int x86_feature)
{
BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
return test_bit(kvm_governed_feature_index(x86_feature),
vcpu->arch.governed_features.enabled);
return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}
static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (guest_can_use(vcpu, X86_FEATURE_LAM))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
return kvm_vcpu_is_legal_gpa(vcpu, cr3);
}
static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
{
return (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_STIBP) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBRS) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_SSBD));
}
static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
{
return (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB));
}
#endif

View File

@ -1,22 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
BUILD_BUG()
#endif
#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
KVM_GOVERNED_X86_FEATURE(GBPAGES)
KVM_GOVERNED_X86_FEATURE(XSAVES)
KVM_GOVERNED_X86_FEATURE(VMX)
KVM_GOVERNED_X86_FEATURE(NRIPS)
KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
KVM_GOVERNED_X86_FEATURE(LBRV)
KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
KVM_GOVERNED_X86_FEATURE(VGIF)
KVM_GOVERNED_X86_FEATURE(VNMI)
KVM_GOVERNED_X86_FEATURE(LAM)
#undef KVM_GOVERNED_X86_FEATURE
#undef KVM_GOVERNED_FEATURE

View File

@ -1352,7 +1352,7 @@ static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
return;
if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
!guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
!guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVEC))
return;
pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "

View File

@ -88,6 +88,8 @@ struct x86_instruction_info {
#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
/* Emulation during event vectoring is unhandleable. */
#define X86EMUL_UNHANDLEABLE_VECTORING 7
/* x86-specific emulation flags */
#define X86EMUL_F_WRITE BIT(0)

View File

@ -598,7 +598,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* version first and level-triggered interrupts never get EOIed in
* IOAPIC.
*/
if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
@ -734,10 +734,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
if (unlikely(apic->apicv_active)) {
/* need to update RVI */
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_x86_call(hwapic_irr_update)(apic->vcpu,
apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
@ -763,7 +760,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
* just set SVI.
*/
if (unlikely(apic->apicv_active))
kvm_x86_call(hwapic_isr_update)(vec);
kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec);
else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@ -808,7 +805,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* and must be left alone.
*/
if (unlikely(apic->apicv_active))
kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic));
kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic));
else {
--apic->isr_count;
BUG_ON(apic->isr_count < 0);
@ -816,6 +813,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
}
}
void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
return;
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
}
EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
/* This may race with setting of irr in __apic_accept_irq() and
@ -2634,7 +2642,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
return 0;
u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
(guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
return 1;
@ -2805,8 +2813,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
apic_update_ppr(apic);
if (apic->apicv_active) {
kvm_x86_call(apicv_post_state_restore)(vcpu);
kvm_x86_call(hwapic_irr_update)(vcpu, -1);
kvm_x86_call(hwapic_isr_update)(-1);
kvm_x86_call(hwapic_isr_update)(vcpu, -1);
}
vcpu->arch.apic_arb_prio = 0;
@ -3121,9 +3128,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_apic_update_apicv(vcpu);
if (apic->apicv_active) {
kvm_x86_call(apicv_post_state_restore)(vcpu);
kvm_x86_call(hwapic_irr_update)(vcpu,
apic_find_highest_irr(apic));
kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic));
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))

View File

@ -118,6 +118,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);

View File

@ -126,7 +126,7 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
{
if (!guest_can_use(vcpu, X86_FEATURE_LAM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
return 0;
return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);

View File

@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
return true;
}
static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
{
if (fault->exec)
return is_executable_pte(spte);
if (fault->write)
return is_writable_pte(spte);
/* Fault was on Read access */
return spte & PT_PRESENT_MASK;
}
/*
* Returns the last level spte pointer of the shadow page walk for the given
* gpa, and sets *spte to the spte value. This spte may be non-preset. If no
@ -5034,7 +5022,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
__reset_rsvds_bits_mask(&context->guest_rsvd_check,
vcpu->arch.reserved_gpa_bits,
context->cpu_role.base.level, is_efer_nx(context),
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
guest_cpuid_is_amd_compatible(vcpu));
}
@ -5111,7 +5099,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
context->root_role.level,
context->root_role.efer_nx,
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES),
is_pse, is_amd);
if (!shadow_me_mask)

View File

@ -321,6 +321,10 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn);
}
/*
* With retpoline being active an indirect call is rather expensive,
* so do a direct call in the most common case.
*/
if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && fault.is_tdp)
r = kvm_tdp_page_fault(vcpu, &fault);
else

View File

@ -461,6 +461,23 @@ static inline bool is_mmu_writable_spte(u64 spte)
return spte & shadow_mmu_writable_mask;
}
/*
* Returns true if the access indicated by @fault is allowed by the existing
* SPTE protections. Note, the caller is responsible for checking that the
* SPTE is a shadow-present, leaf SPTE (either before or after).
*/
static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
{
if (fault->exec)
return is_executable_pte(spte);
if (fault->write)
return is_writable_pte(spte);
/* Fault was on Read access */
return spte & PT_PRESENT_MASK;
}
/*
* If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
* write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,

View File

@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
return RET_PF_SPURIOUS;
if (is_shadow_present_pte(iter->old_spte) &&
is_access_allowed(fault, iter->old_spte) &&
is_last_spte(iter->old_spte, iter->level))
return RET_PF_SPURIOUS;
if (unlikely(!fault->slot))
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
else

View File

@ -797,7 +797,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
kvm_pmu_call(init)(vcpu);
kvm_pmu_refresh(vcpu);
}
/* Release perf_events for vPMCs that have been unused for a full time slice. */

View File

@ -6,23 +6,6 @@
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>
/*
* Hardware-defined CPUID leafs that are either scattered by the kernel or are
* unknown to the kernel, but need to be directly used by KVM. Note, these
* word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
*/
enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
CPUID_7_1_EDX,
CPUID_8000_0007_EDX,
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};
/*
* Define a KVM-only feature flag.
*
@ -145,7 +128,10 @@ static __always_inline u32 __feature_translate(int x86_feature)
static __always_inline u32 __feature_leaf(int x86_feature)
{
return __feature_translate(x86_feature) / 32;
u32 x86_leaf = __feature_translate(x86_feature) / 32;
reverse_cpuid_check(x86_leaf);
return x86_leaf;
}
/*
@ -168,7 +154,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_featu
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
reverse_cpuid_check(x86_leaf);
return reverse_cpuid[x86_leaf];
}

View File

@ -283,7 +283,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
memset(smram.bytes, 0, sizeof(smram.bytes));
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, &smram.smram64);
else
#endif
@ -353,7 +353,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
if (kvm_x86_call(set_efer)(vcpu, 0))
goto error;
#endif
@ -586,7 +586,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
* supports long mode.
*/
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
struct kvm_segment cs_desc;
unsigned long cr4;
@ -609,7 +609,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
unsigned long cr4, efer;
/* Clear CR4.PAE before clearing EFER.LME. */
@ -634,7 +634,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
ret = rsm_load_state_64(ctxt, &smram.smram64);
else
#endif

View File

@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void)
return false;
}
if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
!boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
return false;
}
if (boot_cpu_has(X86_FEATURE_AVIC)) {
pr_info("AVIC enabled\n");
} else if (force_avic) {

View File

@ -111,7 +111,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
{
if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
if (!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
return true;
if (!nested_npt_enabled(svm))
@ -594,7 +594,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
/*
* Reserved bits of DEBUGCTL are ignored. Be consistent with
@ -651,7 +651,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
*/
if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
else
@ -689,7 +689,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) &&
svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
nested_svm_update_tsc_ratio_msr(vcpu);
@ -710,7 +710,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* what a nrips=0 CPU would do (L1 is responsible for advancing RIP
* prior to injecting the event).
*/
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
vmcb02->control.next_rip = svm->nested.ctl.next_rip;
else if (boot_cpu_has(X86_FEATURE_NRIPS))
vmcb02->control.next_rip = vmcb12_rip;
@ -720,7 +720,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
svm->soft_int_injected = true;
svm->soft_int_csbase = vmcb12_csbase;
svm->soft_int_old_rip = vmcb12_rip;
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
svm->soft_int_next_rip = svm->nested.ctl.next_rip;
else
svm->soft_int_next_rip = vmcb12_rip;
@ -728,18 +728,18 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
LBR_CTL_ENABLE_MASK;
if (guest_can_use(vcpu, X86_FEATURE_LBRV))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV))
vmcb02->control.virt_ext |=
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
pause_count12 = svm->nested.ctl.pause_filter_count;
else
pause_count12 = 0;
if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
else
pause_thresh12 = 0;
@ -1026,7 +1026,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
nested_save_pending_event_to_vmcb12(svm, vmcb12);
if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
vmcb12->control.next_rip = vmcb02->control.next_rip;
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
@ -1065,7 +1065,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (!nested_exit_on_intr(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);

View File

@ -46,7 +46,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
switch (msr) {
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE))
return NULL;
/*
* Each PMU counter has a pair of CTL and CTR MSRs. CTLn
@ -109,7 +109,7 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
return pmu->version > 0;
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
return guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE);
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
@ -179,7 +179,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
union cpuid_0x80000022_ebx ebx;
pmu->version = 1;
if (guest_cpuid_has(vcpu, X86_FEATURE_PERFMON_V2)) {
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PERFMON_V2)) {
pmu->version = 2;
/*
* Note, PERFMON_V2 is also in 0x80000022.0x0, i.e. the guest
@ -189,7 +189,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
x86_feature_cpuid(X86_FEATURE_PERFMON_V2).index);
ebx.full = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0)->ebx;
pmu->nr_arch_gp_counters = ebx.split.num_core_pmc;
} else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
} else if (guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
} else {
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;

View File

@ -4435,8 +4435,8 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
bool v_tsc_aux = guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
}
@ -4445,16 +4445,15 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
* For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
* the host/guest supports its use.
*
* guest_can_use() checks a number of requirements on the host/guest to
* ensure that MSR_IA32_XSS is available, but it might report true even
* if X86_FEATURE_XSAVES isn't configured in the guest to ensure host
* MSR_IA32_XSS is always properly restored. For SEV-ES, it is better
* to further check that the guest CPUID actually supports
* X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved
* guests will still get intercepted and caught in the normal
* kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths.
* KVM treats the guest as being capable of using XSAVES even if XSAVES
* isn't enabled in guest CPUID as there is no intercept for XSAVES,
* i.e. the guest can use XSAVES/XRSTOR to read/write XSS if XSAVE is
* exposed to the guest and XSAVES is supported in hardware. Condition
* full XSS passthrough on the guest being able to use XSAVES *and*
* XSAVES being exposed to the guest so that KVM can at least honor
* guest CPUID for RDMSR and WRMSR.
*/
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
else

View File

@ -284,8 +284,6 @@ u32 svm_msrpm_offset(u32 msr)
return MSR_INVALID;
}
static void svm_flush_tlb_current(struct kvm_vcpu *vcpu);
static int get_npt_level(void)
{
#ifdef CONFIG_X86_64
@ -1049,7 +1047,7 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv == current_enable_lbrv)
@ -1187,14 +1185,14 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
*/
if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
if (!npt_enabled ||
!guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_INVPCID))
svm_set_intercept(svm, INTERCEPT_INVPCID);
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
else
svm_set_intercept(svm, INTERCEPT_RDTSCP);
@ -1921,9 +1919,6 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
unsigned long old_cr4 = vcpu->arch.cr4;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
svm_flush_tlb_current(vcpu);
vcpu->arch.cr4 = cr4;
if (!npt_enabled) {
cr4 |= X86_CR4_PAE;
@ -2864,7 +2859,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_AMD64_TSC_RATIO:
if (!msr_info->host_initiated &&
!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
!guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR))
return 1;
msr_info->data = svm->tsc_ratio_msr;
break;
@ -2940,7 +2935,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
!guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD))
return 1;
msr_info->data = svm->virt_spec_ctrl;
@ -3024,7 +3019,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR)) {
if (!msr->host_initiated)
return 1;
@ -3046,7 +3041,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->tsc_ratio_msr = data;
if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) &&
is_guest_mode(vcpu))
nested_svm_update_tsc_ratio_msr(vcpu);
@ -3091,7 +3086,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
!guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD))
return 1;
if (data & ~SPEC_CTRL_SSBD)
@ -3201,15 +3196,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & ~supported_de_cfg)
return 1;
/*
* Don't let the guest change the host-programmed value. The
* MSR is very model specific, i.e. contains multiple bits that
* are completely unknown to KVM, and the one bit known to KVM
* is simply a reflection of hardware capabilities.
*/
if (!msr->host_initiated && data != svm->msr_decfg)
return 1;
svm->msr_decfg = data;
break;
}
@ -3272,7 +3258,7 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
unsigned long type;
gva_t gva;
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@ -3542,6 +3528,21 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*error_code = 0;
}
static void svm_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info,
u32 *error_code)
{
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
*intr_info = control->event_inj;
if ((*intr_info & SVM_EXITINTINFO_VALID) &&
(*intr_info & SVM_EXITINTINFO_VALID_ERR))
*error_code = control->event_inj_err;
else
*error_code = 0;
}
static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -4401,27 +4402,17 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
* the guest read/write access to the host's XSS.
*/
if (boot_cpu_has(X86_FEATURE_XSAVE) &&
boot_cpu_has(X86_FEATURE_XSAVES) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
boot_cpu_has(X86_FEATURE_XSAVES) &&
guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));
/*
* Intercept VMLOAD if the vCPU model is Intel in order to emulate that
* VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
* SVM on Intel is bonkers and extremely unlikely to work).
*/
if (!guest_cpuid_is_intel_compatible(vcpu))
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
if (guest_cpuid_is_intel_compatible(vcpu))
guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
svm_recalc_instruction_intercepts(vcpu, svm);
@ -4431,7 +4422,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
!!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
!!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
if (sev_guest(vcpu->kvm))
sev_vcpu_after_set_cpuid(svm);
@ -4682,7 +4673,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
* responsible for ensuring nested SVM and SMIs are mutually exclusive.
*/
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return 1;
smram->smram64.svm_guest_flag = 1;
@ -4729,14 +4720,14 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
const struct kvm_smram_state_64 *smram64 = &smram->smram64;
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return 0;
/* Non-zero if SMI arrived while vCPU was in guest mode. */
if (!smram64->svm_guest_flag)
return 0;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SVM))
return 1;
if (!(smram64->efer & EFER_SVME))
@ -4799,9 +4790,15 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool smep, smap, is_user;
u64 error_code;
/* Check that emulation is possible during event vectoring */
if ((svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK) &&
!kvm_can_emulate_event_vectoring(emul_type))
return X86EMUL_UNHANDLEABLE_VECTORING;
/* Emulation is always possible when KVM has access to all guest state. */
if (!sev_guest(vcpu->kvm))
return X86EMUL_CONTINUE;
@ -4898,7 +4895,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
* In addition, don't apply the erratum workaround if the #NPF occurred
* while translating guest page tables (see below).
*/
error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
error_code = svm->vmcb->control.exit_info_1;
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
goto resume_guest;
@ -5086,6 +5083,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
.get_entry_info = svm_get_entry_info,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,

View File

@ -358,39 +358,32 @@ static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm)
return &to_kvm_svm(kvm)->sev_info;
}
#ifdef CONFIG_KVM_AMD_SEV
static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->active;
#else
return false;
#endif
}
static __always_inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->es_active && !WARN_ON_ONCE(!sev->active);
#else
return false;
#endif
}
static __always_inline bool sev_snp_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) &&
!WARN_ON_ONCE(!sev_es_guest(kvm));
#else
return false;
#endif
}
#else
#define sev_guest(kvm) false
#define sev_es_guest(kvm) false
#define sev_snp_guest(kvm) false
#endif
static inline bool ghcb_gpa_is_registered(struct vcpu_svm *svm, u64 val)
{
@ -502,7 +495,7 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
{
return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VGIF) &&
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
}
@ -554,7 +547,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VNMI) &&
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}

View File

@ -22,15 +22,22 @@ TRACE_EVENT(kvm_entry,
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
__field( bool, immediate_exit )
__field( u32, intr_info )
__field( u32, error_code )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
__entry->immediate_exit = force_immediate_exit;
kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info,
&__entry->error_code);
),
TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s",
__entry->vcpu_id, __entry->rip,
__entry->intr_info, __entry->error_code,
__entry->immediate_exit ? "[immediate exit]" : "")
);
@ -308,12 +315,14 @@ TRACE_EVENT(name, \
__field( u32, intr_info ) \
__field( u32, error_code ) \
__field( unsigned int, vcpu_id ) \
__field( u64, requests ) \
), \
\
TP_fast_assign( \
__entry->guest_rip = kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
__entry->requests = READ_ONCE(vcpu->requests); \
kvm_x86_call(get_exit_info)(vcpu, \
&__entry->exit_reason, \
&__entry->info1, \
@ -323,11 +332,13 @@ TRACE_EVENT(name, \
), \
\
TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \
"info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \
"info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \
"requests 0x%016llx", \
__entry->vcpu_id, \
kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \
__entry->guest_rip, __entry->info1, __entry->info2, \
__entry->intr_info, __entry->error_code) \
__entry->intr_info, __entry->error_code, \
__entry->requests) \
)
/*

View File

@ -42,7 +42,7 @@ static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx)
return vmx->nested.hv_evmcs;
}
static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
static inline bool guest_cpu_cap_has_evmcs(struct kvm_vcpu *vcpu)
{
/*
* eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and

View File

@ -100,7 +100,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.apicv_pre_state_restore = vmx_apicv_pre_state_restore,
.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_interrupt = vmx_deliver_interrupt,
@ -111,6 +110,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
.get_entry_info = vmx_get_entry_info,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,

View File

@ -257,7 +257,7 @@ static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr)
* state. It is possible that the area will stay mapped as
* vmx->nested.hv_evmcs but this shouldn't be a problem.
*/
if (!guest_cpuid_has_evmcs(vcpu) ||
if (!guest_cpu_cap_has_evmcs(vcpu) ||
!evmptr_is_valid(nested_get_evmptr(vcpu)))
return false;
@ -2089,7 +2089,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
if (likely(!guest_cpuid_has_evmcs(vcpu)))
if (likely(!guest_cpu_cap_has_evmcs(vcpu)))
return EVMPTRLD_DISABLED;
evmcs_gpa = nested_get_evmptr(vcpu);
@ -2992,7 +2992,7 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return -EINVAL;
#ifdef CONFIG_KVM_HYPERV
if (guest_cpuid_has_evmcs(vcpu))
if (guest_cpu_cap_has_evmcs(vcpu))
return nested_evmcs_check_controls(vmcs12);
#endif
@ -3287,7 +3287,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
* L2 was running), map it here to make sure vmcs12 changes are
* properly reflected.
*/
if (guest_cpuid_has_evmcs(vcpu) &&
if (guest_cpu_cap_has_evmcs(vcpu) &&
vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
enum nested_evmptrld_status evmptrld_status =
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
@ -3481,14 +3481,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
return 1;
}
static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
{
u8 rvi = vmx_get_rvi();
u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
return ((rvi & 0xf0) > (vppr & 0xf0));
}
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12);
@ -3508,7 +3500,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
enum vm_entry_failure_code entry_failure_code;
bool evaluate_pending_interrupts;
union vmx_exit_reason exit_reason = {
.basic = EXIT_REASON_INVALID_STATE,
.failed_vmentry = 1,
@ -3527,13 +3518,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
kvm_service_local_tlb_flush_requests(vcpu);
evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
if (!evaluate_pending_interrupts)
evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu);
if (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
@ -3616,9 +3600,13 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
* Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI
* when it executed VMLAUNCH/VMRESUME, as entering non-root mode can
* effectively unblock various events, e.g. INIT/SIPI cause VM-Exit
* unconditionally.
* unconditionally. Take care to pull data from vmcs01 as appropriate,
* e.g. when checking for interrupt windows, as vmcs02 is now loaded.
*/
if (unlikely(evaluate_pending_interrupts))
if ((__exec_controls_get(&vmx->vmcs01) & (CPU_BASED_INTR_WINDOW_EXITING |
CPU_BASED_NMI_WINDOW_EXITING)) ||
kvm_apic_has_pending_init_or_sipi(vcpu) ||
kvm_apic_has_interrupt(vcpu))
kvm_make_request(KVM_REQ_EVENT, vcpu);
/*
@ -3751,14 +3739,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
goto vmentry_failed;
/* Emulate processing of posted interrupts on VM-Enter. */
if (nested_cpu_has_posted_intr(vmcs12) &&
kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
}
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
@ -4220,13 +4200,25 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
*/
bool block_nested_exceptions = vmx->nested.nested_run_pending;
/*
* New events (not exceptions) are only recognized at instruction
* Events that don't require injection, i.e. that are virtualized by
* hardware, aren't blocked by a pending VM-Enter as KVM doesn't need
* to regain control in order to deliver the event, and hardware will
* handle event ordering, e.g. with respect to injected exceptions.
*
* But, new events (not exceptions) are only recognized at instruction
* boundaries. If an event needs reinjection, then KVM is handling a
* VM-Exit that occurred _during_ instruction execution; new events are
* blocked until the instruction completes.
* VM-Exit that occurred _during_ instruction execution; new events,
* irrespective of whether or not they're injected, are blocked until
* the instruction completes.
*/
bool block_non_injected_events = kvm_event_needs_reinjection(vcpu);
/*
* Inject events are blocked by nested VM-Enter, as KVM is responsible
* for managing priority between concurrent events, i.e. KVM needs to
* wait until after VM-Enter completes to deliver injected events.
*/
bool block_nested_events = block_nested_exceptions ||
kvm_event_needs_reinjection(vcpu);
block_non_injected_events;
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
@ -4338,18 +4330,26 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
int irq;
if (block_nested_events)
return -EBUSY;
if (!nested_exit_on_intr(vcpu))
if (!nested_exit_on_intr(vcpu)) {
if (block_nested_events)
return -EBUSY;
goto no_vmexit;
}
if (!nested_exit_intr_ack_set(vcpu)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
return 0;
}
irq = kvm_cpu_get_extint(vcpu);
if (irq != -1) {
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
return 0;
@ -4368,11 +4368,22 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
* and enabling posted interrupts requires ACK-on-exit.
*/
if (irq == vmx->nested.posted_intr_nv) {
/*
* Nested posted interrupts are delivered via RVI, i.e.
* aren't injected by KVM, and so can be queued even if
* manual event injection is disallowed.
*/
if (block_non_injected_events)
return -EBUSY;
vmx->nested.pi_pending = true;
kvm_apic_clear_irr(vcpu, irq);
goto no_vmexit;
}
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
@ -5015,7 +5026,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
* doesn't isolate different VMCSs, i.e. in this case, doesn't provide
* separate modes for L2 vs L1.
*/
if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL))
indirect_branch_prediction_barrier();
/* Update any VMCS fields that might have changed while L2 ran */
@ -5050,6 +5061,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
if (vmx->nested.update_vmcs01_hwapic_isr) {
vmx->nested.update_vmcs01_hwapic_isr = false;
kvm_apic_update_hwapic_isr(vcpu);
}
if ((vm_exit_reason != -1) &&
(enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
@ -6279,7 +6295,7 @@ static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
{
u32 encls_leaf;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) ||
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
return false;
@ -6617,7 +6633,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@ -6758,7 +6774,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return -EINVAL;
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@ -6792,7 +6808,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
(!guest_can_use(vcpu, X86_FEATURE_VMX) ||
(!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) ||
!vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;

View File

@ -110,7 +110,7 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
return 0;
return vcpu->arch.perf_capabilities;
@ -160,7 +160,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
break;
case MSR_IA32_DS_AREA:
ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
ret = guest_cpu_cap_has(vcpu, X86_FEATURE_DS);
break;
case MSR_PEBS_DATA_CFG:
perf_capabilities = vcpu_get_perf_capabilities(vcpu);

View File

@ -122,7 +122,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
* likely than a bad userspace address.
*/
if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2)) {
memset(&ex, 0, sizeof(ex));
ex.vector = PF_VECTOR;
ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
@ -365,7 +365,7 @@ static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
return true;
if (leaf >= EAUG && leaf <= EMODT)
return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
return guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2);
return false;
}
@ -381,8 +381,8 @@ int handle_encls(struct kvm_vcpu *vcpu)
{
u32 leaf = (u32)kvm_rax_read(vcpu);
if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
!guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
if (!enable_sgx || !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) ||
!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) {
kvm_queue_exception(vcpu, UD_VECTOR);
} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
!sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
@ -479,15 +479,15 @@ void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (!cpu_has_vmx_encls_vmexit())
return;
if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) &&
sgx_enabled_in_guest_bios(vcpu)) {
if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) {
bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
if (sgx_intercept_encls_ecreate(vcpu))
bitmap |= (1 << ECREATE);
}
if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2))
bitmap &= ~GENMASK_ULL(EMODT, EAUG);
/*

View File

@ -1636,7 +1636,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
* result in a #GP unless the same write also clears TraceEn.
*/
if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
(data & RTIT_CTL_TRACEEN) &&
data != vmx->pt_desc.guest.ctl)
return 1;
/*
@ -1705,6 +1706,12 @@ int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
kvm_queue_exception(vcpu, UD_VECTOR);
return X86EMUL_PROPAGATE_FAULT;
}
/* Check that emulation is possible during event vectoring */
if ((to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
!kvm_can_emulate_event_vectoring(emul_type))
return X86EMUL_UNHANDLEABLE_VECTORING;
return X86EMUL_CONTINUE;
}
@ -1908,8 +1915,8 @@ static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
guest_cpu_cap_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
guest_cpu_cap_has(&vmx->vcpu, X86_FEATURE_RDPID));
/*
* hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
@ -2062,7 +2069,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
!guest_cpu_cap_has(vcpu, X86_FEATURE_MPX)))
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@ -2078,13 +2085,13 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC))
return 1;
msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data))
@ -2097,7 +2104,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* sanity checking and refuse to boot. Filter all unsupported
* features out.
*/
if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu))
if (!msr_info->host_initiated && guest_cpu_cap_has_evmcs(vcpu))
nested_evmcs_filter_control_msr(vcpu, msr_info->index,
&msr_info->data);
#endif
@ -2167,7 +2174,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
u64 data)
{
#ifdef CONFIG_X86_64
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return (u32)data;
#endif
return (unsigned long)data;
@ -2178,7 +2185,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
u64 debugctl = 0;
if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
(host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
(host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) &&
@ -2282,7 +2289,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
!guest_cpu_cap_has(vcpu, X86_FEATURE_MPX)))
return 1;
if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) ||
(data & MSR_IA32_BNDCFGS_RSVD))
@ -2384,7 +2391,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* behavior, but it's close enough.
*/
if (!msr_info->host_initiated &&
(!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) ||
(!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC) ||
((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) &&
!(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED))))
return 1;
@ -2394,7 +2401,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
if (!guest_can_use(vcpu, X86_FEATURE_VMX))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_RTIT_CTL:
@ -2468,9 +2475,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((data & PERF_CAP_PEBS_MASK) !=
(kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK))
return 1;
if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_DS))
return 1;
if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_DTES64))
return 1;
if (!cpuid_model_is_consistent(vcpu))
return 1;
@ -4590,10 +4597,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
bool __enabled; \
\
if (cpu_has_vmx_##name()) { \
if (kvm_is_governed_feature(X86_FEATURE_##feat_name)) \
__enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name); \
else \
__enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name); \
__enabled = guest_cpu_cap_has(__vcpu, X86_FEATURE_##feat_name); \
vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
__enabled, exiting); \
} \
@ -4669,8 +4673,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
*/
if (cpu_has_vmx_rdtscp()) {
bool rdpid_or_rdtscp_enabled =
guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
vmx_adjust_secondary_exec_control(vmx, &exec_control,
SECONDARY_EXEC_ENABLE_RDTSCP,
@ -5959,7 +5963,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
} operand;
int gpr_index;
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@ -6191,6 +6195,15 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
}
}
void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
{
*intr_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
if (is_exception_with_error_code(*intr_info))
*error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
else
*error_code = 0;
}
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
@ -6543,33 +6556,15 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 0;
}
/*
* Note:
* Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
* delivery event since it indicates guest is accessing MMIO.
* The vm-exit can be triggered again after return to guest that
* will cause infinite loop.
*/
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason.basic != EXIT_REASON_EXCEPTION_NMI &&
exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
exit_reason.basic != EXIT_REASON_PML_FULL &&
exit_reason.basic != EXIT_REASON_APIC_ACCESS &&
exit_reason.basic != EXIT_REASON_TASK_SWITCH &&
exit_reason.basic != EXIT_REASON_NOTIFY)) {
int ndata = 3;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
vcpu->run->internal.data[0] = vectoring_info;
vcpu->run->internal.data[1] = exit_reason.full;
vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
vcpu->run->internal.data[ndata++] =
vmcs_read64(GUEST_PHYSICAL_ADDRESS);
}
vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
vcpu->run->internal.ndata = ndata;
exit_reason.basic != EXIT_REASON_NOTIFY &&
exit_reason.basic != EXIT_REASON_EPT_MISCONFIG)) {
kvm_prepare_event_vectoring_exit(vcpu, INVALID_GPA);
return 0;
}
@ -6862,11 +6857,32 @@ void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
read_unlock(&vcpu->kvm->mmu_lock);
}
void vmx_hwapic_isr_update(int max_isr)
void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
u16 status;
u8 old;
/*
* If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI
* is only relevant for if and only if Virtual Interrupt Delivery is
* enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's
* vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested
* VM-Exit, otherwise L1 with run with a stale SVI.
*/
if (is_guest_mode(vcpu)) {
/*
* KVM is supposed to forward intercepted L2 EOIs to L1 if VID
* is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
* Note, userspace can stuff state while L2 is active; assert
* that VID is disabled if and only if the vCPU is in KVM_RUN
* to avoid false positives if userspace is setting APIC state.
*/
WARN_ON_ONCE(vcpu->wants_to_run &&
nested_cpu_has_vid(get_vmcs12(vcpu)));
to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
return;
}
if (max_isr == -1)
max_isr = 0;
@ -6896,20 +6912,6 @@ static void vmx_set_rvi(int vector)
}
}
void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
/*
* When running L2, updating RVI is only relevant when
* vmcs12 virtual-interrupt-delivery enabled.
* However, it can be enabled only when L1 also
* intercepts external-interrupts and in that case
* we should not update vmcs02 RVI but instead intercept
* interrupt. Therefore, do nothing when running L2.
*/
if (!is_guest_mode(vcpu))
vmx_set_rvi(max_irr);
}
int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -7828,12 +7830,8 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
* set if and only if XSAVE is supported.
*/
if (boot_cpu_has(X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM);
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE))
guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
vmx_setup_uret_msrs(vmx);
@ -7841,7 +7839,7 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmcs_set_secondary_exec_control(vmx,
vmx_secondary_exec_control(vmx));
if (guest_can_use(vcpu, X86_FEATURE_VMX))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
@ -7850,25 +7848,25 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
if (guest_can_use(vcpu, X86_FEATURE_VMX))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
nested_vmx_cr_fixed1_bits_update(vcpu);
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
guest_cpu_cap_has(vcpu, X86_FEATURE_INTEL_PT))
update_intel_pt_cfg(vcpu);
if (boot_cpu_has(X86_FEATURE_RTM)) {
struct vmx_uret_msr *msr;
msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
if (msr) {
bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
bool enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_RTM);
vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
}
}
if (kvm_cpu_cap_has(X86_FEATURE_XFD))
vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
!guest_cpuid_has(vcpu, X86_FEATURE_XFD));
!guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
if (boot_cpu_has(X86_FEATURE_IBPB))
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
@ -7876,17 +7874,17 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
set_cr4_guest_host_mask(vmx);
vmx_write_encls_bitmap(vcpu, NULL);
if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX))
vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
else
vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;
if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_SGX_LC_ENABLED;
else

View File

@ -176,6 +176,7 @@ struct nested_vmx {
bool reload_vmcs01_apic_access_page;
bool update_vmcs01_cpu_dirty_logging;
bool update_vmcs01_apicv_status;
bool update_vmcs01_hwapic_isr;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to

View File

@ -47,8 +47,7 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu);
void vmx_migrate_timers(struct kvm_vcpu *vcpu);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu);
void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void vmx_hwapic_isr_update(int max_isr);
void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu);
void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
int trig_mode, int vector);
@ -104,8 +103,11 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr);
u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code);
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);

View File

@ -119,8 +119,6 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE
@ -1179,7 +1177,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.xcr0 != kvm_host.xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
@ -1210,7 +1208,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.xcr0 != kvm_host.xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, kvm_host.xcr0);
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
wrmsrl(MSR_IA32_XSS, kvm_host.xss);
}
@ -1283,18 +1281,6 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
return false;
if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
return false;
return true;
}
EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
return __kvm_is_valid_cr4(vcpu, cr4) &&
@ -1516,10 +1502,10 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
u64 fixed = DR6_FIXED_1;
if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_RTM))
fixed |= DR6_RTM;
if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
fixed |= DR6_BUS_LOCK;
return fixed;
}
@ -1695,20 +1681,20 @@ static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & EFER_AUTOIBRS && !guest_cpuid_has(vcpu, X86_FEATURE_AUTOIBRS))
if (efer & EFER_AUTOIBRS && !guest_cpu_cap_has(vcpu, X86_FEATURE_AUTOIBRS))
return false;
if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
if (efer & EFER_FFXSR && !guest_cpu_cap_has(vcpu, X86_FEATURE_FXSR_OPT))
return false;
if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
if (efer & EFER_SVME && !guest_cpu_cap_has(vcpu, X86_FEATURE_SVM))
return false;
if (efer & (EFER_LME | EFER_LMA) &&
!guest_cpuid_has(vcpu, X86_FEATURE_LM))
!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return false;
if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
if (efer & EFER_NX && !guest_cpu_cap_has(vcpu, X86_FEATURE_NX))
return false;
return true;
@ -1850,8 +1836,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
return 1;
if (!host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
return 1;
/*
@ -1908,8 +1894,8 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return 1;
if (!host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
return 1;
break;
}
@ -2095,7 +2081,7 @@ EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
{
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
!guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT))
return kvm_handle_invalid_op(vcpu);
pr_warn_once("%s instruction emulated as NOP!\n", insn);
@ -3767,13 +3753,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_ARCH_CAPABILITIES:
if (!msr_info->host_initiated ||
!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
!guest_cpu_cap_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
return KVM_MSR_RET_UNSUPPORTED;
vcpu->arch.arch_capabilities = data;
break;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated ||
!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
return KVM_MSR_RET_UNSUPPORTED;
if (data & ~kvm_caps.supported_perf_cap)
@ -3797,11 +3783,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((!guest_has_pred_cmd_msr(vcpu)))
return 1;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB))
reserved_bits |= PRED_CMD_IBPB;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SBPB))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB))
reserved_bits |= PRED_CMD_SBPB;
}
@ -3822,7 +3808,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
case MSR_IA32_FLUSH_CMD:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D))
return 1;
if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
@ -3873,7 +3859,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
case MSR_IA32_TSC_ADJUST:
if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
if (!msr_info->host_initiated) {
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
adjust_tsc_offset_guest(vcpu, adj);
@ -3900,7 +3886,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
((old_val ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
kvm_update_cpuid_runtime(vcpu);
@ -4077,12 +4063,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_pr_unimpl_wrmsr(vcpu, msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
vcpu->arch.osvw.length = data;
break;
case MSR_AMD64_OSVW_STATUS:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
vcpu->arch.osvw.status = data;
break;
@ -4101,7 +4087,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#ifdef CONFIG_X86_64
case MSR_IA32_XFD:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
!guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
if (data & ~kvm_guest_supported_xfd(vcpu))
@ -4111,7 +4097,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_XFD_ERR:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
!guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
if (data & ~kvm_guest_supported_xfd(vcpu))
@ -4226,12 +4212,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_IA32_ARCH_CAPABILITIES:
if (!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
return KVM_MSR_RET_UNSUPPORTED;
msr_info->data = vcpu->arch.arch_capabilities;
break;
case MSR_IA32_PERF_CAPABILITIES:
if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
return KVM_MSR_RET_UNSUPPORTED;
msr_info->data = vcpu->arch.perf_capabilities;
break;
@ -4432,12 +4418,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0xbe702111;
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
msr_info->data = vcpu->arch.osvw.length;
break;
case MSR_AMD64_OSVW_STATUS:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
msr_info->data = vcpu->arch.osvw.status;
break;
@ -4456,14 +4442,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#ifdef CONFIG_X86_64
case MSR_IA32_XFD:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
!guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
break;
case MSR_IA32_XFD_ERR:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
!guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
msr_info->data = vcpu->arch.guest_fpu.xfd_err;
@ -4545,6 +4531,20 @@ static inline bool kvm_can_mwait_in_guest(void)
boot_cpu_has(X86_FEATURE_ARAT);
}
static u64 kvm_get_allowed_disable_exits(void)
{
u64 r = KVM_X86_DISABLE_EXITS_PAUSE;
if (!mitigate_smt_rsb) {
r |= KVM_X86_DISABLE_EXITS_HLT |
KVM_X86_DISABLE_EXITS_CSTATE;
if (kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
}
return r;
}
#ifdef CONFIG_KVM_HYPERV
static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 __user *cpuid_arg)
@ -4687,15 +4687,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_VALID_FLAGS;
break;
case KVM_CAP_X86_DISABLE_EXITS:
r = KVM_X86_DISABLE_EXITS_PAUSE;
if (!mitigate_smt_rsb) {
r |= KVM_X86_DISABLE_EXITS_HLT |
KVM_X86_DISABLE_EXITS_CSTATE;
if (kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
}
r = kvm_get_allowed_disable_exits();
break;
case KVM_CAP_X86_SMM:
if (!IS_ENABLED(CONFIG_KVM_SMM))
@ -5822,9 +5814,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];
if (vcpu->arch.pv_cpuid.enforce)
kvm_update_pv_runtime(vcpu);
return 0;
default:
return -EINVAL;
@ -6542,30 +6531,32 @@ split_irqchip_unlock:
break;
case KVM_CAP_X86_DISABLE_EXITS:
r = -EINVAL;
if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
if (cap->args[0] & ~kvm_get_allowed_disable_exits())
break;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
mutex_lock(&kvm->lock);
if (kvm->created_vcpus)
goto disable_exits_unlock;
#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \
"KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests."
if (!mitigate_smt_rsb) {
if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() &&
(cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
pr_warn_once(SMT_RSB_MSG);
if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
kvm_can_mwait_in_guest())
kvm->arch.mwait_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
kvm->arch.cstate_in_guest = true;
}
if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
cpu_smt_possible() &&
(cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
pr_warn_once(SMT_RSB_MSG);
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT)
kvm->arch.mwait_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
kvm->arch.cstate_in_guest = true;
r = 0;
disable_exits_unlock:
mutex_unlock(&kvm->lock);
break;
case KVM_CAP_MSR_PLATFORM_INFO:
kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
@ -8511,17 +8502,17 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
{
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
}
static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
{
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
}
static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
{
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
}
static bool emulator_guest_cpuid_is_intel_compatible(struct x86_emulate_ctxt *ctxt)
@ -8813,6 +8804,28 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa)
{
u32 reason, intr_info, error_code;
struct kvm_run *run = vcpu->run;
u64 info1, info2;
int ndata = 0;
kvm_x86_call(get_exit_info)(vcpu, &reason, &info1, &info2,
&intr_info, &error_code);
run->internal.data[ndata++] = info2;
run->internal.data[ndata++] = reason;
run->internal.data[ndata++] = info1;
run->internal.data[ndata++] = gpa;
run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
run->internal.ndata = ndata;
}
EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit);
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
struct kvm *kvm = vcpu->kvm;
@ -9085,6 +9098,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (r == X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT)
return 1;
if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa,
emulation_type))
return 1;
if (r == X86EMUL_UNHANDLEABLE_VECTORING) {
kvm_prepare_event_vectoring_exit(vcpu, cr2_or_gpa);
return 0;
}
WARN_ON_ONCE(r != X86EMUL_UNHANDLEABLE);
return handle_emulation_failure(vcpu, emulation_type);
}
@ -9773,10 +9795,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
#undef __kvm_cpu_cap_has
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@ -9976,7 +9994,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
{
u64 ret = vcpu->run->hypercall.ret;
if (!is_64_bit_mode(vcpu))
if (!is_64_bit_hypercall(vcpu))
ret = (u32)ret;
kvm_rax_write(vcpu, ret);
++vcpu->stat.hypercalls;
@ -12276,9 +12294,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto free_emulate_ctxt;
}
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
kvm_async_pf_hash_reset(vcpu);
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
@ -12301,6 +12316,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_xen_init_vcpu(vcpu);
vcpu_load(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
kvm_vcpu_reset(vcpu, false);
kvm_init_mmu(vcpu);

View File

@ -550,7 +550,6 @@ static inline void kvm_machine_check(void)
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
int kvm_spec_ctrl_test_value(u64 value);
bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
struct x86_exception *e);
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
@ -577,6 +576,11 @@ enum kvm_msr_access {
#define KVM_MSR_RET_UNSUPPORTED 2
#define KVM_MSR_RET_FILTERED 3
static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
return !(cr4 & vcpu->arch.cr4_guest_rsvd_bits);
}
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \
u64 __reserved_bits = CR4_RESERVED_BITS; \

View File

@ -963,6 +963,15 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
int num_vcpus = atomic_read(&kvm->online_vcpus);
/*
* Explicitly verify the target vCPU is online, as the anti-speculation
* logic only limits the CPU's ability to speculate, e.g. given a "bad"
* index, clamping the index to 0 would return vCPU0, not NULL.
*/
if (i >= num_vcpus)
return NULL;
i = array_index_nospec(i, num_vcpus);
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
@ -970,9 +979,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
return xa_load(&kvm->vcpu_array, i);
}
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
(atomic_read(&kvm->online_vcpus) - 1))
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
if (atomic_read(&kvm->online_vcpus)) \
xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
(atomic_read(&kvm->online_vcpus) - 1))
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{

View File

@ -617,10 +617,6 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
KVM_X86_DISABLE_EXITS_HLT | \
KVM_X86_DISABLE_EXITS_PAUSE | \
KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {

View File

@ -27,6 +27,8 @@ extern uint64_t guest_tsc_khz;
#define MAX_NR_CPUID_ENTRIES 100
#endif
#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@ -569,6 +571,11 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
static inline void set_idt(const struct desc_ptr *idt_desc)
{
__asm__ __volatile__("lidt %0"::"m"(*idt_desc));
}
static inline u64 xgetbv(u32 index)
{
u32 eax, edx;
@ -1010,10 +1017,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
{
vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
}
static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
uint32_t function,
uint32_t index)
{
TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
vcpu_get_cpuid(vcpu);
return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
function, index);
}
@ -1034,7 +1050,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
return r;
/* On success, refresh the cache to pick up adjustments made by KVM. */
vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
vcpu_get_cpuid(vcpu);
return 0;
}
@ -1044,12 +1060,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
/* Refresh the cache to pick up adjustments made by KVM. */
vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
}
static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
{
vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
vcpu_get_cpuid(vcpu);
}
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,

View File

@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void)
* in the guest will never succeed, and so isn't an option.
*/
memset(&idt, 0, sizeof(idt));
__asm__ __volatile__("lidt %0" :: "m"(idt));
set_idt(&idt);
GUEST_SYNC(0);
@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void)
close(memfd);
kvm_vm_free(vm);
}
static void guest_code_mmio_during_vectoring(void)
{
const struct desc_ptr idt_desc = {
.address = MEM_REGION_GPA,
.size = 0xFFF,
};
set_idt(&idt_desc);
/* Generate a #GP by dereferencing a non-canonical address */
*((uint8_t *)NONCANONICAL) = 0x1;
GUEST_ASSERT(0);
}
/*
* This test points the IDT descriptor base to an MMIO address. It should cause
* a KVM internal error when an event occurs in the guest.
*/
static void test_mmio_during_vectoring(void)
{
struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
u64 expected_gpa;
pr_info("Testing MMIO during vectoring error handling\n");
vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring);
virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1);
run = vcpu->run;
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV,
"Unexpected suberror = %d", vcpu->run->internal.suberror);
TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d",
run->internal.ndata);
/* The reported GPA should be IDT base + offset of the GP vector */
expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry);
TEST_ASSERT(run->internal.data[3] == expected_gpa,
"Unexpected GPA = %llx (expected %lx)",
vcpu->run->internal.data[3], expected_gpa);
kvm_vm_free(vm);
}
#endif
int main(int argc, char *argv[])
@ -568,6 +618,7 @@ int main(int argc, char *argv[])
* KVM_RUN fails with ENOEXEC or EFAULT.
*/
test_zero_memory_regions();
test_mmio_during_vectoring();
#endif
test_invalid_memory_region_flags();

View File

@ -139,11 +139,13 @@ static void test_pv_unhalt(void)
struct kvm_vm *vm;
struct kvm_cpuid_entry2 *ent;
u32 kvm_sig_old;
int r;
if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
return;
pr_info("testing KVM_FEATURE_PV_UNHALT\n");
TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
/* KVM_PV_UNHALT test */
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
@ -151,19 +153,45 @@ static void test_pv_unhalt(void)
TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
/* Make sure KVM clears vcpu->arch.kvm_cpuid */
/* Verify KVM disallows disabling exits after vCPU creation. */
r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
TEST_ASSERT(r && errno == EINVAL,
"Disabling exits after vCPU creation didn't fail as expected");
kvm_vm_free(vm);
/* Verify that KVM clear PV_UNHALT from guest CPUID. */
vm = vm_create(1);
vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
vcpu = vm_vcpu_add(vm, 0, NULL);
TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"vCPU created with PV_UNHALT set by default");
vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
/*
* Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT
* when KVM PV is not present, and DOES clear PV_UNHALT when switching
* back to the correct signature..
*/
ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
kvm_sig_old = ent->ebx;
ent->ebx = 0xdeadbeef;
vcpu_set_cpuid(vcpu);
vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"PV_UNHALT cleared when using bogus KVM PV signature");
ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
ent->ebx = kvm_sig_old;
vcpu_set_cpuid(vcpu);
TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
"PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
/* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */

View File

@ -41,13 +41,15 @@ do { \
TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
} while (0)
#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
static uint64_t calc_supported_cr4_feature_bits(void)
{
uint64_t cr4;
uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
if (kvm_cpu_has(X86_FEATURE_UMIP))
cr4 |= X86_CR4_UMIP;
if (kvm_cpu_has(X86_FEATURE_LA57))
@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void)
return cr4;
}
int main(int argc, char *argv[])
static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
{
struct kvm_sregs sregs;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t cr4;
int rc, i;
/*
* Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
* use it to verify all supported CR4 bits can be set prior to defining
* the vCPU model, i.e. without doing KVM_SET_CPUID2.
*/
vm = vm_create_barebones();
vcpu = __vm_vcpu_add(vm, 0);
vcpu_sregs_get(vcpu, &sregs);
sregs.cr0 = 0;
sregs.cr4 |= calc_supported_cr4_feature_bits();
cr4 = sregs.cr4;
sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
sregs.cr4 |= cr4;
rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) ==
(vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)),
"KVM didn't %s OSXSAVE in CPUID as expected",
(sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear");
TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) ==
(vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)),
"KVM didn't %s OSPKE in CPUID as expected",
(sregs.cr4 & X86_CR4_PKE) ? "set" : "clear");
vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
sregs.cr4, cr4);
/* Verify all unsupported features are rejected by KVM. */
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
@ -119,10 +116,28 @@ int main(int argc, char *argv[])
/* NW without CD is illegal, as is PG without PE. */
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
}
int main(int argc, char *argv[])
{
struct kvm_sregs sregs;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int rc;
/*
* Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
* use it to verify KVM enforces guest CPUID even if *userspace* never
* sets CPUID.
*/
vm = vm_create_barebones();
vcpu = __vm_vcpu_add(vm, 0);
test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4);
kvm_vm_free(vm);
/* Create a "real" VM and verify APIC_BASE can be set. */
/* Create a "real" VM with a fully populated guest CPUID and verify
* APIC_BASE and all supported CR4 can be set.
*/
vm = vm_create_with_one_vcpu(&vcpu, NULL);
vcpu_sregs_get(vcpu, &sregs);
@ -135,6 +150,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
test_cr_bits(vcpu, calc_supported_cr4_feature_bits());
kvm_vm_free(vm);
return 0;

View File

@ -155,7 +155,7 @@ static void guest_shutdown_code(void)
/* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
memset(&idt, 0, sizeof(idt));
__asm__ __volatile__("lidt %0" :: "m"(idt));
set_idt(&idt);
__asm__ __volatile__("ud2");
}

View File

@ -4116,32 +4116,30 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
mutex_lock(&kvm->lock);
#ifdef CONFIG_LOCKDEP
/* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
mutex_lock(&vcpu->mutex);
mutex_unlock(&vcpu->mutex);
#endif
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
goto unlock_vcpu_destroy;
}
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
WARN_ON_ONCE(r == -EBUSY);
if (r)
goto unlock_vcpu_destroy;
/* Now it's all set up, let userspace reach it */
/*
* Now it's all set up, let userspace reach it. Grab the vCPU's mutex
* so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
* visible (per online_vcpus), e.g. so that KVM doesn't get tricked
* into a NULL-pointer dereference because KVM thinks the _current_
* vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep
* knows it's taken *inside* kvm->lock.
*/
mutex_lock(&vcpu->mutex);
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0)
goto kvm_put_xa_release;
if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
r = -EINVAL;
goto kvm_put_xa_release;
}
goto kvm_put_xa_erase;
/*
* Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu
@ -4149,15 +4147,17 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
*/
smp_wmb();
atomic_inc(&kvm->online_vcpus);
mutex_unlock(&vcpu->mutex);
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_postcreate(vcpu);
kvm_create_vcpu_debugfs(vcpu);
return r;
kvm_put_xa_release:
kvm_put_xa_erase:
mutex_unlock(&vcpu->mutex);
kvm_put_kvm_no_destroy(kvm);
xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
kvm_dirty_ring_free(&vcpu->dirty_ring);
@ -4282,6 +4282,33 @@ static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
}
#endif
static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
/*
* In practice, this happy path will always be taken, as a well-behaved
* VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
*/
if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
return 0;
/*
* Acquire and release the vCPU's mutex to wait for vCPU creation to
* complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
* is fully online).
*/
if (mutex_lock_killable(&vcpu->mutex))
return -EINTR;
mutex_unlock(&vcpu->mutex);
if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx)))
return -EIO;
return 0;
}
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -4297,6 +4324,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;
/*
* Wait for the vCPU to be online before handling the ioctl(), as KVM
* assumes the vCPU is reachable via vcpu_array, i.e. may dereference
* a NULL pointer if userspace invokes an ioctl() before KVM is ready.
*/
r = kvm_wait_for_vcpu_online(vcpu);
if (r)
return r;
/*
* Some architectures have vcpu ioctls that are asynchronous to vcpu
* execution; mutex_lock() would break them.