KVM: x86: Quirk initialization of feature MSRs to KVM's max configuration

Add a quirk to control KVM's misguided initialization of select feature
MSRs to KVM's max configuration, as enabling features by default violates
KVM's approach of letting userspace own the vCPU model, and is actively
problematic for MSRs that are conditionally supported, as the vCPU will
end up with an MSR value that userspace can't restore.  E.g. if the vCPU
is configured with PDCM=0, userspace will save and attempt to restore a
non-zero PERF_CAPABILITIES, thanks to KVM's meddling.

Link: https://lore.kernel.org/r/20240802185511.305849-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
Sean Christopherson 2024-08-02 11:55:05 -07:00
parent bc2ca3680b
commit dcb988cdac
6 changed files with 39 additions and 8 deletions

View File

@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
or moved memslot isn't reachable, i.e KVM
_may_ invalidate only SPTEs related to the
memslot.
KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the
vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
MSR_IA32_ARCH_CAPABILITIES (0x10a),
MSR_PLATFORM_INFO (0xce), and all VMX MSRs
(0x480..0x492) to the maximal capabilities
supported by KVM. KVM also sets
MSR_IA32_UCODE_REV (0x8b) to an arbitrary
value (which is different for Intel vs.
AMD). Lastly, when guest CPUID is set (by
userspace), KVM modifies select VMX MSR
fields to force consistency between guest
CPUID and L2's effective ISA. When this
quirk is disabled, KVM zeroes the vCPU's MSR
values (with two exceptions, see below),
i.e. treats the feature MSRs like CPUID
leaves and gives userspace full control of
the vCPU model definition. This quirk does
not affect VMX MSRs CR0/CR4_FIXED1 (0x487
and 0x489), as KVM does now allow them to
be set by userspace (KVM sets them based on
guest CPUID, for safety purposes).
=================================== ============================================
7.32 KVM_CAP_MAX_VCPU_ID

View File

@ -2360,7 +2360,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
KVM_X86_QUIRK_SLOT_ZAP_ALL)
KVM_X86_QUIRK_SLOT_ZAP_ALL | \
KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
/*
* KVM previously used a u32 field in kvm_run to indicate the hypercall was

View File

@ -440,6 +440,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1

View File

@ -1390,7 +1390,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
vcpu->arch.microcode_version = 0x01000065;
svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio;
svm->nmi_masked = false;

View File

@ -4572,7 +4572,8 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
* Update the nested MSR settings so that a nested VMM can/can't set
* controls for features that are/aren't exposed to the guest.
*/
if (nested) {
if (nested &&
kvm_check_has_quirk(vmx->vcpu.kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
/*
* All features that can be added or removed to VMX MSRs must
* be supported in the first place for nested virtualization.
@ -4862,7 +4863,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
init_vmcs(vmx);
if (nested)
if (nested &&
kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
vcpu_setup_sgx_lepubkeyhash(vcpu);
@ -4875,7 +4877,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
#endif
vcpu->arch.microcode_version = 0x100000000ULL;
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
/*

View File

@ -12314,9 +12314,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
}
kvm_pmu_init(vcpu);
vcpu->arch.pending_external_vector = -1;