mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-12-29 17:25:38 +00:00
KVM x86 fixes for 6.13:
- Disable AVIC on SNP-enabled systems that don't allow writes to the virtual APIC page, as such hosts will hit unexpected RMP #PFs in the host when running VMs of any flavor. - Fix a WARN in the hypercall completion path due to KVM trying to determine if a guest with protected register state is in 64-bit mode (KVM's ABI is to assume such guests only make hypercalls in 64-bit mode). - Allow the guest to write to supported bits in MSR_AMD64_DE_CFG to fix a regression with Windows guests, and because KVM's read-only behavior appears to be entirely made up. - Treat TDP MMU faults as spurious if the faulting access is allowed given the existing SPTE. This fixes a benign WARN (other than the WARN itself) due to unexpectedly replacing a writable SPTE with a read-only SPTE. - Emit a warning when KVM is configured with ignore_msrs=1 and also to hide the MSRs that the guest is looking for from the kernel logs. ignore_msrs can trick guests into assuming that certain processor features are present, and this in turn leads to bogus bug reports. -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmdoSTgUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroM+IggAndK6byvsGjU0lM5v4WhKMnD7KZXi PsXuzv7LQUtbOaosowbBDUfMkZXwewmgUjBWmJL8WFlVePk7+8Aj29Zwd4vH3cHI KZ/AeyC6VwH1CFLZMSoHgG2dCl8hUUkUldZLAdYKigTRMlw5FmnmIfhqx/mpxiMz xHs9TU+NZpaQVXSzq9P1AXkJX+zZvyYlGXrTRtJ541AwYPWsq1MBr1megtBi59ws zEgCOyBpDHWQEasbtwHjPNNH+rCi8SEq2QCrSGHpTXWLS9kXj32wLldWb6E3L5ta c1/Q9mQDpVXQNidnAWyhukVmbETQDkpB+j9Mfd+bwIy1+zh6iSEe9dWM9g== =/fW+ -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM x86 fixes from Paolo Bonzini: - Disable AVIC on SNP-enabled systems that don't allow writes to the virtual APIC page, as such hosts will hit unexpected RMP #PFs in the host when running VMs of any flavor. 
- Fix a WARN in the hypercall completion path due to KVM trying to determine if a guest with protected register state is in 64-bit mode (KVM's ABI is to assume such guests only make hypercalls in 64-bit mode). - Allow the guest to write to supported bits in MSR_AMD64_DE_CFG to fix a regression with Windows guests, and because KVM's read-only behavior appears to be entirely made up. - Treat TDP MMU faults as spurious if the faulting access is allowed given the existing SPTE. This fixes a benign WARN (other than the WARN itself) due to unexpectedly replacing a writable SPTE with a read-only SPTE. - Emit a warning when KVM is configured with ignore_msrs=1 and is also configured to hide, from the kernel logs, the MSRs that the guest is looking for. ignore_msrs can trick guests into assuming that certain processor features are present, and this in turn leads to bogus bug reports. * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: let it be known that ignore_msrs is a bad idea KVM: VMX: don't include '<linux/find.h>' directly KVM: x86/mmu: Treat TDP MMU faults as spurious if access is already allowed KVM: SVM: Allow guest writes to set MSR_AMD64_DE_CFG bits KVM: x86: Play nice with protected guests in complete_hypercall_exit() KVM: SVM: Disable AVIC on SNP-enabled system without HvInUseWrAllowed feature
This commit is contained in:
commit
b1fdbe77be
@ -452,6 +452,7 @@
|
|||||||
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
|
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
|
||||||
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
|
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
|
||||||
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
|
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
|
||||||
|
#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
|
||||||
|
|
||||||
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
|
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
|
||||||
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
|
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
|
||||||
|
@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
|
|
||||||
{
|
|
||||||
if (fault->exec)
|
|
||||||
return is_executable_pte(spte);
|
|
||||||
|
|
||||||
if (fault->write)
|
|
||||||
return is_writable_pte(spte);
|
|
||||||
|
|
||||||
/* Fault was on Read access */
|
|
||||||
return spte & PT_PRESENT_MASK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the last level spte pointer of the shadow page walk for the given
|
* Returns the last level spte pointer of the shadow page walk for the given
|
||||||
* gpa, and sets *spte to the spte value. This spte may be non-present. If no
|
* gpa, and sets *spte to the spte value. This spte may be non-present. If no
|
||||||
|
@ -461,6 +461,23 @@ static inline bool is_mmu_writable_spte(u64 spte)
|
|||||||
return spte & shadow_mmu_writable_mask;
|
return spte & shadow_mmu_writable_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns true if the access indicated by @fault is allowed by the existing
|
||||||
|
* SPTE protections. Note, the caller is responsible for checking that the
|
||||||
|
* SPTE is a shadow-present, leaf SPTE (either before or after).
|
||||||
|
*/
|
||||||
|
static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
|
||||||
|
{
|
||||||
|
if (fault->exec)
|
||||||
|
return is_executable_pte(spte);
|
||||||
|
|
||||||
|
if (fault->write)
|
||||||
|
return is_writable_pte(spte);
|
||||||
|
|
||||||
|
/* Fault was on Read access */
|
||||||
|
return spte & PT_PRESENT_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
|
* If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
|
||||||
* write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,
|
* write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,
|
||||||
|
@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
|
|||||||
if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
|
if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
|
||||||
return RET_PF_SPURIOUS;
|
return RET_PF_SPURIOUS;
|
||||||
|
|
||||||
|
if (is_shadow_present_pte(iter->old_spte) &&
|
||||||
|
is_access_allowed(fault, iter->old_spte) &&
|
||||||
|
is_last_spte(iter->old_spte, iter->level))
|
||||||
|
return RET_PF_SPURIOUS;
|
||||||
|
|
||||||
if (unlikely(!fault->slot))
|
if (unlikely(!fault->slot))
|
||||||
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
|
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
|
||||||
else
|
else
|
||||||
|
@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
|
||||||
|
!boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
|
||||||
|
pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (boot_cpu_has(X86_FEATURE_AVIC)) {
|
if (boot_cpu_has(X86_FEATURE_AVIC)) {
|
||||||
pr_info("AVIC enabled\n");
|
pr_info("AVIC enabled\n");
|
||||||
} else if (force_avic) {
|
} else if (force_avic) {
|
||||||
|
@ -3201,15 +3201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
|||||||
if (data & ~supported_de_cfg)
|
if (data & ~supported_de_cfg)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/*
|
|
||||||
* Don't let the guest change the host-programmed value. The
|
|
||||||
* MSR is very model specific, i.e. contains multiple bits that
|
|
||||||
* are completely unknown to KVM, and the one bit known to KVM
|
|
||||||
* is simply a reflection of hardware capabilities.
|
|
||||||
*/
|
|
||||||
if (!msr->host_initiated && data != svm->msr_decfg)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
svm->msr_decfg = data;
|
svm->msr_decfg = data;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
#ifndef __KVM_X86_VMX_POSTED_INTR_H
|
#ifndef __KVM_X86_VMX_POSTED_INTR_H
|
||||||
#define __KVM_X86_VMX_POSTED_INTR_H
|
#define __KVM_X86_VMX_POSTED_INTR_H
|
||||||
|
|
||||||
#include <linux/find.h>
|
#include <linux/bitmap.h>
|
||||||
#include <asm/posted_intr.h>
|
#include <asm/posted_intr.h>
|
||||||
|
|
||||||
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
|
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
|
||||||
|
@ -9976,7 +9976,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
|
|||||||
{
|
{
|
||||||
u64 ret = vcpu->run->hypercall.ret;
|
u64 ret = vcpu->run->hypercall.ret;
|
||||||
|
|
||||||
if (!is_64_bit_mode(vcpu))
|
if (!is_64_bit_hypercall(vcpu))
|
||||||
ret = (u32)ret;
|
ret = (u32)ret;
|
||||||
kvm_rax_write(vcpu, ret);
|
kvm_rax_write(vcpu, ret);
|
||||||
++vcpu->stat.hypercalls;
|
++vcpu->stat.hypercalls;
|
||||||
@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||||||
kvm_hv_init_vm(kvm);
|
kvm_hv_init_vm(kvm);
|
||||||
kvm_xen_init_vm(kvm);
|
kvm_xen_init_vm(kvm);
|
||||||
|
|
||||||
|
if (ignore_msrs && !report_ignored_msrs) {
|
||||||
|
pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n"
|
||||||
|
"a supported configuration. Lying to the guest about the existence of MSRs\n"
|
||||||
|
"may cause the guest operating system to hang or produce errors. If a guest\n"
|
||||||
|
"does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out_uninit_mmu:
|
out_uninit_mmu:
|
||||||
|
Loading…
Reference in New Issue
Block a user