Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM x86 fixes from Paolo Bonzini:

 - Disable AVIC on SNP-enabled systems that don't allow writes to the
   virtual APIC page, as such hosts will hit unexpected RMP #PFs in the
   host when running VMs of any flavor.

 - Fix a WARN in the hypercall completion path due to KVM trying to
   determine if a guest with protected register state is in 64-bit mode
   (KVM's ABI is to assume such guests only make hypercalls in 64-bit
   mode); a sketch of that assumption follows the list below.

 - Allow the guest to write to supported bits in MSR_AMD64_DE_CFG to fix
   a regression with Windows guests, and because KVM's read-only
   behavior appears to be entirely made up.

 - Treat TDP MMU faults as spurious if the faulting access is allowed
   given the existing SPTE. This fixes a benign WARN (other than the
   WARN itself) due to unexpectedly replacing a writable SPTE with a
   read-only SPTE.

 - Emit a warning when KVM is configured with ignore_msrs=1 and
   report_ignored_msrs=0, i.e. when it is also told to hide from the
   kernel logs the MSRs that the guest is probing. ignore_msrs can trick
   guests into assuming that certain processor features are present, and
   this in turn leads to bogus bug reports.
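
The hypercall bullet above is easiest to see with a tiny standalone model.
The sketch below is plain userspace C written for this note, not KVM code:
the struct, field names, and helpers are invented, and only the control flow
mirrors the idea behind is_64_bit_hypercall() and complete_hypercall_exit().
A guest with protected register state cannot have its CPU mode inspected, so
the hypercall return value is assumed to be 64-bit instead of being truncated.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Hypothetical vCPU model, not a kernel structure. */
  struct model_vcpu {
          bool guest_state_protected;  /* e.g. an SEV-ES/SNP-style guest */
          bool in_64_bit_mode;         /* only readable for normal guests */
          uint64_t hypercall_ret;
  };

  /* Mirrors the rule behind is_64_bit_hypercall(): protected => assume 64-bit. */
  static bool hypercall_is_64_bit(const struct model_vcpu *vcpu)
  {
          return vcpu->guest_state_protected || vcpu->in_64_bit_mode;
  }

  static uint64_t complete_hypercall(const struct model_vcpu *vcpu)
  {
          uint64_t ret = vcpu->hypercall_ret;

          /* Only truncate for guests known to be outside 64-bit mode. */
          if (!hypercall_is_64_bit(vcpu))
                  ret = (uint32_t)ret;
          return ret;
  }

  int main(void)
  {
          struct model_vcpu prot   = { .guest_state_protected = true,
                                       .hypercall_ret = 0x123456789abcULL };
          struct model_vcpu legacy = { .in_64_bit_mode = false,
                                       .hypercall_ret = 0x123456789abcULL };

          /* Protected guest keeps the full value; the 32-bit guest is truncated. */
          printf("protected: %#llx\n", (unsigned long long)complete_hypercall(&prot));
          printf("32-bit:    %#llx\n", (unsigned long long)complete_hypercall(&legacy));
          return 0;
  }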

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: let it be known that ignore_msrs is a bad idea
  KVM: VMX: don't include '<linux/find.h>' directly
  KVM: x86/mmu: Treat TDP MMU faults as spurious if access is already allowed
  KVM: SVM: Allow guest writes to set MSR_AMD64_DE_CFG bits
  KVM: x86: Play nice with protected guests in complete_hypercall_exit()
  KVM: SVM: Disable AVIC on SNP-enabled system without HvInUseWrAllowed feature
commit b1fdbe77be
Linus Torvalds, 2024-12-22 12:16:41 -08:00
8 changed files with 38 additions and 23 deletions

arch/x86/include/asm/cpufeatures.h

@@ -452,6 +452,7 @@
 #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
 #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
 #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */

 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */

arch/x86/kvm/mmu/mmu.c

@@ -3364,18 +3364,6 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
 	return true;
 }

-static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
-{
-	if (fault->exec)
-		return is_executable_pte(spte);
-
-	if (fault->write)
-		return is_writable_pte(spte);
-
-	/* Fault was on Read access */
-	return spte & PT_PRESENT_MASK;
-}
-
 /*
  * Returns the last level spte pointer of the shadow page walk for the given
  * gpa, and sets *spte to the spte value. This spte may be non-preset. If no

arch/x86/kvm/mmu/spte.h

@@ -461,6 +461,23 @@ static inline bool is_mmu_writable_spte(u64 spte)
 	return spte & shadow_mmu_writable_mask;
 }

+/*
+ * Returns true if the access indicated by @fault is allowed by the existing
+ * SPTE protections. Note, the caller is responsible for checking that the
+ * SPTE is a shadow-present, leaf SPTE (either before or after).
+ */
+static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+{
+	if (fault->exec)
+		return is_executable_pte(spte);
+
+	if (fault->write)
+		return is_writable_pte(spte);
+
+	/* Fault was on Read access */
+	return spte & PT_PRESENT_MASK;
+}
+
 /*
  * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
  * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,

arch/x86/kvm/mmu/tdp_mmu.c

@@ -985,6 +985,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 	if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
 		return RET_PF_SPURIOUS;

+	if (is_shadow_present_pte(iter->old_spte) &&
+	    is_access_allowed(fault, iter->old_spte) &&
+	    is_last_spte(iter->old_spte, iter->level))
+		return RET_PF_SPURIOUS;
+
 	if (unlikely(!fault->slot))
 		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
 	else

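As a usage note for the two MMU hunks above: a fault is now treated as
spurious when the existing present, last-level SPTE already permits the
access, so a writable SPTE is never replaced by a read-only one. The sketch
below is a self-contained userspace model with made-up PTE bit masks and
type names (not the real spte.h definitions); it only illustrates the shape
of the is_access_allowed() decision.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Simplified stand-ins for SPTE permission bits; the real masks differ. */
  #define PTE_PRESENT  (1ull << 0)
  #define PTE_WRITABLE (1ull << 1)
  #define PTE_EXEC     (1ull << 2)

  struct model_fault {
          bool exec;
          bool write;
  };

  /* Same shape as the is_access_allowed() helper moved into spte.h. */
  static bool access_allowed(const struct model_fault *fault, uint64_t spte)
  {
          if (fault->exec)
                  return spte & PTE_EXEC;
          if (fault->write)
                  return spte & PTE_WRITABLE;
          /* Fault was on a read access. */
          return spte & PTE_PRESENT;
  }

  int main(void)
  {
          /* An existing, present, writable but non-executable leaf SPTE. */
          uint64_t old_spte = PTE_PRESENT | PTE_WRITABLE;
          struct model_fault write_fault = { .write = true };
          struct model_fault exec_fault  = { .exec = true };

          /* Already permitted: spurious, the writable SPTE is left in place. */
          printf("write fault spurious: %d\n", access_allowed(&write_fault, old_spte));
          /* Not permitted by the old SPTE: a new SPTE must be installed. */
          printf("exec fault spurious:  %d\n", access_allowed(&exec_fault, old_spte));
          return 0;
  }
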
arch/x86/kvm/svm/avic.c

@@ -1199,6 +1199,12 @@ bool avic_hardware_setup(void)
 		return false;
 	}

+	if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+	    !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
+		pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
+		return false;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_AVIC)) {
 		pr_info("AVIC enabled\n");
 	} else if (force_avic) {

arch/x86/kvm/svm/svm.c

@@ -3201,15 +3201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		if (data & ~supported_de_cfg)
 			return 1;

-		/*
-		 * Don't let the guest change the host-programmed value. The
-		 * MSR is very model specific, i.e. contains multiple bits that
-		 * are completely unknown to KVM, and the one bit known to KVM
-		 * is simply a reflection of hardware capabilities.
-		 */
-		if (!msr->host_initiated && data != svm->msr_decfg)
-			return 1;
-
 		svm->msr_decfg = data;
 		break;
 	}

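The svm.c hunk above drops the host-value check, so a guest write now
succeeds as long as it only touches supported bits. The toy model below uses
a placeholder "supported" mask and bit layout (only the LFENCE serialization
bit is shown, as an illustration rather than AMD's authoritative definition);
it sketches the resulting behavior.

  #include <stdint.h>
  #include <stdio.h>

  #define DE_CFG_LFENCE_SERIALIZE (1ull << 1)  /* the one bit KVM knows about */

  static uint64_t supported_de_cfg = DE_CFG_LFENCE_SERIALIZE;
  static uint64_t msr_decfg;                   /* per-vCPU shadow value */

  /* Returns 0 on success, 1 to signal a fault, mirroring the shape of svm_set_msr(). */
  static int set_de_cfg(uint64_t data)
  {
          if (data & ~supported_de_cfg)
                  return 1;          /* reserved/unknown bits still fault */

          msr_decfg = data;          /* guest writes to supported bits now stick */
          return 0;
  }

  int main(void)
  {
          printf("write LFENCE bit:  %d\n", set_de_cfg(DE_CFG_LFENCE_SERIALIZE)); /* 0 */
          printf("write unknown bit: %d\n", set_de_cfg(1ull << 5));               /* 1 */
          printf("shadow value now:  %#llx\n", (unsigned long long)msr_decfg);    /* 0x2 */
          return 0;
  }
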
arch/x86/kvm/vmx/posted_intr.h

@@ -2,7 +2,7 @@
 #ifndef __KVM_X86_VMX_POSTED_INTR_H
 #define __KVM_X86_VMX_POSTED_INTR_H

-#include <linux/find.h>
+#include <linux/bitmap.h>
 #include <asm/posted_intr.h>

 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);

arch/x86/kvm/x86.c

@@ -9976,7 +9976,7 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
 {
 	u64 ret = vcpu->run->hypercall.ret;

-	if (!is_64_bit_mode(vcpu))
+	if (!is_64_bit_hypercall(vcpu))
 		ret = (u32)ret;
 	kvm_rax_write(vcpu, ret);
 	++vcpu->stat.hypercalls;
@@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_hv_init_vm(kvm);
 	kvm_xen_init_vm(kvm);

+	if (ignore_msrs && !report_ignored_msrs) {
+		pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n"
+			     "a supported configuration. Lying to the guest about the existence of MSRs\n"
+			     "may cause the guest operating system to hang or produce errors. If a guest\n"
+			     "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
+	}
+
 	return 0;

 out_uninit_mmu:
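
For completeness, the configuration that the new pr_warn_once() targets can
also be spotted from the host side. The sketch below assumes the standard
module-parameter sysfs layout (/sys/module/kvm/parameters/...) and is only
an illustration written for this note, not part of the series.

  #include <stdio.h>

  /* Returns 1 if the boolean parameter reads as set, 0 if clear, -1 if absent. */
  static int read_bool_param(const char *path)
  {
          char buf[8] = "";
          FILE *f = fopen(path, "r");

          if (!f)
                  return -1;
          if (!fgets(buf, sizeof(buf), f))
                  buf[0] = '\0';
          fclose(f);
          return buf[0] == 'Y' || buf[0] == '1';  /* bool params print Y/N */
  }

  int main(void)
  {
          int ignore = read_bool_param("/sys/module/kvm/parameters/ignore_msrs");
          int report = read_bool_param("/sys/module/kvm/parameters/report_ignored_msrs");

          if (ignore == 1 && report == 0)
                  puts("unsupported combo: ignore_msrs=1, report_ignored_msrs=0");
          else
                  puts("ignore_msrs/report_ignored_msrs look sane (or kvm not loaded)");
          return 0;
  }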