From 9ef1530c0c1bb182d7f60165946bd027ff49282e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Apr 2020 03:20:06 -0400 Subject: [PATCH 01/17] KVM: SVM: fix compilation with modular PSP and non-modular KVM Use svm_sev_enabled() in order to cull all calls to PSP code. Otherwise, compilation fails with undefined symbols if the PSP device driver is compiled as a module and KVM is not. Reported-by: Uros Bizjak Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0e3fc311d7da..5ffe041dc0e8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1117,7 +1117,7 @@ int __init sev_hardware_setup(void) /* Maximum number of encrypted guests supported simultaneously */ max_sev_asid = cpuid_ecx(0x8000001F); - if (!max_sev_asid) + if (!svm_sev_enabled()) return 1; /* Minimum ASID value that should be used for SEV guest */ @@ -1156,6 +1156,9 @@ int __init sev_hardware_setup(void) void sev_hardware_teardown(void) { + if (!svm_sev_enabled()) + return; + bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); From f14eec0a32038f2d6c05b8ea91c7701f65ce7418 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Apr 2020 03:17:58 -0400 Subject: [PATCH 02/17] KVM: SVM: move more vmentry code to assembly Manipulate IF around vmload/vmsave to remove the confusing usage of local_irq_enable where interrupts are actually disabled via GIF. And stuff the RSB immediately without waiting for a RET to avoid Spectre-v2 attacks. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/nospec-branch.h | 21 --------------------- arch/x86/kvm/svm/svm.c | 7 ------- arch/x86/kvm/svm/vmenter.S | 9 +++++++++ 3 files changed, 9 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 07e95dcb40ad..7e9a281e2660 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -237,27 +237,6 @@ enum ssb_mitigation { extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; -/* - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. Both - * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ALL *might* it be avoided. 
- */
-static inline void vmexit_fill_RSB(void)
-{
-#ifdef CONFIG_RETPOLINE
-	unsigned long loops;
-
-	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
-		      ALTERNATIVE("jmp 910f",
-				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
-				  X86_FEATURE_RETPOLINE)
-		      "910:"
-		      : "=r" (loops), ASM_CALL_CONSTRAINT
-		      : : "memory" );
-#endif
-}
-
 static __always_inline
 void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2be5bbae3a40..117bb0b28535 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3330,13 +3330,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
 
-	local_irq_enable();
-
 	__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
 
-	/* Eliminate branch target predictions from guest mode */
-	vmexit_fill_RSB();
-
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
@@ -3366,8 +3361,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	reload_tss(vcpu);
 
-	local_irq_disable();
-
 	x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
 
 	vcpu->arch.cr2 = svm->vmcb->save.cr2;
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index fa1af90067e9..723887e35e95 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -3,6 +3,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define WORD_SIZE (BITS_PER_LONG / 8)
 
@@ -78,6 +79,7 @@ SYM_FUNC_START(__svm_vcpu_run)
 	pop %_ASM_AX
 
 	/* Enter guest mode */
+	sti
 1:	vmload %_ASM_AX
 	jmp 3f
 2:	cmpb $0, kvm_rebooting
@@ -99,6 +101,13 @@ SYM_FUNC_START(__svm_vcpu_run)
 	ud2
 	_ASM_EXTABLE(5b, 6b)
 7:
+	cli
+
+#ifdef CONFIG_RETPOLINE
+	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
 	/* "POP" @regs to RAX. */
 	pop %_ASM_AX

From fb56baae5ea509e63c2a068d66a4d8ea91969fca Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Tue, 14 Apr 2020 09:14:14 +0200
Subject: [PATCH 03/17] KVM: VMX: Enable machine check support for 32bit targets

There is no reason to limit the use of do_machine_check
to 64bit targets. MCE handling works for both target families.

Cc: Paolo Bonzini
Cc: Sean Christopherson
Cc: stable@vger.kernel.org
Fixes: a0861c02a981 ("KVM: Add VT-x machine check support")
Signed-off-by: Uros Bizjak
Message-Id: <20200414071414.45636-1-ubizjak@gmail.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8959514eaf0f..01330096ff3e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4572,7 +4572,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
  */
 static void kvm_machine_check(void)
 {
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_MCE)
 	struct pt_regs regs = {
 		.cs = 3, /* Fake ring 3 no matter what the guest ran on */
 		.flags = X86_EFLAGS_IF,

From b6467ab142b708dd076f6186ca274f14af379c72 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 7 Apr 2020 23:40:58 -0700
Subject: [PATCH 04/17] KVM: Check validity of resolved slot when searching memslots

Check that the resolved slot (somewhat confusingly named 'start') is a
valid/allocated slot before doing the final comparison to see if the
specified gfn resides in the associated slot.

The resolved slot can be invalid if the binary search loop terminated
because the search index was incremented beyond the number of used
slots.

This bug has existed since the binary search algorithm was introduced,
but went unnoticed because KVM statically allocated memory for the max
number of slots, i.e. the access would only be truly out-of-bounds if
all possible slots were allocated and the specified gfn was less than
the base of the lowest memslot.  Commit 36947254e5f98 ("KVM:
Dynamically size memslot array based on number of used slots")
eliminated the "all possible slots allocated" condition and made the
bug embarrassingly easy to hit.

Fixes: 9c1a5d38780e6 ("kvm: optimize GFN to memslot lookup with large slots amount")
Reported-by: syzbot+d889b59b2bb87d4047a2@syzkaller.appspotmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson
Message-Id: <20200408064059.8957-2-sean.j.christopherson@intel.com>
Reviewed-by: Cornelia Huck
Signed-off-by: Paolo Bonzini
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d58beb65454..01276e3d01b9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1048,7 +1048,7 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 			start = slot + 1;
 	}
 
-	if (gfn >= memslots[start].base_gfn &&
+	if (start < slots->used_slots && gfn >= memslots[start].base_gfn &&
 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
 		atomic_set(&slots->lru_slot, start);
 		return &memslots[start];

From 97daa028f3f621adff2c4f7b15fe0874e5b5bd6c Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Tue, 7 Apr 2020 23:40:59 -0700
Subject: [PATCH 05/17] KVM: s390: Return last valid slot if approx index is out-of-bounds

Return the index of the last valid slot from gfn_to_memslot_approx() if
its binary search loop yielded an out-of-bounds index.  The index can
be out-of-bounds if the specified gfn is less than the base of the
lowest memslot (which is also the last valid memslot).

Note, the sole caller, kvm_s390_get_cmma(), ensures used_slots is
non-zero.

Fixes: afdad61615cc3 ("KVM: s390: Fix storage attributes migration with memory slots")
Cc: stable@vger.kernel.org # 4.19.x: 0774a964ef56: KVM: Fix out of range accesses to memslots
Cc: stable@vger.kernel.org # 4.19.x
Signed-off-by: Sean Christopherson
Message-Id: <20200408064059.8957-3-sean.j.christopherson@intel.com>
Reviewed-by: Cornelia Huck
Signed-off-by: Paolo Bonzini
---
 arch/s390/kvm/kvm-s390.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 19a81024fe16..5dcf9ff12828 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1939,6 +1939,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
 			start = slot + 1;
 	}
 
+	if (start >= slots->used_slots)
+		return slots->used_slots - 1;
+
 	if (gfn >= memslots[start].base_gfn &&
 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
 		atomic_set(&slots->lru_slot, start);

From b6467ab142b708dd076f6186ca274f14af379c72 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Tue, 14 Apr 2020 22:47:45 +0000
Subject: [PATCH 06/17] kvm: nVMX: reflect MTF VM-exits if injected by L1

According to SDM 26.6.2, it is possible to inject an MTF VM-exit via the
VM-entry interruption-information field regardless of the 'monitor trap
flag' VM-execution control.  KVM appropriately copies the VM-entry
interruption-information field from vmcs12 to vmcs02.

However, if L1 has not set the 'monitor trap flag' VM-execution control, KVM fails to reflect the subsequent MTF VM-exit into L1. Fix this by consulting the VM-entry interruption-information field of vmcs12 to determine if L1 has injected the MTF VM-exit. If so, reflect the exit, regardless of the 'monitor trap flag' VM-execution control. Fixes: 5f3d45e7f282 ("kvm/x86: add support for MONITOR_TRAP_FLAG") Signed-off-by: Oliver Upton Reviewed-by: Peter Shier Reviewed-by: Jim Mattson Message-Id: <20200414224746.240324-1-oupton@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index cbc9ea2de28f..0d1400fa1e22 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5533,6 +5533,23 @@ static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, return 1 & (b >> (field & 7)); } +static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) +{ + u32 entry_intr_info = vmcs12->vm_entry_intr_info_field; + + if (nested_cpu_has_mtf(vmcs12)) + return true; + + /* + * An MTF VM-exit may be injected into the guest by setting the + * interruption-type to 7 (other event) and the vector field to 0. Such + * is the case regardless of the 'monitor trap flag' VM-execution + * control. + */ + return entry_intr_info == (INTR_INFO_VALID_MASK + | INTR_TYPE_OTHER_EVENT); +} + /* * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we * should handle it ourselves in L0 (and then continue L2). Only call this @@ -5633,7 +5650,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); case EXIT_REASON_MONITOR_TRAP_FLAG: - return nested_cpu_has_mtf(vmcs12); + return nested_vmx_exit_handled_mtf(vmcs12); case EXIT_REASON_MONITOR_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); case EXIT_REASON_PAUSE_INSTRUCTION: From 69c097552502a1be956861d01b2b91b56f789c52 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 14 Apr 2020 22:12:41 +0000 Subject: [PATCH 07/17] kvm: nVMX: match comment with return type for nested_vmx_exit_reflected nested_vmx_exit_reflected() returns a bool, not int. As such, refer to the return values as true/false in the comment instead of 1/0. Signed-off-by: Oliver Upton Message-Id: <20200414221241.134103-1-oupton@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0d1400fa1e22..fd78ffbde644 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5551,7 +5551,7 @@ static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) } /* - * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we + * Return true if we should exit from L2 to L1 to handle an exit, or false if we * should handle it ourselves in L0 (and then continue L2). Only call this * when in is_guest_mode (L2). */ From b4fd630812a08092ab965b08e70a3078be0ee967 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 14 Apr 2020 13:36:12 +0200 Subject: [PATCH 08/17] KVM: SVM: Do not mark svm_vcpu_run with STACK_FRAME_NON_STANDARD svm_vcpu_run does not change stack or frame pointer anymore. 
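
For context, the annotation being dropped is only an objtool hint; a sketch
of its definition (paraphrased from include/linux/frame.h of this era, the
exact spelling may differ) shows that it merely exempts a function from
objtool's frame pointer validation:

  /* Emits a pointer to func into a section objtool scans and the linker discards. */
  #define STACK_FRAME_NON_STANDARD(func) \
  	static void __used __section(.discard.func_stack_frame_non_standard) \
  		*__func_stack_frame_non_standard_##func = func

With the inline assembly moved out to vmenter.S, svm_vcpu_run() is plain C
again and needs no exemption.
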
Cc: Paolo Bonzini Signed-off-by: Uros Bizjak Message-Id: <20200414113612.104501-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 117bb0b28535..683f3817f896 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3404,7 +3404,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } -STACK_FRAME_NON_STANDARD(svm_vcpu_run); static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) { From b2bce0a589cafcf1a8755627b932c80621aef575 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 11 Apr 2020 18:09:27 +0200 Subject: [PATCH 09/17] KVM: SVM: Fix build error due to missing release_pages() include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: arch/x86/kvm/svm/sev.c: In function ‘sev_pin_memory’: arch/x86/kvm/svm/sev.c:360:3: error: implicit declaration of function ‘release_pages’;\ did you mean ‘reclaim_pages’? [-Werror=implicit-function-declaration] 360 | release_pages(pages, npinned); | ^~~~~~~~~~~~~ | reclaim_pages because svm.c includes pagemap.h but the carved out sev.c needs it too. Triggered by a randconfig build. Fixes: eaf78265a4ab ("KVM: SVM: Move SEV code to separate file") Signed-off-by: Borislav Petkov Message-Id: <20200411160927.27954-1-bp@alien8.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5ffe041dc0e8..cf912b4aaba8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "x86.h" From b61f62d408959a37bb6b19f9521b682aec3a5601 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 9 Apr 2020 14:04:40 +0200 Subject: [PATCH 10/17] KVM: SVM: Do not setup frame pointer in __svm_vcpu_run __svm_vcpu_run is a leaf function and does not need a frame pointer. %rbp is also destroyed a few instructions later when guest registers are loaded. Cc: Paolo Bonzini Signed-off-by: Uros Bizjak Message-Id: <20200409120440.1427215-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/vmenter.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 723887e35e95..bf944334003a 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -36,7 +36,6 @@ */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP - mov %_ASM_SP, %_ASM_BP #ifdef CONFIG_X86_64 push %r15 push %r14 From 56a87e5d997b722acf2a5dd5c938d574d8c95da6 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 9 Apr 2020 13:49:26 +0200 Subject: [PATCH 11/17] KVM: SVM: Fix __svm_vcpu_run declaration. The function returns no value. 
Cc: Paolo Bonzini Fixes: 199cd1d7b534 ("KVM: SVM: Split svm_vcpu_run inline assembly to separate file") Signed-off-by: Uros Bizjak Message-Id: <20200409114926.1407442-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 683f3817f896..2f379bacbb26 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3276,7 +3276,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); +void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); static void svm_vcpu_run(struct kvm_vcpu *vcpu) { From 7289fdb5dcdbc5155b5531529c44105868a762f2 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Thu, 16 Apr 2020 12:11:52 -0700 Subject: [PATCH 12/17] KVM: Remove CREATE_IRQCHIP/SET_PIT2 race Fixes a NULL pointer dereference, caused by the PIT firing an interrupt before the interrupt table has been initialized. SET_PIT2 can race with the creation of the IRQchip. In particular, if SET_PIT2 is called with a low PIT timer period (after the creation of the IOAPIC, but before the instantiation of the irq routes), the PIT can fire an interrupt at an uninitialized table. Signed-off-by: Steve Rutherford Signed-off-by: Jon Cargille Reviewed-by: Jim Mattson Message-Id: <20200416191152.259434-1-jcargill@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 027dfd278a97..3cc3f673785c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5049,10 +5049,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&u.ps, argp, sizeof(u.ps))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit_out; r = kvm_vm_ioctl_set_pit(kvm, &u.ps); +set_pit_out: + mutex_unlock(&kvm->lock); break; } case KVM_GET_PIT2: { @@ -5072,10 +5075,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit2_out; r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); +set_pit2_out: + mutex_unlock(&kvm->lock); break; } case KVM_REINJECT_CONTROL: { From 2ca1a06a5440ae8aa7bb0709336d759395b7bbb8 Mon Sep 17 00:00:00 2001 From: Venkatesh Srinivas Date: Thu, 16 Apr 2020 11:42:54 -0700 Subject: [PATCH 13/17] kvm: Handle reads of SandyBridge RAPL PMU MSRs rather than injecting #GP Linux 3.14 unconditionally reads the RAPL PMU MSRs on boot, without handling General Protection Faults on reading those MSRs. Rather than injecting a #GP, which prevents boot, handle the MSRs by returning 0 for their data. Zero was checked to be safe by code review of the RAPL PMU driver and in discussion with the original driver author (eranian@google.com). 
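
For illustration, a minimal sketch (hypothetical code, not the actual 3.14
driver) of the kind of unguarded boot-time read involved; a plain rdmsrl()
has no #GP fixup, so a refused MSR was fatal to boot, while a read that
returns 0 completes harmlessly:

  static u64 rapl_probe_power_unit(void)
  {
  	u64 units;

  	/* Previously raised an unhandled #GP under KVM; now reads 0. */
  	rdmsrl(MSR_RAPL_POWER_UNIT, units);
  	return units;
  }

A driver that must survive hypervisors which do inject #GP would instead
use rdmsrl_safe(), which routes the fault to an exception fixup and
returns an error code.
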
Signed-off-by: Venkatesh Srinivas
Signed-off-by: Jon Cargille
Reviewed-by: Jim Mattson
Message-Id: <20200416184254.248374-1-jcargill@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3cc3f673785c..919d3c8f9b65 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3060,6 +3060,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_PERF_CTL:
 	case MSR_AMD64_DC_CFG:
 	case MSR_F15H_EX_CFG:
+	/*
+	 * Intel Sandy Bridge CPUs must support the RAPL (running average power
+	 * limit) MSRs. Just return 0, as we do not want to expose the host
+	 * data here. Do not conditionalize this on CPUID, as KVM does not do
+	 * so for existing CPU-specific MSRs.
+	 */
+	case MSR_RAPL_POWER_UNIT:
+	case MSR_PP0_ENERGY_STATUS:	/* Power plane 0 (core) */
+	case MSR_PP1_ENERGY_STATUS:	/* Power plane 1 (graphics uncore) */
+	case MSR_PKG_ENERGY_STATUS:	/* Total package */
+	case MSR_DRAM_ENERGY_STATUS:	/* DRAM controller */
 		msr_info->data = 0;
 		break;
 	case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:

From d47c4c454ab023feae8c5be56bb14927d064bac0 Mon Sep 17 00:00:00 2001
From: Eric Farman
Date: Wed, 15 Apr 2020 21:03:53 +0200
Subject: [PATCH 14/17] KVM: s390: Fix PV check in deliverable_irqs()

The diag 0x44 handler, which handles a directed yield, goes into a
codepath that does a kvm_for_each_vcpu() and ultimately
deliverable_irqs().  The new check for kvm_s390_pv_cpu_is_protected()
contains an assertion that the vcpu->mutex is held, which isn't going
to be the case in this scenario.

The result is a plethora of these messages if the lock debugging is
enabled, and thus an implication that we have a problem.

WARNING: CPU: 9 PID: 16167 at arch/s390/kvm/kvm-s390.h:239 deliverable_irqs+0x1c6/0x1d0 [kvm]
...snip...
Call Trace:
 [<000003ff80429bf2>] deliverable_irqs+0x1ca/0x1d0 [kvm]
([<000003ff80429b34>] deliverable_irqs+0x10c/0x1d0 [kvm])
 [<000003ff8042ba82>] kvm_s390_vcpu_has_irq+0x2a/0xa8 [kvm]
 [<000003ff804101e2>] kvm_arch_dy_runnable+0x22/0x38 [kvm]
 [<000003ff80410284>] kvm_vcpu_on_spin+0x8c/0x1d0 [kvm]
 [<000003ff80436888>] kvm_s390_handle_diag+0x3b0/0x768 [kvm]
 [<000003ff80425af4>] kvm_handle_sie_intercept+0x1cc/0xcd0 [kvm]
 [<000003ff80422bb0>] __vcpu_run+0x7b8/0xfd0 [kvm]
 [<000003ff80423de6>] kvm_arch_vcpu_ioctl_run+0xee/0x3e0 [kvm]
 [<000003ff8040ccd8>] kvm_vcpu_ioctl+0x2c8/0x8d0 [kvm]
 [<00000001504ced06>] ksys_ioctl+0xae/0xe8
 [<00000001504cedaa>] __s390x_sys_ioctl+0x2a/0x38
 [<0000000150cb9034>] system_call+0xd8/0x2d8
2 locks held by CPU 2/KVM/16167:
 #0: 00000001951980c0 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x90/0x8d0 [kvm]
 #1: 000000019599c0f0 (&kvm->srcu){....}, at: __vcpu_run+0x4bc/0xfd0 [kvm]
Last Breaking-Event-Address:
 [<000003ff80429b34>] deliverable_irqs+0x10c/0x1d0 [kvm]
irq event stamp: 11967
hardirqs last enabled at (11975): [<00000001502992f2>] console_unlock+0x4ca/0x650
hardirqs last disabled at (11982): [<0000000150298ee8>] console_unlock+0xc0/0x650
softirqs last enabled at (7940): [<0000000150cba6ca>] __do_softirq+0x422/0x4d8
softirqs last disabled at (7929): [<00000001501cd688>] do_softirq_own_stack+0x70/0x80

Considering what's being done here, let's fix this by removing the
mutex assertion rather than acquiring the mutex for every other vcpu.
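
For reference, a simplified sketch of why the swap below is safe to make
(paraphrased from arch/s390/kvm/kvm-s390.h; the exact body may differ):
kvm_s390_pv_cpu_is_protected() is the same handle test plus the lockdep
assertion that fires on this lockless path, so testing the handle directly
preserves the semantics while dropping the assertion.

  static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
  {
  	lockdep_assert_held(&vcpu->mutex);	/* the assertion in question */
  	return !!kvm_s390_pv_cpu_get_handle(vcpu);
  }
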
Fixes: 201ae986ead7 ("KVM: s390: protvirt: Implement interrupt injection") Signed-off-by: Eric Farman Signed-off-by: Christian Borntraeger Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20200415190353.63625-1-farman@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 8191106bf7b9..bfb481134994 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -393,7 +393,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; /* PV guest cpus can have a single interruption injected at a time. */ - if (kvm_s390_pv_cpu_is_protected(vcpu) && + if (kvm_s390_pv_cpu_get_handle(vcpu) && vcpu->arch.sie_block->iictl != IICTL_CODE_NONE) active_mask &= ~(IRQ_PEND_EXT_II_MASK | IRQ_PEND_IO_MASK | From 2a173ec993baa6a97e7b0fb89240200a88d90746 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 17 Apr 2020 17:29:36 +0200 Subject: [PATCH 15/17] MAINTAINERS: add a reviewer for KVM/s390 Signed-off-by: Claudio Imbrenda Signed-off-by: Christian Borntraeger Acked-by: Christian Borntraeger Acked-by: Cornelia Huck Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20200417152936.772256-1-imbrenda@linux.ibm.com --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b816a453b10e..de7eb50c8c81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9329,6 +9329,7 @@ M: Christian Borntraeger M: Janosch Frank R: David Hildenbrand R: Cornelia Huck +R: Claudio Imbrenda L: kvm@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ From 7f4b5cde24094127ace370c3c6b82fef65d9f71f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 20 Apr 2020 11:17:37 -0500 Subject: [PATCH 16/17] kvm: Disable objtool frame pointer checking for vmenter.S Frame pointers are completely broken by vmenter.S because it clobbers RBP: arch/x86/kvm/svm/vmenter.o: warning: objtool: __svm_vcpu_run()+0xe4: BP used as a scratch register That's unavoidable, so just skip checking that file when frame pointers are configured in. On the other hand, ORC can handle that code just fine, so leave objtool enabled in the !FRAME_POINTER case. 
Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Message-Id: <01fae42917bacad18be8d2cbc771353da6603473.1587398610.git.jpoimboe@redhat.com> Tested-by: Randy Dunlap # build-tested Fixes: 199cd1d7b534 ("KVM: SVM: Split svm_vcpu_run inline assembly to separate file") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a789759b7261..4a3081e9f4b5 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,6 +3,10 @@ ccflags-y += -Iarch/x86/kvm ccflags-$(CONFIG_KVM_WERROR) += -Werror +ifeq ($(CONFIG_FRAME_POINTER),y) +OBJECT_FILES_NON_STANDARD_vmenter.o := y +endif + KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ From ae49dedaa92b55258544aace7c585094b862ef79 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Apr 2020 14:23:43 +1000 Subject: [PATCH 17/17] KVM: PPC: Book3S HV: Handle non-present PTEs in page fault functions Since cd758a9b57ee "KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler", it's been possible in fairly rare circumstances to load a non-present PTE in kvmppc_book3s_hv_page_fault() when running a guest on a POWER8 host. Because that case wasn't checked for, we could misinterpret the non-present PTE as being a cache-inhibited PTE. That could mismatch with the corresponding hash PTE, which would cause the function to fail with -EFAULT a little further down. That would propagate up to the KVM_RUN ioctl() generally causing the KVM userspace (usually qemu) to fall over. This addresses the problem by catching that case and returning to the guest instead. For completeness, this fixes the radix page fault handler in the same way. For radix this didn't cause any obvious misbehaviour, because we ended up putting the non-present PTE into the guest's partition-scoped page tables, leading immediately to another hypervisor data/instruction storage interrupt, which would go through the page fault path again and fix things up. Fixes: cd758a9b57ee "KVM: PPC: Book3S HV: Use __gfn_to_pfn_memslot in HPT page fault handler" Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1820402 Reported-by: David Gibson Tested-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 +++++---- arch/powerpc/kvm/book3s_64_mmu_radix.c | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 3aecec890d6f..20b7dce739ad 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -604,18 +604,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + pte = __pte(0); + if (ptep) + pte = *ptep; + local_irq_enable(); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. 
*/ - if (!ptep) { - local_irq_enable(); + if (!pte_present(pte)) { if (page) put_page(page); return RESUME_GUEST; } - pte = *ptep; - local_irq_enable(); hpa = pte_pfn(pte) << PAGE_SHIFT; pte_size = PAGE_SIZE; if (shift) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 134fbc1f029f..7bf94ba62f6d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -815,18 +815,19 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + pte = __pte(0); + if (ptep) + pte = *ptep; + local_irq_enable(); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. */ - if (!ptep) { - local_irq_enable(); + if (!pte_present(pte)) { if (page) put_page(page); return RESUME_GUEST; } - pte = *ptep; - local_irq_enable(); /* If we're logging dirty pages, always map single pages */ large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);