mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-29 17:23:36 +00:00
- Make sure clearing CPU buffers using VERW happens at the latest possible
point in the return-to-userspace path, otherwise memory accesses after the VERW execution could cause data to land in CPU buffers again -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmXbG7IACgkQEsHwGGHe VUoEEg//d1qt/PEWCC23wMO6gLMl4J/e4ZQAuGOKGed/jUmOaQKpHJmpDMRc0li5 llRYDdfE0ikmtQT3t9vQDs3xbWfT5bLMsijliRimb193FaS1HGlHMMS1nxhfjyfv MecbWfkwzX2JnrxJpsbfue+7kks3HyIXYsXV7kSFiHavk4F3GFQXYLO11pKbNQwN 9UfjJDeVsrcWPGCHhoPKF5NHUnQKIA8ZC6g8yBq894AtdWOhFY7ePKBZefUWQQ1n myc5GJ3dKFICMCZvkMABtHYCmHU/W3y/6tPtnrz3kT8GdCIAHG+K9VRUfY1ml94H x327GoM3sEzHLsPizKy00/Uao+j6FOtv631LoDLsO2MF3sHoTZDaSgg5y2D/ZC7t IZdK3mUGtdINRhGiWWpdxyaMfkQ62cdZk8FkeYkRAewYS6WYSdMX3cPqFNy4Ss5u r3reMOD3JcxAatcqhHMXjARMfY+N08gQBpxBul3ejgH8t8aY7xJx6Vggty5kBlHZ 7urV9jIRxSXfbBmOcYu6HP1ucFLWNSUQCBn7Imrh+5zbE1XVv7NaAWvT4Nmgb0/X 57fHoYYSVwaJ0k3zWWM7QYEdcuJ7IZnVgTCQYx26Ec2AOQRxE9ose+awTLYtTbp1 T+XaOlItHKMRzx9K46D7xHwmC5qiokFki3exp5vfGZxGyT3+t/c= =n5us -----END PGP SIGNATURE----- Merge tag 'x86_urgent_for_v6.8_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Borislav Petkov: - Make sure clearing CPU buffers using VERW happens at the latest possible point in the return-to-userspace path, otherwise memory accesses after the VERW execution could cause data to land in CPU buffers again * tag 'x86_urgent_for_v6.8_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: KVM/VMX: Move VERW closer to VMentry for MDS mitigation KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key x86/entry_32: Add VERW just before userspace transition x86/entry_64: Add VERW just before userspace transition x86/bugs: Add asm helpers for executing VERW
This commit is contained in:
commit
1eee4ef38c
@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
|
||||
|
||||
mds_clear_cpu_buffers()
|
||||
|
||||
Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
|
||||
Other than CFLAGS.ZF, this macro doesn't clobber any registers.
|
||||
|
||||
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
|
||||
(idle) transitions.
|
||||
|
||||
@ -138,17 +141,30 @@ Mitigation points
|
||||
|
||||
When transitioning from kernel to user space the CPU buffers are flushed
|
||||
on affected CPUs when the mitigation is not disabled on the kernel
|
||||
command line. The migitation is enabled through the static key
|
||||
mds_user_clear.
|
||||
command line. The mitigation is enabled through the feature flag
|
||||
X86_FEATURE_CLEAR_CPU_BUF.
|
||||
|
||||
The mitigation is invoked in prepare_exit_to_usermode() which covers
|
||||
all but one of the kernel to user space transitions. The exception
|
||||
is when we return from a Non Maskable Interrupt (NMI), which is
|
||||
handled directly in do_nmi().
|
||||
The mitigation is invoked just before transitioning to userspace after
|
||||
user registers are restored. This is done to minimize the window in
|
||||
which kernel data could be accessed after VERW e.g. via an NMI after
|
||||
VERW.
|
||||
|
||||
(The reason that NMI is special is that prepare_exit_to_usermode() can
|
||||
enable IRQs. In NMI context, NMIs are blocked, and we don't want to
|
||||
enable IRQs with NMIs blocked.)
|
||||
**Corner case not handled**
|
||||
Interrupts returning to kernel don't clear CPUs buffers since the
|
||||
exit-to-user path is expected to do that anyways. But, there could be
|
||||
a case when an NMI is generated in kernel after the exit-to-user path
|
||||
has cleared the buffers. This case is not handled and NMI returning to
|
||||
kernel don't clear CPU buffers because:
|
||||
|
||||
1. It is rare to get an NMI after VERW, but before returning to userspace.
|
||||
2. For an unprivileged user, there is no known way to make that NMI
|
||||
less rare or target it.
|
||||
3. It would take a large number of these precisely-timed NMIs to mount
|
||||
an actual attack. There's presumably not enough bandwidth.
|
||||
4. The NMI in question occurs after a VERW, i.e. when user state is
|
||||
restored and most interesting data is already scrubbed. Whats left
|
||||
is only the data that NMI touches, and that may or may not be of
|
||||
any interest.
|
||||
|
||||
|
||||
2. C-State transition
|
||||
|
@ -6,6 +6,9 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/msr-index.h>
|
||||
#include <asm/unwind_hints.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/cache.h>
|
||||
|
||||
.pushsection .noinstr.text, "ax"
|
||||
|
||||
@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
|
||||
EXPORT_SYMBOL_GPL(entry_ibpb);
|
||||
|
||||
.popsection
|
||||
|
||||
/*
|
||||
* Define the VERW operand that is disguised as entry code so that
|
||||
* it can be referenced with KPTI enabled. This ensure VERW can be
|
||||
* used late in exit-to-user path after page tables are switched.
|
||||
*/
|
||||
.pushsection .entry.text, "ax"
|
||||
|
||||
.align L1_CACHE_BYTES, 0xcc
|
||||
SYM_CODE_START_NOALIGN(mds_verw_sel)
|
||||
UNWIND_HINT_UNDEFINED
|
||||
ANNOTATE_NOENDBR
|
||||
.word __KERNEL_DS
|
||||
.align L1_CACHE_BYTES, 0xcc
|
||||
SYM_CODE_END(mds_verw_sel);
|
||||
/* For KVM */
|
||||
EXPORT_SYMBOL_GPL(mds_verw_sel);
|
||||
|
||||
.popsection
|
||||
|
||||
|
@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
|
||||
BUG_IF_WRONG_CR3 no_user_check=1
|
||||
popfl
|
||||
popl %eax
|
||||
CLEAR_CPU_BUFFERS
|
||||
|
||||
/*
|
||||
* Return back to the vDSO, which will pop ecx and edx.
|
||||
@ -954,6 +955,7 @@ restore_all_switch_stack:
|
||||
|
||||
/* Restore user state */
|
||||
RESTORE_REGS pop=4 # skip orig_eax/error_code
|
||||
CLEAR_CPU_BUFFERS
|
||||
.Lirq_return:
|
||||
/*
|
||||
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
|
||||
@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi)
|
||||
|
||||
/* Not on SYSENTER stack. */
|
||||
call exc_nmi
|
||||
CLEAR_CPU_BUFFERS
|
||||
jmp .Lnmi_return
|
||||
|
||||
.Lnmi_from_sysenter_stack:
|
||||
|
@ -161,6 +161,7 @@ syscall_return_via_sysret:
|
||||
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
|
||||
ANNOTATE_NOENDBR
|
||||
swapgs
|
||||
CLEAR_CPU_BUFFERS
|
||||
sysretq
|
||||
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
|
||||
ANNOTATE_NOENDBR
|
||||
@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
|
||||
|
||||
.Lswapgs_and_iret:
|
||||
swapgs
|
||||
CLEAR_CPU_BUFFERS
|
||||
/* Assert that the IRET frame indicates user mode. */
|
||||
testb $3, 8(%rsp)
|
||||
jnz .Lnative_iret
|
||||
@ -723,6 +725,8 @@ native_irq_return_ldt:
|
||||
*/
|
||||
popq %rax /* Restore user RAX */
|
||||
|
||||
CLEAR_CPU_BUFFERS
|
||||
|
||||
/*
|
||||
* RSP now points to an ordinary IRET frame, except that the page
|
||||
* is read-only and RSP[31:16] are preloaded with the userspace
|
||||
@ -1449,6 +1453,12 @@ nmi_restore:
|
||||
std
|
||||
movq $0, 5*8(%rsp) /* clear "NMI executing" */
|
||||
|
||||
/*
|
||||
* Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
|
||||
* NMI in kernel after user state is restored. For an unprivileged user
|
||||
* these conditions are hard to meet.
|
||||
*/
|
||||
|
||||
/*
|
||||
* iretq reads the "iret" frame and exits the NMI stack in a
|
||||
* single instruction. We are returning to kernel mode, so this
|
||||
@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
ENDBR
|
||||
mov $-ENOSYS, %eax
|
||||
CLEAR_CPU_BUFFERS
|
||||
sysretl
|
||||
SYM_CODE_END(entry_SYSCALL32_ignore)
|
||||
|
||||
|
@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
|
||||
xorl %r9d, %r9d
|
||||
xorl %r10d, %r10d
|
||||
swapgs
|
||||
CLEAR_CPU_BUFFERS
|
||||
sysretl
|
||||
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
|
||||
ANNOTATE_NOENDBR
|
||||
|
@ -95,7 +95,7 @@
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
|
||||
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
|
||||
/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
|
||||
#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
|
||||
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
|
||||
|
@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
|
||||
static __always_inline void arch_exit_to_user_mode(void)
|
||||
{
|
||||
mds_user_clear_cpu_buffers();
|
||||
amd_clear_divider();
|
||||
}
|
||||
#define arch_exit_to_user_mode arch_exit_to_user_mode
|
||||
|
@ -315,6 +315,17 @@
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Macro to execute VERW instruction that mitigate transient data sampling
|
||||
* attacks such as MDS. On affected systems a microcode update overloaded VERW
|
||||
* instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
|
||||
*
|
||||
* Note: Only the memory operand variant of VERW clears the CPU buffers.
|
||||
*/
|
||||
.macro CLEAR_CPU_BUFFERS
|
||||
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
|
||||
.endm
|
||||
|
||||
#else /* __ASSEMBLY__ */
|
||||
|
||||
#define ANNOTATE_RETPOLINE_SAFE \
|
||||
@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
|
||||
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
|
||||
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(mds_user_clear);
|
||||
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
|
||||
|
||||
extern u16 mds_verw_sel;
|
||||
|
||||
#include <asm/segment.h>
|
||||
|
||||
/**
|
||||
@ -561,17 +573,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
|
||||
asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
|
||||
}
|
||||
|
||||
/**
|
||||
* mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
|
||||
*
|
||||
* Clear CPU buffers if the corresponding static key is enabled
|
||||
*/
|
||||
static __always_inline void mds_user_clear_cpu_buffers(void)
|
||||
{
|
||||
if (static_branch_likely(&mds_user_clear))
|
||||
mds_clear_cpu_buffers();
|
||||
}
|
||||
|
||||
/**
|
||||
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
|
||||
*
|
||||
|
@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
|
||||
/* Control unconditional IBPB in switch_mm() */
|
||||
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
|
||||
|
||||
/* Control MDS CPU buffer clear before returning to user space */
|
||||
DEFINE_STATIC_KEY_FALSE(mds_user_clear);
|
||||
EXPORT_SYMBOL_GPL(mds_user_clear);
|
||||
/* Control MDS CPU buffer clear before idling (halt, mwait) */
|
||||
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
|
||||
EXPORT_SYMBOL_GPL(mds_idle_clear);
|
||||
@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
|
||||
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
|
||||
mds_mitigation = MDS_MITIGATION_VMWERV;
|
||||
|
||||
static_branch_enable(&mds_user_clear);
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
|
||||
|
||||
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
|
||||
(mds_nosmt || cpu_mitigations_auto_nosmt()))
|
||||
@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
|
||||
* For guests that can't determine whether the correct microcode is
|
||||
* present on host, enable the mitigation for UCODE_NEEDED as well.
|
||||
*/
|
||||
static_branch_enable(&mds_user_clear);
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
|
||||
|
||||
if (taa_nosmt || cpu_mitigations_auto_nosmt())
|
||||
cpu_smt_disable(false);
|
||||
@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void)
|
||||
*/
|
||||
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
|
||||
boot_cpu_has(X86_FEATURE_RTM)))
|
||||
static_branch_enable(&mds_user_clear);
|
||||
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
|
||||
else
|
||||
static_branch_enable(&mmio_stale_data_clear);
|
||||
|
||||
@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void)
|
||||
if (cpu_mitigations_off())
|
||||
return;
|
||||
|
||||
if (!static_key_enabled(&mds_user_clear))
|
||||
if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
|
||||
* mitigation, if necessary.
|
||||
* X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
|
||||
* Stale Data mitigation, if necessary.
|
||||
*/
|
||||
if (mds_mitigation == MDS_MITIGATION_OFF &&
|
||||
boot_cpu_has_bug(X86_BUG_MDS)) {
|
||||
|
@ -563,9 +563,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
|
||||
}
|
||||
if (this_cpu_dec_return(nmi_state))
|
||||
goto nmi_restart;
|
||||
|
||||
if (user_mode(regs))
|
||||
mds_user_clear_cpu_buffers();
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL)
|
||||
|
@ -2,7 +2,10 @@
|
||||
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
|
||||
#define __KVM_X86_VMX_RUN_FLAGS_H
|
||||
|
||||
#define VMX_RUN_VMRESUME (1 << 0)
|
||||
#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
|
||||
#define VMX_RUN_VMRESUME_SHIFT 0
|
||||
#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
|
||||
|
||||
#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
|
||||
#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
|
||||
|
||||
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
|
||||
|
@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
||||
mov (%_ASM_SP), %_ASM_AX
|
||||
|
||||
/* Check if vmlaunch or vmresume is needed */
|
||||
test $VMX_RUN_VMRESUME, %ebx
|
||||
bt $VMX_RUN_VMRESUME_SHIFT, %ebx
|
||||
|
||||
/* Load guest registers. Don't clobber flags. */
|
||||
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
|
||||
@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
||||
/* Load guest RAX. This kills the @regs pointer! */
|
||||
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
|
||||
|
||||
/* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */
|
||||
jz .Lvmlaunch
|
||||
/* Clobbers EFLAGS.ZF */
|
||||
CLEAR_CPU_BUFFERS
|
||||
|
||||
/* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
|
||||
jnc .Lvmlaunch
|
||||
|
||||
/*
|
||||
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
|
||||
|
@ -388,7 +388,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
|
||||
|
||||
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
|
||||
/*
|
||||
* Disable VERW's behavior of clearing CPU buffers for the guest if the
|
||||
* CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
|
||||
* the mitigation. Disabling the clearing behavior provides a
|
||||
* performance boost for guests that aren't aware that manually clearing
|
||||
* CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
|
||||
* and VM-Exit.
|
||||
*/
|
||||
vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
|
||||
(host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
|
||||
!boot_cpu_has_bug(X86_BUG_MDS) &&
|
||||
!boot_cpu_has_bug(X86_BUG_TAA);
|
||||
|
||||
@ -7224,11 +7233,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
|
||||
|
||||
guest_state_enter_irqoff();
|
||||
|
||||
/* L1D Flush includes CPU buffer clear to mitigate MDS */
|
||||
/*
|
||||
* L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
|
||||
* mitigation for MDS is done late in VMentry and is still
|
||||
* executed in spite of L1D Flush. This is because an extra VERW
|
||||
* should not matter much after the big hammer L1D Flush.
|
||||
*/
|
||||
if (static_branch_unlikely(&vmx_l1d_should_flush))
|
||||
vmx_l1d_flush(vcpu);
|
||||
else if (static_branch_unlikely(&mds_user_clear))
|
||||
mds_clear_cpu_buffers();
|
||||
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
|
||||
kvm_arch_has_assigned_device(vcpu->kvm))
|
||||
mds_clear_cpu_buffers();
|
||||
|
Loading…
Reference in New Issue
Block a user