mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
Merge branch 'x86/cpu' into perf/core, to pick up revert
perf/core has an earlier version of the x86/cpu tree merged, to avoid
conflicts, and due to this we want to pick up this ABI impacting
revert as well:
049331f277
: ("x86/fsgsbase: Revert FSGSBASE support")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
f584dd32ed
@ -2857,8 +2857,6 @@
|
||||
no5lvl [X86-64] Disable 5-level paging mode. Forces
|
||||
kernel to use 4-level paging instead.
|
||||
|
||||
nofsgsbase [X86] Disables FSGSBASE instructions.
|
||||
|
||||
no_console_suspend
|
||||
[HW] Never suspend the console
|
||||
Disable suspending of consoles during suspend and
|
||||
|
@ -108,12 +108,3 @@ We try to only use IST entries and the paranoid entry code for vectors
|
||||
that absolutely need the more expensive check for the GS base - and we
|
||||
generate all 'normal' entry points with the regular (faster) paranoid=0
|
||||
variant.
|
||||
|
||||
On a FSGSBASE system, however, user space can set GS without kernel
|
||||
interaction. It means the value of GS base itself does not imply anything,
|
||||
whether a kernel value or a user space value. So, there is no longer a safe
|
||||
way to check whether the exception is entering from user mode or kernel
|
||||
mode in the paranoid entry code path. So the GSBASE value needs to be read
|
||||
out, saved and the kernel GSBASE value written. On exit the saved GSBASE
|
||||
value needs to be restored unconditionally. The non paranoid entry/exit
|
||||
code still uses SWAPGS unconditionally as the state is known.
|
||||
|
@ -1,199 +0,0 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Using FS and GS segments in user space applications
|
||||
===================================================
|
||||
|
||||
The x86 architecture supports segmentation. Instructions which access
|
||||
memory can use segment register based addressing mode. The following
|
||||
notation is used to address a byte within a segment:
|
||||
|
||||
Segment-register:Byte-address
|
||||
|
||||
The segment base address is added to the Byte-address to compute the
|
||||
resulting virtual address which is accessed. This allows access to multiple
|
||||
instances of data with the identical Byte-address, i.e. the same code. The
|
||||
selection of a particular instance is purely based on the base-address in
|
||||
the segment register.
|
||||
|
||||
In 32-bit mode the CPU provides 6 segments, which also support segment
|
||||
limits. The limits can be used to enforce address space protections.
|
||||
|
||||
In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
|
||||
always 0 to provide a full 64bit address space. The FS and GS segments are
|
||||
still functional in 64-bit mode.
|
||||
|
||||
Common FS and GS usage
|
||||
------------------------------
|
||||
|
||||
The FS segment is commonly used to address Thread Local Storage (TLS). FS
|
||||
is usually managed by runtime code or a threading library. Variables
|
||||
declared with the '__thread' storage class specifier are instantiated per
|
||||
thread and the compiler emits the FS: address prefix for accesses to these
|
||||
variables. Each thread has its own FS base address so common code can be
|
||||
used without complex address offset calculations to access the per thread
|
||||
instances. Applications should not use FS for other purposes when they use
|
||||
runtimes or threading libraries which manage the per thread FS.
|
||||
|
||||
The GS segment has no common use and can be used freely by
|
||||
applications. GCC and Clang support GS based addressing via address space
|
||||
identifiers.
|
||||
|
||||
Reading and writing the FS/GS base address
|
||||
------------------------------------------
|
||||
|
||||
There exist two mechanisms to read and write the FS/GS base address:
|
||||
|
||||
- the arch_prctl() system call
|
||||
|
||||
- the FSGSBASE instruction family
|
||||
|
||||
Accessing FS/GS base with arch_prctl()
|
||||
--------------------------------------
|
||||
|
||||
The arch_prctl(2) based mechanism is available on all 64bit CPUs and all
|
||||
kernel versions.
|
||||
|
||||
Reading the base:
|
||||
|
||||
arch_prctl(ARCH_GET_FS, &fsbase);
|
||||
arch_prctl(ARCH_GET_GS, &gsbase);
|
||||
|
||||
Writing the base:
|
||||
|
||||
arch_prctl(ARCH_SET_FS, fsbase);
|
||||
arch_prctl(ARCH_SET_GS, gsbase);
|
||||
|
||||
The ARCH_SET_GS prctl may be disabled depending on kernel configuration
|
||||
and security settings.
|
||||
|
||||
Accessing FS/GS base with the FSGSBASE instructions
|
||||
---------------------------------------------------
|
||||
|
||||
With the Ivy Bridge CPU generation Intel introduced a new set of
|
||||
instructions to access the FS and GS base registers directly from user
|
||||
space. These instructions are also supported on AMD Family 17H CPUs. The
|
||||
following instructions are available:
|
||||
|
||||
=============== ===========================
|
||||
RDFSBASE %reg Read the FS base register
|
||||
RDGSBASE %reg Read the GS base register
|
||||
WRFSBASE %reg Write the FS base register
|
||||
WRGSBASE %reg Write the GS base register
|
||||
=============== ===========================
|
||||
|
||||
The instructions avoid the overhead of the arch_prctl() syscall and allow
|
||||
more flexible usage of the FS/GS addressing modes in user space
|
||||
applications. This does not prevent conflicts between threading libraries
|
||||
and runtimes which utilize FS and applications which want to use it for
|
||||
their own purpose.
|
||||
|
||||
FSGSBASE instructions enablement
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
|
||||
available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
|
||||
|
||||
The availability of the instructions does not enable them
|
||||
automatically. The kernel has to enable them explicitly in CR4. The
|
||||
reason for this is that older kernels make assumptions about the values in
|
||||
the GS register and enforce them when GS base is set via
|
||||
arch_prctl(). Allowing user space to write arbitrary values to GS base
|
||||
would violate these assumptions and cause malfunction.
|
||||
|
||||
On kernels which do not enable FSGSBASE the execution of the FSGSBASE
|
||||
instructions will fault with a #UD exception.
|
||||
|
||||
The kernel provides reliable information about the enabled state in the
|
||||
ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the
|
||||
kernel has FSGSBASE instructions enabled and applications can use them.
|
||||
The following code example shows how this detection works::
|
||||
|
||||
#include <sys/auxv.h>
|
||||
#include <elf.h>
|
||||
|
||||
/* Will be eventually in asm/hwcap.h */
|
||||
#ifndef HWCAP2_FSGSBASE
|
||||
#define HWCAP2_FSGSBASE (1 << 1)
|
||||
#endif
|
||||
|
||||
....
|
||||
|
||||
unsigned val = getauxval(AT_HWCAP2);
|
||||
|
||||
if (val & HWCAP2_FSGSBASE)
|
||||
printf("FSGSBASE enabled\n");
|
||||
|
||||
FSGSBASE instructions compiler support
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
|
||||
instructions. Clang supports them as well.
|
||||
|
||||
=================== ===========================
|
||||
_readfsbase_u64() Read the FS base register
|
||||
_readgsbase_u64() Read the GS base register
|
||||
_writefsbase_u64() Write the FS base register
|
||||
_writegsbase_u64() Write the GS base register
|
||||
=================== ===========================
|
||||
|
||||
To utilize these intrinsics <immintrin.h> must be included in the source
|
||||
code and the compiler option -mfsgsbase has to be added.
|
||||
|
||||
Compiler support for FS/GS based addressing
|
||||
-------------------------------------------
|
||||
|
||||
GCC version 6 and newer provide support for FS/GS based addressing via
|
||||
Named Address Spaces. GCC implements the following address space
|
||||
identifiers for x86:
|
||||
|
||||
========= ====================================
|
||||
__seg_fs Variable is addressed relative to FS
|
||||
__seg_gs Variable is addressed relative to GS
|
||||
========= ====================================
|
||||
|
||||
The preprocessor symbols __SEG_FS and __SEG_GS are defined when these
|
||||
address spaces are supported. Code which implements fallback modes should
|
||||
check whether these symbols are defined. Usage example::
|
||||
|
||||
#ifdef __SEG_GS
|
||||
|
||||
long data0 = 0;
|
||||
long data1 = 1;
|
||||
|
||||
long __seg_gs *ptr;
|
||||
|
||||
/* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */
|
||||
....
|
||||
|
||||
/* Set GS to point to data0 */
|
||||
_writegsbase_u64(&data0);
|
||||
|
||||
/* Access offset 0 of GS */
|
||||
ptr = 0;
|
||||
printf("data0 = %ld\n", *ptr);
|
||||
|
||||
/* Set GS to point to data1 */
|
||||
_writegsbase_u64(&data1);
|
||||
/* ptr still addresses offset 0! */
|
||||
printf("data1 = %ld\n", *ptr);
|
||||
|
||||
|
||||
Clang does not provide the GCC address space identifiers, but it provides
|
||||
address spaces via an attribute based mechanism in Clang 5 and newer
|
||||
versions:
|
||||
|
||||
==================================== =====================================
|
||||
__attribute__((address_space(256))) Variable is addressed relative to GS
|
||||
__attribute__((address_space(257))) Variable is addressed relative to FS
|
||||
==================================== =====================================
|
||||
|
||||
FS/GS based addressing with inline assembly
|
||||
-------------------------------------------
|
||||
|
||||
In case the compiler does not support address spaces, inline assembly can
|
||||
be used for FS/GS based addressing mode::
|
||||
|
||||
mov %fs:offset, %reg
|
||||
mov %gs:offset, %reg
|
||||
|
||||
mov %reg, %fs:offset
|
||||
mov %reg, %gs:offset
|
@ -14,4 +14,3 @@ x86_64 Support
|
||||
fake-numa-for-cpusets
|
||||
cpu-hotplug-spec
|
||||
machinecheck
|
||||
fsgs
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/inst.h>
|
||||
|
||||
/*
|
||||
|
||||
@ -338,12 +337,6 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
|
||||
rdgsbase \save_reg
|
||||
GET_PERCPU_BASE \scratch_reg
|
||||
wrgsbase \scratch_reg
|
||||
.endm
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
.macro STACKLEAK_ERASE
|
||||
@ -352,39 +345,6 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
#endif
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/*
|
||||
* CPU/node NR is loaded from the limit (size) field of a special segment
|
||||
* descriptor entry in GDT.
|
||||
*/
|
||||
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
|
||||
movq $__CPUNODE_SEG, \reg
|
||||
lsl \reg, \reg
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Fetch the per-CPU GSBASE value for this processor and put it in @reg.
|
||||
* We normally use %gs for accessing per-CPU data, but we are setting up
|
||||
* %gs here and obviously can not use %gs itself to access per-CPU data.
|
||||
*/
|
||||
.macro GET_PERCPU_BASE reg:req
|
||||
ALTERNATIVE \
|
||||
"LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
|
||||
"RDPID \reg", \
|
||||
X86_FEATURE_RDPID
|
||||
andq $VDSO_CPUNODE_MASK, \reg
|
||||
movq __per_cpu_offset(, \reg, 8), \reg
|
||||
.endm
|
||||
|
||||
#else
|
||||
|
||||
.macro GET_PERCPU_BASE reg:req
|
||||
movq pcpu_unit_offsets(%rip), \reg
|
||||
.endm
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* This does 'call enter_from_user_mode' unless we can avoid it based on
|
||||
* kernel config or using the static jump infrastructure.
|
||||
|
@ -38,7 +38,6 @@
|
||||
#include <asm/export.h>
|
||||
#include <asm/frame.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/fsgsbase.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
#include "calling.h"
|
||||
@ -948,6 +947,7 @@ ENTRY(\sym)
|
||||
addq $\ist_offset, CPU_TSS_IST(\shift_ist)
|
||||
.endif
|
||||
|
||||
/* these procedures expect "no swapgs" flag in ebx */
|
||||
.if \paranoid
|
||||
jmp paranoid_exit
|
||||
.else
|
||||
@ -1164,21 +1164,24 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Save all registers in pt_regs. Return GSBASE related information
|
||||
* in EBX depending on the availability of the FSGSBASE instructions:
|
||||
*
|
||||
* FSGSBASE R/EBX
|
||||
* N 0 -> SWAPGS on exit
|
||||
* 1 -> no SWAPGS on exit
|
||||
*
|
||||
* Y GSBASE value at entry, must be restored in paranoid_exit
|
||||
* Save all registers in pt_regs, and switch gs if needed.
|
||||
* Use slow, but surefire "are we in kernel?" check.
|
||||
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
|
||||
*/
|
||||
ENTRY(paranoid_entry)
|
||||
UNWIND_HINT_FUNC
|
||||
cld
|
||||
PUSH_AND_CLEAR_REGS save_ret=1
|
||||
ENCODE_FRAME_POINTER 8
|
||||
movl $1, %ebx
|
||||
movl $MSR_GS_BASE, %ecx
|
||||
rdmsr
|
||||
testl %edx, %edx
|
||||
js 1f /* negative -> in kernel */
|
||||
SWAPGS
|
||||
xorl %ebx, %ebx
|
||||
|
||||
1:
|
||||
/*
|
||||
* Always stash CR3 in %r14. This value will be restored,
|
||||
* verbatim, at exit. Needed if paranoid_entry interrupted
|
||||
@ -1188,49 +1191,9 @@ ENTRY(paranoid_entry)
|
||||
* This is also why CS (stashed in the "iret frame" by the
|
||||
* hardware at entry) can not be used: this may be a return
|
||||
* to kernel code, but with a user CR3 value.
|
||||
*
|
||||
* Switching CR3 does not depend on kernel GSBASE so it can
|
||||
* be done before switching to the kernel GSBASE. This is
|
||||
* required for FSGSBASE because the kernel GSBASE has to
|
||||
* be retrieved from a kernel internal table.
|
||||
*/
|
||||
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
|
||||
|
||||
/*
|
||||
* Handling GSBASE depends on the availability of FSGSBASE.
|
||||
*
|
||||
* Without FSGSBASE the kernel enforces that negative GSBASE
|
||||
* values indicate kernel GSBASE. With FSGSBASE no assumptions
|
||||
* can be made about the GSBASE value when entering from user
|
||||
* space.
|
||||
*/
|
||||
ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
|
||||
|
||||
/*
|
||||
* Read the current GSBASE and store it in %rbx unconditionally,
|
||||
* retrieve and set the current CPU's kernel GSBASE. The stored value
|
||||
* has to be restored in paranoid_exit unconditionally.
|
||||
*/
|
||||
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
|
||||
ret
|
||||
|
||||
.Lparanoid_entry_checkgs:
|
||||
/* EBX = 1 -> kernel GSBASE active, no restore required */
|
||||
movl $1, %ebx
|
||||
/*
|
||||
* The kernel-enforced convention is a negative GSBASE indicates
|
||||
* a kernel value. No SWAPGS needed on entry and exit.
|
||||
*/
|
||||
movl $MSR_GS_BASE, %ecx
|
||||
rdmsr
|
||||
testl %edx, %edx
|
||||
jns .Lparanoid_entry_swapgs
|
||||
ret
|
||||
|
||||
.Lparanoid_entry_swapgs:
|
||||
SWAPGS
|
||||
/* EBX = 0 -> SWAPGS required on exit */
|
||||
xorl %ebx, %ebx
|
||||
ret
|
||||
END(paranoid_entry)
|
||||
|
||||
@ -1241,47 +1204,28 @@ END(paranoid_entry)
|
||||
*
|
||||
* We may be returning to very strange contexts (e.g. very early
|
||||
* in syscall entry), so checking for preemption here would
|
||||
* be complicated. Fortunately, there's no good reason to try
|
||||
* to handle preemption here.
|
||||
* be complicated. Fortunately, there's no good reason
|
||||
* to try to handle preemption here.
|
||||
*
|
||||
* R/EBX contains the GSBASE related information depending on the
|
||||
* availability of the FSGSBASE instructions:
|
||||
*
|
||||
* FSGSBASE R/EBX
|
||||
* N 0 -> SWAPGS on exit
|
||||
* 1 -> no SWAPGS on exit
|
||||
*
|
||||
* Y User space GSBASE, must be restored unconditionally
|
||||
* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
|
||||
*/
|
||||
ENTRY(paranoid_exit)
|
||||
UNWIND_HINT_REGS
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
TRACE_IRQS_OFF_DEBUG
|
||||
|
||||
/* Handle GS depending on FSGSBASE availability */
|
||||
ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "nop",X86_FEATURE_FSGSBASE
|
||||
|
||||
/* With FSGSBASE enabled, unconditionally restore GSBASE */
|
||||
wrgsbase %rbx
|
||||
jmp .Lparanoid_exit_no_swapgs;
|
||||
|
||||
.Lparanoid_exit_checkgs:
|
||||
/* On non-FSGSBASE systems, conditionally do SWAPGS */
|
||||
testl %ebx, %ebx
|
||||
testl %ebx, %ebx /* swapgs needed? */
|
||||
jnz .Lparanoid_exit_no_swapgs
|
||||
TRACE_IRQS_IRETQ
|
||||
/* Always restore stashed CR3 value (see paranoid_entry) */
|
||||
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
|
||||
SWAPGS_UNSAFE_STACK
|
||||
jmp .Lparanoid_exit_restore
|
||||
|
||||
.Lparanoid_exit_no_swapgs:
|
||||
TRACE_IRQS_IRETQ_DEBUG
|
||||
/* Always restore stashed CR3 value (see paranoid_entry) */
|
||||
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
|
||||
|
||||
.Lparanoid_exit_restore:
|
||||
jmp restore_regs_and_return_to_kernel
|
||||
jmp restore_regs_and_return_to_kernel
|
||||
END(paranoid_exit)
|
||||
|
||||
/*
|
||||
@ -1692,27 +1636,10 @@ end_repeat_nmi:
|
||||
/* Always restore stashed CR3 value (see paranoid_entry) */
|
||||
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
|
||||
|
||||
/*
|
||||
* The above invocation of paranoid_entry stored the GSBASE
|
||||
* related information in R/EBX depending on the availability
|
||||
* of FSGSBASE.
|
||||
*
|
||||
* If FSGSBASE is enabled, restore the saved GSBASE value
|
||||
* unconditionally, otherwise take the conditional SWAPGS path.
|
||||
*/
|
||||
ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE
|
||||
|
||||
wrgsbase %rbx
|
||||
jmp nmi_restore
|
||||
|
||||
nmi_no_fsgsbase:
|
||||
/* EBX == 0 -> invoke SWAPGS */
|
||||
testl %ebx, %ebx
|
||||
testl %ebx, %ebx /* swapgs needed? */
|
||||
jnz nmi_restore
|
||||
|
||||
nmi_swapgs:
|
||||
SWAPGS_UNSAFE_STACK
|
||||
|
||||
nmi_restore:
|
||||
POP_REGS
|
||||
|
||||
@ -1743,11 +1670,17 @@ nmi_restore:
|
||||
iretq
|
||||
END(nmi)
|
||||
|
||||
#ifndef CONFIG_IA32_EMULATION
|
||||
/*
|
||||
* This handles SYSCALL from 32-bit code. There is no way to program
|
||||
* MSRs to fully disable 32-bit SYSCALL.
|
||||
*/
|
||||
ENTRY(ignore_sysret)
|
||||
UNWIND_HINT_EMPTY
|
||||
mov $-ENOSYS, %eax
|
||||
sysret
|
||||
END(ignore_sysret)
|
||||
#endif
|
||||
|
||||
ENTRY(rewind_stack_do_exit)
|
||||
UNWIND_HINT_FUNC
|
||||
|
@ -19,62 +19,35 @@ extern unsigned long x86_gsbase_read_task(struct task_struct *task);
|
||||
extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
|
||||
extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
|
||||
|
||||
/* Must be protected by X86_FEATURE_FSGSBASE check. */
|
||||
|
||||
static __always_inline unsigned long rdfsbase(void)
|
||||
{
|
||||
unsigned long fsbase;
|
||||
|
||||
asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
|
||||
|
||||
return fsbase;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long rdgsbase(void)
|
||||
{
|
||||
unsigned long gsbase;
|
||||
|
||||
asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
|
||||
|
||||
return gsbase;
|
||||
}
|
||||
|
||||
static __always_inline void wrfsbase(unsigned long fsbase)
|
||||
{
|
||||
asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
|
||||
}
|
||||
|
||||
static __always_inline void wrgsbase(unsigned long gsbase)
|
||||
{
|
||||
asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
|
||||
}
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
/* Helper functions for reading/writing FS/GS base */
|
||||
|
||||
static inline unsigned long x86_fsbase_read_cpu(void)
|
||||
{
|
||||
unsigned long fsbase;
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_FSGSBASE))
|
||||
fsbase = rdfsbase();
|
||||
else
|
||||
rdmsrl(MSR_FS_BASE, fsbase);
|
||||
rdmsrl(MSR_FS_BASE, fsbase);
|
||||
|
||||
return fsbase;
|
||||
}
|
||||
|
||||
static inline void x86_fsbase_write_cpu(unsigned long fsbase)
|
||||
static inline unsigned long x86_gsbase_read_cpu_inactive(void)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_FSGSBASE))
|
||||
wrfsbase(fsbase);
|
||||
else
|
||||
wrmsrl(MSR_FS_BASE, fsbase);
|
||||
unsigned long gsbase;
|
||||
|
||||
rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
|
||||
|
||||
return gsbase;
|
||||
}
|
||||
|
||||
extern unsigned long x86_gsbase_read_cpu_inactive(void);
|
||||
extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
|
||||
static inline void x86_fsbase_write_cpu(unsigned long fsbase)
|
||||
{
|
||||
wrmsrl(MSR_FS_BASE, fsbase);
|
||||
}
|
||||
|
||||
static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
|
||||
{
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
|
@ -306,21 +306,6 @@
|
||||
.endif
|
||||
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
|
||||
.endm
|
||||
|
||||
.macro RDPID opd
|
||||
REG_TYPE rdpid_opd_type \opd
|
||||
.if rdpid_opd_type == REG_TYPE_R64
|
||||
R64_NUM rdpid_opd \opd
|
||||
.else
|
||||
R32_NUM rdpid_opd \opd
|
||||
.endif
|
||||
.byte 0xf3
|
||||
.if rdpid_opd > 7
|
||||
PFX_REX rdpid_opd 0
|
||||
.endif
|
||||
.byte 0x0f, 0xc7
|
||||
MODRM 0xc0 rdpid_opd 0x7
|
||||
.endm
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -5,7 +5,4 @@
|
||||
/* MONITOR/MWAIT enabled in Ring 3 */
|
||||
#define HWCAP2_RING3MWAIT (1 << 0)
|
||||
|
||||
/* Kernel allows FSGSBASE instructions to be used in Ring 3 */
|
||||
#define HWCAP2_FSGSBASE BIT(1)
|
||||
|
||||
#endif
|
||||
|
@ -366,22 +366,6 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
|
||||
cr4_clear_bits(X86_CR4_UMIP);
|
||||
}
|
||||
|
||||
static __init int x86_nofsgsbase_setup(char *arg)
|
||||
{
|
||||
/* Require an exact match without trailing characters. */
|
||||
if (strlen(arg))
|
||||
return 0;
|
||||
|
||||
/* Do not emit a message if the feature is not present. */
|
||||
if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
|
||||
return 1;
|
||||
|
||||
setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
|
||||
pr_info("FSGSBASE disabled via kernel command line\n");
|
||||
return 1;
|
||||
}
|
||||
__setup("nofsgsbase", x86_nofsgsbase_setup);
|
||||
|
||||
/*
|
||||
* Protection Keys are not available in 32-bit mode.
|
||||
*/
|
||||
@ -1387,12 +1371,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
||||
setup_smap(c);
|
||||
setup_umip(c);
|
||||
|
||||
/* Enable FSGSBASE instructions if available. */
|
||||
if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
|
||||
cr4_set_bits(X86_CR4_FSGSBASE);
|
||||
elf_hwcap2 |= HWCAP2_FSGSBASE;
|
||||
}
|
||||
|
||||
/*
|
||||
* The vendor-specific functions might have changed features.
|
||||
* Now we do "generic changes."
|
||||
|
@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Processors which have self-snooping capability can handle conflicting
|
||||
* memory type across CPUs by snooping its own cache. However, there exist
|
||||
* CPU models in which having conflicting memory types still leads to
|
||||
* unpredictable behavior, machine check errors, or hangs. Clear this
|
||||
* feature to prevent its use on machines with known errata.
|
||||
*/
|
||||
static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
|
||||
{
|
||||
switch (c->x86_model) {
|
||||
case INTEL_FAM6_CORE_YONAH:
|
||||
case INTEL_FAM6_CORE2_MEROM:
|
||||
case INTEL_FAM6_CORE2_MEROM_L:
|
||||
case INTEL_FAM6_CORE2_PENRYN:
|
||||
case INTEL_FAM6_CORE2_DUNNINGTON:
|
||||
case INTEL_FAM6_NEHALEM:
|
||||
case INTEL_FAM6_NEHALEM_G:
|
||||
case INTEL_FAM6_NEHALEM_EP:
|
||||
case INTEL_FAM6_NEHALEM_EX:
|
||||
case INTEL_FAM6_WESTMERE:
|
||||
case INTEL_FAM6_WESTMERE_EP:
|
||||
case INTEL_FAM6_SANDYBRIDGE:
|
||||
setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
|
||||
}
|
||||
}
|
||||
|
||||
static bool ring3mwait_disabled __read_mostly;
|
||||
|
||||
static int __init ring3mwait_disable(char *__unused)
|
||||
@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
}
|
||||
|
||||
check_mpx_erratum(c);
|
||||
check_memory_type_self_snoop_errata(c);
|
||||
|
||||
/*
|
||||
* Get the number of SMT siblings early from the extended topology
|
||||
|
@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
|
||||
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
|
||||
cr0 = read_cr0() | X86_CR0_CD;
|
||||
write_cr0(cr0);
|
||||
wbinvd();
|
||||
|
||||
/*
|
||||
* Cache flushing is the most time-consuming step when programming
|
||||
* the MTRRs. Fortunately, as per the Intel Software Development
|
||||
* Manual, we can skip it if the processor supports cache self-
|
||||
* snooping.
|
||||
*/
|
||||
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
|
||||
wbinvd();
|
||||
|
||||
/* Save value of CR4 and clear Page Global Enable (bit 7) */
|
||||
if (boot_cpu_has(X86_FEATURE_PGE)) {
|
||||
@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
|
||||
|
||||
/* Disable MTRRs, and set the default type to uncached */
|
||||
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
|
||||
wbinvd();
|
||||
|
||||
/* Again, only flush caches if we have to. */
|
||||
if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
|
||||
wbinvd();
|
||||
}
|
||||
|
||||
static void post_set(void) __releases(set_atomicity_lock)
|
||||
|
@ -161,40 +161,6 @@ enum which_selector {
|
||||
GS
|
||||
};
|
||||
|
||||
/*
|
||||
* Out of line to be protected from kprobes. It is not used on Xen
|
||||
* paravirt. When paravirt support is needed, it needs to be renamed
|
||||
* with native_ prefix.
|
||||
*/
|
||||
static noinline unsigned long __rdgsbase_inactive(void)
|
||||
{
|
||||
unsigned long gsbase;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
native_swapgs();
|
||||
gsbase = rdgsbase();
|
||||
native_swapgs();
|
||||
|
||||
return gsbase;
|
||||
}
|
||||
NOKPROBE_SYMBOL(__rdgsbase_inactive);
|
||||
|
||||
/*
|
||||
* Out of line to be protected from kprobes. It is not used on Xen
|
||||
* paravirt. When paravirt support is needed, it needs to be renamed
|
||||
* with native_ prefix.
|
||||
*/
|
||||
static noinline void __wrgsbase_inactive(unsigned long gsbase)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
native_swapgs();
|
||||
wrgsbase(gsbase);
|
||||
native_swapgs();
|
||||
}
|
||||
NOKPROBE_SYMBOL(__wrgsbase_inactive);
|
||||
|
||||
/*
|
||||
* Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
|
||||
* not available. The goal is to be reasonably fast on non-FSGSBASE systems.
|
||||
@ -244,22 +210,8 @@ static __always_inline void save_fsgs(struct task_struct *task)
|
||||
{
|
||||
savesegment(fs, task->thread.fsindex);
|
||||
savesegment(gs, task->thread.gsindex);
|
||||
if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If FSGSBASE is enabled, we can't make any useful guesses
|
||||
* about the base, and user code expects us to save the current
|
||||
* value. Fortunately, reading the base directly is efficient.
|
||||
*/
|
||||
task->thread.fsbase = rdfsbase();
|
||||
local_irq_save(flags);
|
||||
task->thread.gsbase = __rdgsbase_inactive();
|
||||
local_irq_restore(flags);
|
||||
} else {
|
||||
save_base_legacy(task, task->thread.fsindex, FS);
|
||||
save_base_legacy(task, task->thread.gsindex, GS);
|
||||
}
|
||||
save_base_legacy(task, task->thread.fsindex, FS);
|
||||
save_base_legacy(task, task->thread.gsindex, GS);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_KVM)
|
||||
@ -338,22 +290,10 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
|
||||
static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
|
||||
struct thread_struct *next)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
|
||||
/* Update the FS and GS selectors if they could have changed. */
|
||||
if (unlikely(prev->fsindex || next->fsindex))
|
||||
loadseg(FS, next->fsindex);
|
||||
if (unlikely(prev->gsindex || next->gsindex))
|
||||
loadseg(GS, next->gsindex);
|
||||
|
||||
/* Update the bases. */
|
||||
wrfsbase(next->fsbase);
|
||||
__wrgsbase_inactive(next->gsbase);
|
||||
} else {
|
||||
load_seg_legacy(prev->fsindex, prev->fsbase,
|
||||
next->fsindex, next->fsbase, FS);
|
||||
load_seg_legacy(prev->gsindex, prev->gsbase,
|
||||
next->gsindex, next->gsbase, GS);
|
||||
}
|
||||
load_seg_legacy(prev->fsindex, prev->fsbase,
|
||||
next->fsindex, next->fsbase, FS);
|
||||
load_seg_legacy(prev->gsindex, prev->gsbase,
|
||||
next->gsindex, next->gsbase, GS);
|
||||
}
|
||||
|
||||
static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
|
||||
@ -399,46 +339,13 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
|
||||
return base;
|
||||
}
|
||||
|
||||
unsigned long x86_gsbase_read_cpu_inactive(void)
|
||||
{
|
||||
unsigned long gsbase;
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
|
||||
unsigned long flags;
|
||||
|
||||
/* Interrupts are disabled here. */
|
||||
local_irq_save(flags);
|
||||
gsbase = __rdgsbase_inactive();
|
||||
local_irq_restore(flags);
|
||||
} else {
|
||||
rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
|
||||
}
|
||||
|
||||
return gsbase;
|
||||
}
|
||||
|
||||
void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
|
||||
unsigned long flags;
|
||||
|
||||
/* Interrupts are disabled here. */
|
||||
local_irq_save(flags);
|
||||
__wrgsbase_inactive(gsbase);
|
||||
local_irq_restore(flags);
|
||||
} else {
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long x86_fsbase_read_task(struct task_struct *task)
|
||||
{
|
||||
unsigned long fsbase;
|
||||
|
||||
if (task == current)
|
||||
fsbase = x86_fsbase_read_cpu();
|
||||
else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
|
||||
(task->thread.fsindex == 0))
|
||||
else if (task->thread.fsindex == 0)
|
||||
fsbase = task->thread.fsbase;
|
||||
else
|
||||
fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
|
||||
@ -452,8 +359,7 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
|
||||
|
||||
if (task == current)
|
||||
gsbase = x86_gsbase_read_cpu_inactive();
|
||||
else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
|
||||
(task->thread.gsindex == 0))
|
||||
else if (task->thread.gsindex == 0)
|
||||
gsbase = task->thread.gsbase;
|
||||
else
|
||||
gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
|
||||
@ -493,11 +399,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||
p->thread.sp = (unsigned long) fork_frame;
|
||||
p->thread.io_bitmap_ptr = NULL;
|
||||
|
||||
save_fsgs(me);
|
||||
p->thread.fsindex = me->thread.fsindex;
|
||||
p->thread.fsbase = me->thread.fsbase;
|
||||
p->thread.gsindex = me->thread.gsindex;
|
||||
p->thread.gsbase = me->thread.gsbase;
|
||||
savesegment(gs, p->thread.gsindex);
|
||||
p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
|
||||
savesegment(fs, p->thread.fsindex);
|
||||
p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
|
||||
savesegment(es, p->thread.es);
|
||||
savesegment(ds, p->thread.ds);
|
||||
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
||||
|
@ -12,8 +12,9 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
|
||||
|
||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
|
||||
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
|
||||
protection_keys test_vdso test_vsyscall mov_ss_trap
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
|
||||
protection_keys test_vdso test_vsyscall mov_ss_trap \
|
||||
syscall_arg_fault
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
|
||||
test_FCMOV test_FCOMI test_FISTTP \
|
||||
vdso_restorer
|
||||
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
|
||||
|
@ -35,6 +35,8 @@
|
||||
static volatile sig_atomic_t want_segv;
|
||||
static volatile unsigned long segv_addr;
|
||||
|
||||
static unsigned short *shared_scratch;
|
||||
|
||||
static int nerrs;
|
||||
|
||||
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
|
||||
@ -242,16 +244,11 @@ static void do_remote_base()
|
||||
|
||||
static __thread int set_thread_area_entry_number = -1;
|
||||
|
||||
static void do_unexpected_base(void)
|
||||
static unsigned short load_gs(void)
|
||||
{
|
||||
/*
|
||||
* The goal here is to try to arrange for GS == 0, GSBASE !=
|
||||
* 0, and for the kernel to think that GSBASE == 0.
|
||||
*
|
||||
* To make the test as reliable as possible, this uses
|
||||
* explicit descriptors. (This is not the only way. This
|
||||
* could use ARCH_SET_GS with a low, nonzero base, but the
|
||||
* relevant side effect of ARCH_SET_GS could change.)
|
||||
* Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
|
||||
* that GSBASE == 0 (i.e. thread.gsbase == 0).
|
||||
*/
|
||||
|
||||
/* Step 1: tell the kernel that we have GSBASE == 0. */
|
||||
@ -271,8 +268,9 @@ static void do_unexpected_base(void)
|
||||
.useable = 0
|
||||
};
|
||||
if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
|
||||
printf("\tother thread: using LDT slot 0\n");
|
||||
printf("\tusing LDT slot 0\n");
|
||||
asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
|
||||
return 0x7;
|
||||
} else {
|
||||
/* No modify_ldt for us (configured out, perhaps) */
|
||||
|
||||
@ -294,20 +292,15 @@ static void do_unexpected_base(void)
|
||||
|
||||
if (ret != 0) {
|
||||
printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
printf("\tother thread: using GDT slot %d\n", desc.entry_number);
|
||||
printf("\tusing GDT slot %d\n", desc.entry_number);
|
||||
set_thread_area_entry_number = desc.entry_number;
|
||||
|
||||
asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
|
||||
unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
|
||||
asm volatile ("mov %0, %%gs" : : "rm" (gs));
|
||||
return gs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 3: set the selector back to zero. On AMD chips, this will
|
||||
* preserve GSBASE.
|
||||
*/
|
||||
|
||||
asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
|
||||
}
|
||||
|
||||
void test_wrbase(unsigned short index, unsigned long base)
|
||||
@ -346,12 +339,19 @@ static void *threadproc(void *ctx)
|
||||
if (ftx == 3)
|
||||
return NULL;
|
||||
|
||||
if (ftx == 1)
|
||||
if (ftx == 1) {
|
||||
do_remote_base();
|
||||
else if (ftx == 2)
|
||||
do_unexpected_base();
|
||||
else
|
||||
} else if (ftx == 2) {
|
||||
/*
|
||||
* On AMD chips, this causes GSBASE != 0, GS == 0, and
|
||||
* thread.gsbase == 0.
|
||||
*/
|
||||
|
||||
load_gs();
|
||||
asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
|
||||
} else {
|
||||
errx(1, "helper thread got bad command");
|
||||
}
|
||||
|
||||
ftx = 0;
|
||||
syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
|
||||
@ -453,12 +453,7 @@ static void test_ptrace_write_gsbase(void)
|
||||
if (child == 0) {
|
||||
printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
|
||||
|
||||
/*
|
||||
* Use the LDT setup and fetch the GSBASE from the LDT
|
||||
* by switching to the (nonzero) selector (again)
|
||||
*/
|
||||
do_unexpected_base();
|
||||
asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
|
||||
*shared_scratch = load_gs();
|
||||
|
||||
if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
|
||||
err(1, "PTRACE_TRACEME");
|
||||
@ -476,7 +471,7 @@ static void test_ptrace_write_gsbase(void)
|
||||
|
||||
gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
|
||||
|
||||
if (gs != 0x7) {
|
||||
if (gs != *shared_scratch) {
|
||||
nerrs++;
|
||||
printf("[FAIL]\tGS is not prepared with nonzero\n");
|
||||
goto END;
|
||||
@ -494,16 +489,24 @@ static void test_ptrace_write_gsbase(void)
|
||||
* selector value is changed or not by the GSBASE write in
|
||||
* a ptracer.
|
||||
*/
|
||||
if (gs != 0x7) {
|
||||
if (gs != *shared_scratch) {
|
||||
nerrs++;
|
||||
printf("[FAIL]\tGS changed to %lx\n", gs);
|
||||
|
||||
/*
|
||||
* On older kernels, poking a nonzero value into the
|
||||
* base would zero the selector. On newer kernels,
|
||||
* this behavior has changed -- poking the base
|
||||
* changes only the base and, if FSGSBASE is not
|
||||
* available, this may have no effect.
|
||||
*/
|
||||
if (gs == 0)
|
||||
printf("\tNote: this is expected behavior on older kernels.\n");
|
||||
} else if (have_fsgsbase && (base != 0xFF)) {
|
||||
nerrs++;
|
||||
printf("[FAIL]\tGSBASE changed to %lx\n", base);
|
||||
} else {
|
||||
printf("[OK]\tGS remained 0x7 %s");
|
||||
if (have_fsgsbase)
|
||||
printf("and GSBASE changed to 0xFF");
|
||||
printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
@ -516,6 +519,9 @@ int main()
|
||||
{
|
||||
pthread_t thread;
|
||||
|
||||
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
|
||||
|
||||
/* Probe FSGSBASE */
|
||||
sethandler(SIGILL, sigill, 0);
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
|
@ -15,9 +15,30 @@
|
||||
#include <setjmp.h>
|
||||
#include <errno.h>
|
||||
|
||||
/*
 * Operand-size suffix appended to the push/pop/pushf/popf mnemonics in the
 * inline assembly below: "q" for 64-bit builds, "l" otherwise.
 */
#if defined(__x86_64__)
#define WIDTH "q"
#else
#define WIDTH "l"
#endif
|
||||
|
||||
/* Our sigaltstack scratch space. */
|
||||
static unsigned char altstack_data[SIGSTKSZ];
|
||||
|
||||
/*
 * Return the current value of the (E/R)FLAGS register.
 *
 * The compiler builtins are used instead of a hand-written pushf/pop
 * sequence: an asm statement that pushes onto the stack behind the
 * compiler's back can overwrite data the compiler keeps in the x86-64
 * red zone, and a memory output operand addressed relative to the stack
 * pointer would be computed against a stack pointer the asm has moved.
 */
static unsigned long get_eflags(void)
{
#ifdef __x86_64__
	return __builtin_ia32_readeflags_u64();
#else
	return __builtin_ia32_readeflags_u32();
#endif
}
|
||||
|
||||
/*
 * Load @eflags into the (E/R)FLAGS register.
 *
 * The compiler builtins are used instead of a hand-written push/popf
 * sequence: an asm statement that pushes onto the stack behind the
 * compiler's back can overwrite data the compiler keeps in the x86-64
 * red zone, and a memory input operand addressed relative to the stack
 * pointer would be read at the wrong offset once the asm has moved it.
 */
static void set_eflags(unsigned long eflags)
{
#ifdef __x86_64__
	__builtin_ia32_writeeflags_u64(eflags);
#else
	__builtin_ia32_writeeflags_u32(eflags);
#endif
}
|
||||
|
||||
#define X86_EFLAGS_TF (1UL << 8)
|
||||
|
||||
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
|
||||
int flags)
|
||||
{
|
||||
@ -35,13 +56,22 @@ static sigjmp_buf jmpbuf;
|
||||
|
||||
static volatile sig_atomic_t n_errs;
|
||||
|
||||
/*
 * Indices into uc_mcontext.gregs[] for the accumulator and the instruction
 * pointer, chosen to match the register names of the build's word size.
 */
#if defined(__x86_64__)
# define REG_AX REG_RAX
# define REG_IP REG_RIP
#else
# define REG_AX REG_EAX
# define REG_IP REG_EIP
#endif
|
||||
|
||||
static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
|
||||
{
|
||||
ucontext_t *ctx = (ucontext_t*)ctx_void;
|
||||
long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
|
||||
|
||||
if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
|
||||
printf("[FAIL]\tAX had the wrong value: 0x%x\n",
|
||||
ctx->uc_mcontext.gregs[REG_EAX]);
|
||||
if (ax != -EFAULT && ax != -ENOSYS) {
|
||||
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
|
||||
(unsigned long)ax);
|
||||
n_errs++;
|
||||
} else {
|
||||
printf("[OK]\tSeems okay\n");
|
||||
@ -50,9 +80,42 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
|
||||
siglongjmp(jmpbuf, 1);
|
||||
}
|
||||
|
||||
static volatile sig_atomic_t sigtrap_consecutive_syscalls;
|
||||
|
||||
/*
 * SIGTRAP handler used while single-stepping with TF set.
 *
 * KVM has some bugs that can cause us to stop making progress.  Detect
 * them and complain, but don't loop forever or fail the test: count
 * consecutive traps that land on a SYSCALL/SYSENTER instruction and bail
 * out through jmpbuf once more than three have been seen in a row.
 */
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	const unsigned short *insn =
		(unsigned short *)uc->uc_mcontext.gregs[REG_IP];
	unsigned short opcode = *insn;

	/* Any other instruction means we made progress: reset the streak. */
	if (opcode != 0x340f && opcode != 0x050f) {
		sigtrap_consecutive_syscalls = 0;
		return;
	}

	/* The trap was on SYSCALL or SYSENTER */
	sigtrap_consecutive_syscalls++;
	if (sigtrap_consecutive_syscalls <= 3)
		return;

	printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
	siglongjmp(jmpbuf, 1);
}
|
||||
|
||||
/*
 * SIGILL handler: report why an illegal-instruction fault happened, then
 * unwind back to the test via jmpbuf.
 *
 * If the faulting instruction is one of the ud2 markers placed after the
 * system call instruction, the call returned normally and the case is
 * reported as OK.  Any other faulting instruction is reported as a skip.
 * Exactly one verdict line is printed per fault.
 */
static void sigill(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;
	unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];

	if (*ip == 0x0b0f) {
		/* one of the ud2 instructions faulted */
		printf("[OK]\tSYSCALL returned normally\n");
	} else {
		printf("[SKIP]\tIllegal instruction\n");
	}

	siglongjmp(jmpbuf, 1);
}
|
||||
|
||||
@ -120,9 +183,48 @@ int main()
|
||||
"movl $-1, %%ebp\n\t"
|
||||
"movl $-1, %%esp\n\t"
|
||||
"syscall\n\t"
|
||||
"pushl $0" /* make sure we segfault cleanly */
|
||||
"ud2" /* make sure we recover cleanly */
|
||||
: : : "memory", "flags");
|
||||
}
|
||||
|
||||
printf("[RUN]\tSYSENTER with TF and invalid state\n");
|
||||
sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
|
||||
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
sigtrap_consecutive_syscalls = 0;
|
||||
set_eflags(get_eflags() | X86_EFLAGS_TF);
|
||||
asm volatile (
|
||||
"movl $-1, %%eax\n\t"
|
||||
"movl $-1, %%ebx\n\t"
|
||||
"movl $-1, %%ecx\n\t"
|
||||
"movl $-1, %%edx\n\t"
|
||||
"movl $-1, %%esi\n\t"
|
||||
"movl $-1, %%edi\n\t"
|
||||
"movl $-1, %%ebp\n\t"
|
||||
"movl $-1, %%esp\n\t"
|
||||
"sysenter"
|
||||
: : : "memory", "flags");
|
||||
}
|
||||
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
|
||||
|
||||
printf("[RUN]\tSYSCALL with TF and invalid state\n");
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
sigtrap_consecutive_syscalls = 0;
|
||||
set_eflags(get_eflags() | X86_EFLAGS_TF);
|
||||
asm volatile (
|
||||
"movl $-1, %%eax\n\t"
|
||||
"movl $-1, %%ebx\n\t"
|
||||
"movl $-1, %%ecx\n\t"
|
||||
"movl $-1, %%edx\n\t"
|
||||
"movl $-1, %%esi\n\t"
|
||||
"movl $-1, %%edi\n\t"
|
||||
"movl $-1, %%ebp\n\t"
|
||||
"movl $-1, %%esp\n\t"
|
||||
"syscall\n\t"
|
||||
"ud2" /* make sure we recover cleanly */
|
||||
: : : "memory", "flags");
|
||||
}
|
||||
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user