Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - vDSO and asm entry improvements (Andy Lutomirski)

   - Xen paravirt entry enhancements (Boris Ostrovsky)

   - asm entry labels enhancement (Borislav Petkov)

   - and other misc changes (Thomas Gleixner, me)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vsdo: Fix build on PARAVIRT_CLOCK=y, KVM_GUEST=n
  Revert "x86/kvm: On KVM re-enable (e.g. after suspend), update clocks"
  x86/entry/64_compat: Make labels local
  x86/platform/uv: Include clocksource.h for clocksource_touch_watchdog()
  x86/vdso: Enable vdso pvclock access on all vdso variants
  x86/vdso: Remove pvclock fixmap machinery
  x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
  x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
  x86/kvm: On KVM re-enable (e.g. after suspend), update clocks
  x86/entry/64: Bypass enter_from_user_mode on non-context-tracking boots
  x86/asm: Add asm macros for static keys/jump labels
  x86/asm: Error out if asm/jump_label.h is included inappropriately
  context_tracking: Switch to new static_branch API
  x86/entry, x86/paravirt: Remove the unused usergs_sysret32 PV op
  x86/paravirt: Remove the unused irq_enable_sysexit pv op
  x86/xen: Avoid fast syscall path for Xen PV guests
commit 88cbfd0711
@@ -1,3 +1,5 @@
+#include <linux/jump_label.h>
+
 /*
 
  x86 function call convention, 64-bit:
@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with
 #endif /* CONFIG_X86_64 */
 
+/*
+ * This does 'call enter_from_user_mode' unless we can avoid it based on
+ * kernel config or using the static jump infrastructure.
+ */
+.macro CALL_enter_from_user_mode
+#ifdef CONFIG_CONTEXT_TRACKING
+#ifdef HAVE_JUMP_LABEL
+	STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+#endif
+	call enter_from_user_mode
+.Lafter_call_\@:
+#endif
+.endm
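A note on what this macro buys: with HAVE_JUMP_LABEL, the STATIC_JUMP_IF_FALSE site is a single patched 5-byte instruction, so a kernel that never enables context tracking skips the call entirely; without jump labels the call stays unconditional (when CONFIG_CONTEXT_TRACKING=y). Rendered as illustrative C, the runtime effect is roughly:

/* Illustrative only: the "if" below is not a real runtime test but a
 * 5-byte site that starts out as "jmp .Lafter_call" (def=0, key false)
 * and is patched to a NOP once context tracking is enabled. */
#ifdef CONFIG_CONTEXT_TRACKING
	if (context_tracking_is_enabled())	/* the patched site */
		enter_from_user_mode();		/* skipped on non-tracking boots */
#endif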
@@ -329,7 +329,8 @@ sysenter_past_esp:
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
-	ENABLE_INTERRUPTS_SYSEXIT
+	sti
+	sysexit
 
.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
@@ -552,11 +553,6 @@ ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
 END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
-	sti
-	sysexit
-END(native_irq_enable_sysexit)
 #endif
 
 ENTRY(overflow)
@@ -520,9 +520,7 @@ END(irq_entries_start)
	 */
	TRACE_IRQS_OFF
 
-#ifdef CONFIG_CONTEXT_TRACKING
-	call enter_from_user_mode
-#endif
+	CALL_enter_from_user_mode
 
 1:
	/*
@@ -1066,9 +1064,7 @@ ENTRY(error_entry)
	 * (which can take locks).
	 */
	TRACE_IRQS_OFF
-#ifdef CONFIG_CONTEXT_TRACKING
-	call enter_from_user_mode
-#endif
+	CALL_enter_from_user_mode
	ret
 
 .Lerror_entry_done:
@@ -18,13 +18,6 @@
 
	.section .entry.text, "ax"
 
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret32)
-	swapgs
-	sysretl
-ENDPROC(native_usergs_sysret32)
-#endif
-
 /*
  * 32-bit SYSENTER instruction entry.
  *
@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat)
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
-	 * NB.: sysenter_fix_flags is a label with the code under it moved
+	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
-	jnz	sysenter_fix_flags
-sysenter_flags_fixed:
+	jnz	.Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
 
	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -126,10 +119,10 @@ sysenter_flags_fixed:
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
	jmp	sysret32_from_system_call
 
-sysenter_fix_flags:
+.Lsysenter_fix_flags:
	pushq	$X86_EFLAGS_FIXED
	popfq
-	jmp	sysenter_flags_fixed
+	jmp	.Lsysenter_flags_fixed
 ENDPROC(entry_SYSENTER_compat)
 
 /*
@@ -238,7 +231,8 @@ sysret32_from_system_call:
	xorq	%r9, %r9
	xorq	%r10, %r10
	movq	RSP-ORIG_RAX(%rsp), %rsp
-	USERGS_SYSRET32
+	swapgs
+	sysretl
 END(entry_SYSCALL_compat)
 
 /*
@@ -17,8 +17,10 @@
 #include <asm/vvar.h>
 #include <asm/unistd.h>
 #include <asm/msr.h>
+#include <asm/pvclock.h>
 #include <linux/math64.h>
 #include <linux/time.h>
+#include <linux/kernel.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void)
 }
 #endif
 
-#ifndef BUILD_VDSO32
+#ifdef CONFIG_PARAVIRT_CLOCK
+extern u8 pvclock_page
+	__attribute__((visibility("hidden")));
+#endif
 
-#include <linux/kernel.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
-#include <asm/pvclock.h>
+#ifndef BUILD_VDSO32
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
	return ret;
 }
 
-#ifdef CONFIG_PARAVIRT_CLOCK
-
-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
-{
-	const struct pvclock_vsyscall_time_info *pvti_base;
-	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
-	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
-	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
-	pvti_base = (struct pvclock_vsyscall_time_info *)
-		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
-	return &pvti_base[offset];
-}
-
-static notrace cycle_t vread_pvclock(int *mode)
-{
-	const struct pvclock_vsyscall_time_info *pvti;
-	cycle_t ret;
-	u64 last;
-	u32 version;
-	u8 flags;
-	unsigned cpu, cpu1;
-
-
-	/*
-	 * Note: hypervisor must guarantee that:
-	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-	 * 2. that per-CPU pvclock time info is updated if the
-	 *    underlying CPU changes.
-	 * 3. that version is increased whenever underlying CPU
-	 *    changes.
-	 *
-	 */
-	do {
-		cpu = __getcpu() & VGETCPU_CPU_MASK;
-		/* TODO: We can put vcpu id into higher bits of pvti.version.
-		 * This will save a couple of cycles by getting rid of
-		 * __getcpu() calls (Gleb).
-		 */
-
-		pvti = get_pvti(cpu);
-
-		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
-		/*
-		 * Test we're still on the cpu as well as the version.
-		 * We could have been migrated just after the first
-		 * vgetcpu but before fetching the version, so we
-		 * wouldn't notice a version change.
-		 */
-		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-	} while (unlikely(cpu != cpu1 ||
-			  (pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version));
-
-	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
-		*mode = VCLOCK_NONE;
-
-	/* refer to tsc.c read_tsc() comment for rationale */
-	last = gtod->cycle_last;
-
-	if (likely(ret >= last))
-		return ret;
-
-	return last;
-}
-#endif
-
 #else
 
@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
	return ret;
 }
 
 #endif
 
 #ifdef CONFIG_PARAVIRT_CLOCK
+static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
+{
+	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
+}
+
 static notrace cycle_t vread_pvclock(int *mode)
 {
-	*mode = VCLOCK_NONE;
-	return 0;
-}
-#endif
+	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
+	cycle_t ret;
+	u64 tsc, pvti_tsc;
+	u64 last, delta, pvti_system_time;
+	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+
+	/*
+	 * Note: The kernel and hypervisor must guarantee that cpu ID
+	 * number maps 1:1 to per-CPU pvclock time info.
+	 *
+	 * Because the hypervisor is entirely unaware of guest userspace
+	 * preemption, it cannot guarantee that per-CPU pvclock time
+	 * info is updated if the underlying CPU changes or that that
+	 * version is increased whenever underlying CPU changes.
+	 *
+	 * On KVM, we are guaranteed that pvti updates for any vCPU are
+	 * atomic as seen by *all* vCPUs.  This is an even stronger
+	 * guarantee than we get with a normal seqlock.
+	 *
+	 * On Xen, we don't appear to have that guarantee, but Xen still
+	 * supplies a valid seqlock using the version field.
+	 *
+	 * We only do pvclock vdso timing at all if
+	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+	 * mean that all vCPUs have matching pvti and that the TSC is
+	 * synced, so we can just look at vCPU 0's pvti.
+	 */
+
+	if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
+		*mode = VCLOCK_NONE;
+		return 0;
+	}
+
+	do {
+		version = pvti->version;
+
+		smp_rmb();
+
+		tsc = rdtsc_ordered();
+		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
+		pvti_tsc_shift = pvti->tsc_shift;
+		pvti_system_time = pvti->system_time;
+		pvti_tsc = pvti->tsc_timestamp;
+
+		/* Make sure that the version double-check is last. */
+		smp_rmb();
+	} while (unlikely((version & 1) || version != pvti->version));
+
+	delta = tsc - pvti_tsc;
+	ret = pvti_system_time +
+		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
+				    pvti_tsc_shift);
+
+	/* refer to vread_tsc() comment for rationale */
+	last = gtod->cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	return last;
+}
+#endif
 
 notrace static cycle_t vread_tsc(void)
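The heavy lifting in the new reader is pvclock_scale_delta(), which turns the TSC delta into nanoseconds using a 32.32 fixed-point multiply. A standalone C sketch of that scaling, mirroring the shape of the kernel helper (the unsigned __int128 use assumes a 64-bit compiler):

#include <stdint.h>

/* Sketch of pvclock-style scaling: pre-shift the delta, then multiply by
 * a 32.32 fixed-point fraction and keep the upper 64 bits. */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	if (shift < 0)
		delta >>= -shift;	/* negative shift: coarser TSC */
	else
		delta <<= shift;	/* positive shift: finer TSC */

	/* (delta * mul_frac) >> 32, computed without overflow */
	return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
}

The version loop around it is an ordinary seqlock read: an odd version means an update is in flight, and a changed version means the snapshot straddled one, so the reader retries.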
@@ -25,7 +25,7 @@ SECTIONS
	 * segment.
	 */
 
-	vvar_start = . - 2 * PAGE_SIZE;
+	vvar_start = . - 3 * PAGE_SIZE;
	vvar_page = vvar_start;
 
	/* Place all vvars at the offsets in asm/vvar.h. */
@@ -36,6 +36,7 @@ SECTIONS
 #undef EMIT_VVAR
 
	hpet_page = vvar_start + PAGE_SIZE;
+	pvclock_page = vvar_start + 2 * PAGE_SIZE;
 
	. = SIZEOF_HEADERS;
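Spelled out, this grows the area placed in front of the vdso text from two pages to three and names the new third page. A sketch of the resulting layout, assuming 4 KiB pages (offsets are relative to the vdso text start, hence negative):

/* Sketch of the vvar area after this change (PAGE_SIZE == 4096 assumed). */
enum vvar_layout {
	vvar_start_off   = -3 * 4096,		/* was -2 * 4096 */
	vvar_page_off    = -3 * 4096,		/* generic vvars */
	hpet_page_off    = -3 * 4096 + 4096,	/* HPET counter page */
	pvclock_page_off = -3 * 4096 + 2 * 4096	/* new: cpu0 pvti page */
};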
@@ -73,6 +73,7 @@ enum {
	sym_vvar_start,
	sym_vvar_page,
	sym_hpet_page,
+	sym_pvclock_page,
	sym_VDSO_FAKE_SECTION_TABLE_START,
	sym_VDSO_FAKE_SECTION_TABLE_END,
 };
@@ -80,6 +81,7 @@ enum {
 const int special_pages[] = {
	sym_vvar_page,
	sym_hpet_page,
+	sym_pvclock_page,
 };
 
 struct vdso_sym {
@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
	[sym_vvar_start] = {"vvar_start", true},
	[sym_vvar_page] = {"vvar_page", true},
	[sym_hpet_page] = {"hpet_page", true},
+	[sym_pvclock_page] = {"pvclock_page", true},
	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
		"VDSO_FAKE_SECTION_TABLE_START", false
	},
@@ -12,6 +12,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/cpu.h>
+#include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
		.name = "[vvar]",
		.pages = no_pages,
	};
+	struct pvclock_vsyscall_time_info *pvti;
 
	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
	}
 #endif
 
+	pvti = pvclock_pvti_cpu0_va();
+	if (pvti && image->sym_pvclock_page) {
+		ret = remap_pfn_range(vma,
+				      text_start + image->sym_pvclock_page,
+				      __pa(pvti) >> PAGE_SHIFT,
+				      PAGE_SIZE,
+				      PAGE_READONLY);
+
+		if (ret)
+			goto up_fail;
+	}
+
 up_fail:
	if (ret)
		current->mm->context.vdso = NULL;
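remap_pfn_range() is what actually exposes the cpu 0 pvti to userspace here: it installs a read-only user mapping of a kernel physical page at the offset the vdso linker script reserved. A condensed sketch of the call pattern (the helper name is made up; kernel context assumed):

/* Hypothetical helper condensing the mapping done in map_vdso() above:
 * map one kernel page read-only at a fixed user virtual address. */
static int map_kernel_page_ro(struct vm_area_struct *vma,
			      unsigned long uaddr, void *kva)
{
	return remap_pfn_range(vma, uaddr,
			       __pa(kva) >> PAGE_SHIFT,	/* pfn of the page */
			       PAGE_SIZE, PAGE_READONLY);
}

Inside the vdso, the page is then reachable through the hidden pvclock_page symbol via plain RIP-relative addressing, which is why no fixmap is needed anymore.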
@@ -19,7 +19,6 @@
 #include <asm/acpi.h>
 #include <asm/apicdef.h>
 #include <asm/page.h>
-#include <asm/pvclock.h>
 #ifdef CONFIG_X86_32
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
@@ -72,10 +71,6 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
	VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
 #endif
-#ifdef CONFIG_PARAVIRT_CLOCK
-	PVCLOCK_FIXMAP_BEGIN,
-	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
-#endif
 #endif
	FIX_DBGP_BASE,
	FIX_EARLYCON_MEM_BASE,
@@ -1,12 +1,18 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
-#ifndef __ASSEMBLY__
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <asm/nops.h>
-#include <asm/asm.h>
+#ifndef HAVE_JUMP_LABEL
+/*
+ * For better or for worse, if jump labels (the gcc extension) are missing,
+ * then the entire static branch patching infrastructure is compiled out.
+ * If that happens, the code in here will malfunction.  Raise a compiler
+ * error instead.
+ *
+ * In theory, jump labels and the static branch patching infrastructure
+ * could be decoupled to fix this.
+ */
+#error asm/jump_label.h included on a non-jump-label kernel
+#endif
 
 #define JUMP_LABEL_NOP_SIZE 5
 
@@ -16,6 +22,14 @@
 # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
 #endif
 
+#include <asm/asm.h>
+#include <asm/nops.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
	asm_volatile_goto("1:"
@@ -59,5 +73,40 @@ struct jump_entry {
	jump_label_t key;
 };
 
-#endif	/* __ASSEMBLY__ */
+#else	/* __ASSEMBLY__ */
+
+.macro STATIC_JUMP_IF_TRUE target, key, def
+.Lstatic_jump_\@:
+	.if \def
+	/* Equivalent to "jmp.d32 \target" */
+	.byte		0xe9
+	.long		\target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+	.else
+	.byte		STATIC_KEY_INIT_NOP
+	.endif
+	.pushsection __jump_table, "aw"
+	_ASM_ALIGN
+	_ASM_PTR	.Lstatic_jump_\@, \target, \key
+	.popsection
+.endm
+
+.macro STATIC_JUMP_IF_FALSE target, key, def
+.Lstatic_jump_\@:
+	.if \def
+	.byte		STATIC_KEY_INIT_NOP
+	.else
+	/* Equivalent to "jmp.d32 \target" */
+	.byte		0xe9
+	.long		\target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+	.endif
+	.pushsection __jump_table, "aw"
+	_ASM_ALIGN
+	_ASM_PTR	.Lstatic_jump_\@, \target, \key + 1
+	.popsection
+.endm
+
+#endif	/* __ASSEMBLY__ */
 
 #endif
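The `.byte 0xe9; .long \target - .Lstatic_jump_after_\@` pair hand-assembles a 5-byte `jmp rel32`: the assembler might otherwise pick a 2-byte short jump, which the runtime patching code could not swap with a 5-byte NOP. The displacement is measured from the end of the instruction. A userspace C sketch of the same encoding:

#include <stdint.h>
#include <string.h>

/* Write a 5-byte jmp rel32 at 'site' that lands on 'target'.  x86
 * displacements are relative to the next instruction, i.e. site + 5. */
static void encode_jmp_rel32(uint8_t *site, const uint8_t *target)
{
	int32_t rel = (int32_t)(target - (site + 5));

	site[0] = 0xe9;				/* jmp rel32 opcode */
	memcpy(site + 1, &rel, sizeof(rel));	/* little-endian displacement */
}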
@@ -928,23 +928,11 @@ extern void default_banner(void);
	call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
	PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
-#define USERGS_SYSRET32						\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
-		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
-
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX				\
	push %ecx; push %edx;				\
	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
	pop %edx; pop %ecx
-
-#define ENABLE_INTERRUPTS_SYSEXIT					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
-		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
-
 #else	/* !CONFIG_X86_32 */
 
 /*
@@ -162,15 +162,6 @@ struct pv_cpu_ops {
 
	u64 (*read_pmc)(int counter);
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Atomically enable interrupts and return to userspace.  This
-	 * is only used in 32-bit kernels.  64-bit kernels use
-	 * usergs_sysret32 instead.
-	 */
-	void (*irq_enable_sysexit)(void);
-#endif
-
	/*
	 * Switch to usermode gs and return to 64-bit usermode using
	 * sysret.  Only used in 64-bit kernels to return to 64-bit
@@ -179,14 +170,6 @@ struct pv_cpu_ops {
	 */
	void (*usergs_sysret64)(void);
 
-	/*
-	 * Switch to usermode gs and return to 32-bit usermode using
-	 * sysret.  Used to return to 32-on-64 compat processes.
-	 * Other usermode register state, including %esp, must already
-	 * be restored.
-	 */
-	void (*usergs_sysret32)(void);
-
	/* Normal iret.  Jump to this with the standard iret stack
	   frame set up. */
	void (*iret)(void);
@@ -4,6 +4,15 @@
 #include <linux/clocksource.h>
 #include <asm/pvclock-abi.h>
 
+#ifdef CONFIG_KVM_GUEST
+extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+#else
+static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+	return NULL;
+}
+#endif
+
 /* some helper functions for xen and kvm pv clock sources */
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
@@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info {
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE	sizeof(struct pvclock_vsyscall_time_info)
-#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
-				 int size);
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
 
 #endif /* _ASM_X86_PVCLOCK_H */
@@ -22,6 +22,7 @@ struct vdso_image {
 
	long sym_vvar_page;
	long sym_hpet_page;
+	long sym_pvclock_page;
	long sym_VDSO32_NOTE_MASK;
	long sym___kernel_sigreturn;
	long sym___kernel_rt_sigreturn;
@@ -65,9 +65,6 @@ void common(void) {
	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-#ifdef CONFIG_X86_32
-	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
-#endif
	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
@@ -23,7 +23,6 @@ int main(void)
 {
 #ifdef CONFIG_PARAVIRT
	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
-	OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
	BLANK();
@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock);
 static struct pvclock_vsyscall_time_info *hv_clock;
 static struct pvclock_wall_clock wall_clock;
 
+struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+	return hv_clock;
+}
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
 {
 #ifdef CONFIG_X86_64
	int cpu;
-	int ret;
	u8 flags;
	struct pvclock_vcpu_time_info *vcpu_time;
	unsigned int size;
@@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
		return 1;
	}
 
-	if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
-		put_cpu();
-		return ret;
-	}
-
	put_cpu();
 
	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
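Setting kvm_clock.archdata.vclock_mode to VCLOCK_PVCLOCK is what makes the vdso take the vread_pvclock() path at all. A sketch of the vdso-side dispatch this feeds, following the shape of vgetsns() in vclock_gettime.c (abridged, not the verbatim kernel code):

/* Abridged sketch of the vdso clock-read dispatch (4.4-era structure). */
notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	u64 cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);	/* may downgrade *mode */
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}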
@@ -162,10 +162,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
		ret = paravirt_patch_ident_64(insnbuf, len);
 
	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-#ifdef CONFIG_X86_32
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-#endif
-		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
		/* If operation requires a jmp, then jmp */
		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
@@ -220,8 +216,6 @@ static u64 native_steal_clock(int cpu)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
-extern void native_usergs_sysret32(void);
 extern void native_usergs_sysret64(void);
 
 static struct resource reserve_ioports = {
@@ -379,13 +373,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 
	.load_sp0 = native_load_sp0,
 
-#if defined(CONFIG_X86_32)
-	.irq_enable_sysexit = native_irq_enable_sysexit,
-#endif
 #ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-	.usergs_sysret32 = native_usergs_sysret32,
-#endif
	.usergs_sysret64 = native_usergs_sysret64,
 #endif
	.iret = native_iret,
@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
		PATCH_SITE(pv_irq_ops, restore_fl);
		PATCH_SITE(pv_irq_ops, save_fl);
		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
		PATCH_SITE(pv_mmu_ops, read_cr2);
		PATCH_SITE(pv_mmu_ops, read_cr3);
		PATCH_SITE(pv_mmu_ops, write_cr3);
@@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 DEF_NATIVE(, mov32, "mov %edi, %eax");
@@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
		PATCH_SITE(pv_irq_ops, save_fl);
		PATCH_SITE(pv_irq_ops, irq_enable);
		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
		PATCH_SITE(pv_cpu_ops, usergs_sysret64);
		PATCH_SITE(pv_cpu_ops, swapgs);
		PATCH_SITE(pv_mmu_ops, read_cr2);
@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 
	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
-
-#ifdef CONFIG_X86_64
-/*
- * Initialize the generic pvclock vsyscall state. This will allocate
- * a/some page(s) for the per-vcpu pvclock information, set up a
- * fixmap mapping for the page(s)
- */
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
-				 int size)
-{
-	int idx;
-
-	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
-
-	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
-		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
-			     __pa(i) + (idx*PAGE_SIZE),
-			     PAGE_KERNEL_VVAR);
-	}
-
-	return 0;
-}
-#endif
@@ -28,6 +28,7 @@
 #include <linux/nmi.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/clocksource.h>
 
 #include <asm/apic.h>
 #include <asm/current.h>
@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 
	.iret = xen_iret,
 #ifdef CONFIG_X86_64
-	.usergs_sysret32 = xen_sysret32,
	.usergs_sysret64 = xen_sysret64,
-#else
-	.irq_enable_sysexit = xen_sysexit,
 #endif
 
	.load_tr_desc = paravirt_nop,
@@ -34,20 +34,6 @@ check_events:
	pop %eax
	ret
 
-/*
- * We can't use sysexit directly, because we're not running in ring0.
- * But we can easily fake it up using iret.  Assuming xen_sysexit is
- * jumped to with a standard stack frame, we can just strip it back to
- * a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
-	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
-	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
-	lea PT_EIP(%esp), %esp
-
-	jmp xen_iret
-ENDPROC(xen_sysexit)
-
 /*
  * This is run where a normal iret would be run, with the same stack setup:
  *	8: eflags
@@ -68,25 +68,6 @@ ENTRY(xen_sysret64)
 ENDPATCH(xen_sysret64)
 RELOC(xen_sysret64, 1b+1)
 
-ENTRY(xen_sysret32)
-	/*
-	 * We're already on the usermode stack at this point, but
-	 * still with the kernel gs, so we can easily switch back
-	 */
-	movq %rsp, PER_CPU_VAR(rsp_scratch)
-	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
-	pushq $__USER32_DS
-	pushq PER_CPU_VAR(rsp_scratch)
-	pushq %r11
-	pushq $__USER32_CS
-	pushq %rcx
-
-	pushq $0
-1:	jmp hypercall_iret
-ENDPATCH(xen_sysret32)
-RELOC(xen_sysret32, 1b+1)
-
 /*
  * Xen handles syscall callbacks much like ordinary exceptions, which
  * means we have:
@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
-#ifdef CONFIG_X86_32
-__visible void xen_sysexit(void);
-#endif
 __visible void xen_sysret32(void);
 __visible void xen_sysret64(void);
 __visible void xen_adjust_exception_frame(void);
@@ -22,12 +22,12 @@ struct context_tracking {
 };
 
 #ifdef CONFIG_CONTEXT_TRACKING
-extern struct static_key context_tracking_enabled;
+extern struct static_key_false context_tracking_enabled;
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
 
 static inline bool context_tracking_is_enabled(void)
 {
-	return static_key_false(&context_tracking_enabled);
+	return static_branch_unlikely(&context_tracking_enabled);
 }
 
 static inline bool context_tracking_cpu_is_enabled(void)
@@ -24,7 +24,7 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/context_tracking.h>
 
-struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
 EXPORT_SYMBOL_GPL(context_tracking_enabled);
 
 DEFINE_PER_CPU(struct context_tracking, context_tracking);
@@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu)
 
	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
-		static_key_slow_inc(&context_tracking_enabled);
+		static_branch_inc(&context_tracking_enabled);
	}
 
	if (initialized)
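The migration to the new static_branch API follows a fixed pattern; a minimal sketch with made-up names (my_key, do_slow_path are hypothetical):

#include <linux/jump_label.h>

static void do_slow_path(void);		/* placeholder for illustration */

/* Old API: generic key, awkwardly named test, slow_inc to enable. */
static struct static_key my_key_old = STATIC_KEY_INIT_FALSE;

static void check_old(void)
{
	if (static_key_false(&my_key_old))	/* "false" actually tests the key */
		do_slow_path();
	/* enabling: static_key_slow_inc(&my_key_old); */
}

/* New API, as adopted above: the declaration carries the default value
 * and the test name says which way the branch is expected to go. */
static DEFINE_STATIC_KEY_FALSE(my_key_new);

static void check_new(void)
{
	if (static_branch_unlikely(&my_key_new))	/* default off, patched on */
		do_slow_path();
	/* enabling: static_branch_inc(&my_key_new); */
}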