diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 42a9379f7acb..2582984ffb4b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -136,6 +136,64 @@ END(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ + .pushsection .entry_trampoline, "ax" + +/* + * The code in here gets remapped into cpu_entry_area's trampoline. This means + * that the assembler and linker have the wrong idea as to where this code + * lives (and, in fact, it's mapped more than once, so it's not even at a + * fixed address). So we can't reference any symbols outside the entry + * trampoline and expect it to work. + * + * Instead, we carefully abuse %rip-relative addressing. + * _entry_trampoline(%rip) refers to the start of the remapped) entry + * trampoline. We can thus find cpu_entry_area with this macro: + */ + +#define CPU_ENTRY_AREA \ + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) + +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ +#define RSP_SCRATCH CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \ + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA + +ENTRY(entry_SYSCALL_64_trampoline) + UNWIND_HINT_EMPTY + swapgs + + /* Stash the user RSP. */ + movq %rsp, RSP_SCRATCH + + /* Load the top of the task stack into RSP */ + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp + + /* Start building the simulated IRET frame. */ + pushq $__USER_DS /* pt_regs->ss */ + pushq RSP_SCRATCH /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + + /* + * x86 lacks a near absolute jump, and we can't jump to the real + * entry text with a relative jump. We could push the target + * address and then use retq, but this destroys the pipeline on + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, + * spill RDI and restore it in a second-stage trampoline. + */ + pushq %rdi + movq $entry_SYSCALL_64_stage2, %rdi + jmp *%rdi +END(entry_SYSCALL_64_trampoline) + + .popsection + +ENTRY(entry_SYSCALL_64_stage2) + UNWIND_HINT_EMPTY + popq %rdi + jmp entry_SYSCALL_64_after_hwframe +END(entry_SYSCALL_64_stage2) + ENTRY(entry_SYSCALL_64) UNWIND_HINT_EMPTY /* diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 84558b611ad3..6a699474c2c7 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -61,6 +61,8 @@ struct cpu_entry_area { * of the TSS region. */ struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; }; #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 55858b277cf6..61b1af88ac07 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -101,4 +101,5 @@ void common(void) { /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 57968880e39b..430f950b0b7f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); static inline void setup_cpu_entry_area(int cpu) { #ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + /* On 64-bit systems, we use a read-only fixmap GDT. */ pgprot_t gdt_prot = PAGE_KERNEL_RO; #else @@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area(int cpu) #ifdef CONFIG_X86_32 this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); #endif + +#ifdef CONFIG_X86_64 + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif } /* Load the original GDT from the per-cpu structure */ @@ -1395,10 +1402,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks /* May not be marked __init: used by software suspend */ void syscall_init(void) { + extern char _entry_trampoline[]; + extern char entry_SYSCALL_64_trampoline[]; + int cpu = smp_processor_id(); + unsigned long SYSCALL64_entry_trampoline = + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + + (entry_SYSCALL_64_trampoline - _entry_trampoline); wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a4009fb9be87..d2a8b5a24a44 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -107,6 +107,15 @@ SECTIONS SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + _entry_trampoline = .; + *(.entry_trampoline) + . = ALIGN(PAGE_SIZE); + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); +#endif + /* End of text section */ _etext = .; } :text = 0x9090