mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-13 08:39:52 +00:00
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar: "There were lots of changes in this development cycle: - over 100 separate cleanups, restructuring changes, speedups and fixes in the x86 system call, irq, trap and other entry code, part of a heroic effort to deobfuscate a decade old spaghetti asm code and its C code dependencies (Denys Vlasenko, Andy Lutomirski) - alternatives code fixes and enhancements (Borislav Petkov) - simplifications and cleanups to the compat code (Brian Gerst) - signal handling fixes and new x86 testcases (Andy Lutomirski) - various other fixes and cleanups By their nature many of these changes are risky - we tried to test them well on many different x86 systems (there are no known regressions), and they are split up finely to help bisection - but there's still a fair bit of residual risk left so caveat emptor" * 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits) perf/x86/64: Report regs_user->ax too in get_regs_user() perf/x86/64: Simplify regs_user->abi setting code in get_regs_user() perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user() perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user() x86/asm/entry/32: Tidy up JNZ instructions after TESTs x86/asm/entry/64: Reduce padding in execve stubs x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork x86/asm/entry/64: Simplify jumps in ret_from_fork x86/asm/entry/64: Remove a redundant jump x86/asm/entry/64: Optimize [v]fork/clone stubs x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too x86/asm/entry/64: Move stub_x32_execvecloser() to stub_execveat() x86/asm/entry/64: Use common code for rt_sigreturn() epilogue x86/asm/entry/64: Add forgotten CFI annotation x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout x86/asm/entry/64: Move opportunistic sysret code to syscall code path x86, selftests: Add sigreturn selftest x86/alternatives: Guard NOPs optimization x86/asm/entry: Clear EXTRA_REGS for all 
executable formats x86/signal: Remove pax argument from restore_sigcontext ...
This commit is contained in:
commit
60f898eeaa
@ -406,6 +406,12 @@ Protocol: 2.00+
|
||||
- If 0, the protected-mode code is loaded at 0x10000.
|
||||
- If 1, the protected-mode code is loaded at 0x100000.
|
||||
|
||||
Bit 1 (kernel internal): KASLR_FLAG
|
||||
- Used internally by the compressed kernel to communicate
|
||||
KASLR status to kernel proper.
|
||||
If 1, KASLR enabled.
|
||||
If 0, KASLR disabled.
|
||||
|
||||
Bit 5 (write): QUIET_FLAG
|
||||
- If 0, print early messages.
|
||||
- If 1, suppress early messages.
|
||||
|
@ -295,7 +295,8 @@ static unsigned long find_random_addr(unsigned long minimum,
|
||||
return slots_fetch_random();
|
||||
}
|
||||
|
||||
unsigned char *choose_kernel_location(unsigned char *input,
|
||||
unsigned char *choose_kernel_location(struct boot_params *boot_params,
|
||||
unsigned char *input,
|
||||
unsigned long input_size,
|
||||
unsigned char *output,
|
||||
unsigned long output_size)
|
||||
@ -315,6 +316,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
|
||||
}
|
||||
#endif
|
||||
|
||||
boot_params->hdr.loadflags |= KASLR_FLAG;
|
||||
|
||||
/* Record the various known unsafe memory ranges. */
|
||||
mem_avoid_init((unsigned long)input, input_size,
|
||||
(unsigned long)output, output_size);
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/bootparam.h>
|
||||
|
||||
__HEAD
|
||||
ENTRY(startup_32)
|
||||
@ -102,7 +103,7 @@ preferred_addr:
|
||||
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
|
||||
* us to not reload segments
|
||||
*/
|
||||
testb $(1<<6), BP_loadflags(%esi)
|
||||
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
|
||||
jnz 1f
|
||||
|
||||
cli
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/bootparam.h>
|
||||
|
||||
__HEAD
|
||||
.code32
|
||||
@ -46,7 +47,7 @@ ENTRY(startup_32)
|
||||
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
|
||||
* us to not reload segments
|
||||
*/
|
||||
testb $(1<<6), BP_loadflags(%esi)
|
||||
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
|
||||
jnz 1f
|
||||
|
||||
cli
|
||||
@ -164,7 +165,7 @@ ENTRY(startup_32)
|
||||
/* After gdt is loaded */
|
||||
xorl %eax, %eax
|
||||
lldt %ax
|
||||
movl $0x20, %eax
|
||||
movl $__BOOT_TSS, %eax
|
||||
ltr %ax
|
||||
|
||||
/*
|
||||
|
@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
|
||||
|
||||
real_mode = rmode;
|
||||
|
||||
/* Clear it for solely in-kernel use */
|
||||
real_mode->hdr.loadflags &= ~KASLR_FLAG;
|
||||
|
||||
sanitize_boot_params(real_mode);
|
||||
|
||||
if (real_mode->screen_info.orig_video_mode == 7) {
|
||||
@ -401,7 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
|
||||
* the entire decompressed kernel plus relocation table, or the
|
||||
* entire decompressed kernel plus .bss and .brk sections.
|
||||
*/
|
||||
output = choose_kernel_location(input_data, input_len, output,
|
||||
output = choose_kernel_location(real_mode, input_data, input_len, output,
|
||||
output_len > run_size ? output_len
|
||||
: run_size);
|
||||
|
||||
|
@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
|
||||
|
||||
#if CONFIG_RANDOMIZE_BASE
|
||||
/* aslr.c */
|
||||
unsigned char *choose_kernel_location(unsigned char *input,
|
||||
unsigned char *choose_kernel_location(struct boot_params *boot_params,
|
||||
unsigned char *input,
|
||||
unsigned long input_size,
|
||||
unsigned char *output,
|
||||
unsigned long output_size);
|
||||
@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
|
||||
bool has_cpuflag(int flag);
|
||||
#else
|
||||
static inline
|
||||
unsigned char *choose_kernel_location(unsigned char *input,
|
||||
unsigned char *choose_kernel_location(struct boot_params *boot_params,
|
||||
unsigned char *input,
|
||||
unsigned long input_size,
|
||||
unsigned char *output,
|
||||
unsigned long output_size)
|
||||
|
@ -178,7 +178,7 @@ continue_block:
|
||||
## 2a) PROCESS FULL BLOCKS:
|
||||
################################################################
|
||||
full_block:
|
||||
movq $128,%rax
|
||||
movl $128,%eax
|
||||
lea 128*8*2(block_0), block_1
|
||||
lea 128*8*3(block_0), block_2
|
||||
add $128*8*1, block_0
|
||||
|
@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
|
||||
movq R1, 8(%rsi)
|
||||
|
||||
popq R1
|
||||
movq $1,%rax
|
||||
movl $1,%eax
|
||||
ret
|
||||
ENDPROC(twofish_enc_blk)
|
||||
|
||||
@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
|
||||
movq R1, 8(%rsi)
|
||||
|
||||
popq R1
|
||||
movq $1,%rax
|
||||
movl $1,%eax
|
||||
ret
|
||||
ENDPROC(twofish_dec_blk)
|
||||
|
@ -3,7 +3,6 @@
|
||||
#
|
||||
|
||||
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
|
||||
obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
|
||||
|
||||
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
|
||||
|
||||
|
@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
|
||||
}
|
||||
|
||||
static int ia32_restore_sigcontext(struct pt_regs *regs,
|
||||
struct sigcontext_ia32 __user *sc,
|
||||
unsigned int *pax)
|
||||
struct sigcontext_ia32 __user *sc)
|
||||
{
|
||||
unsigned int tmpflags, err = 0;
|
||||
void __user *buf;
|
||||
@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
|
||||
RELOAD_SEG(es);
|
||||
|
||||
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
|
||||
COPY(dx); COPY(cx); COPY(ip);
|
||||
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
|
||||
/* Don't touch extended registers */
|
||||
|
||||
COPY_SEG_CPL3(cs);
|
||||
@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
|
||||
|
||||
get_user_ex(tmp, &sc->fpstate);
|
||||
buf = compat_ptr(tmp);
|
||||
|
||||
get_user_ex(*pax, &sc->ax);
|
||||
} get_user_catch(err);
|
||||
|
||||
err |= restore_xstate_sig(buf, 1);
|
||||
|
||||
force_iret();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
|
||||
struct pt_regs *regs = current_pt_regs();
|
||||
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
|
||||
sigset_t set;
|
||||
unsigned int ax;
|
||||
|
||||
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
|
||||
goto badframe;
|
||||
@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
|
||||
if (ia32_restore_sigcontext(regs, &frame->sc))
|
||||
goto badframe;
|
||||
return ax;
|
||||
return regs->ax;
|
||||
|
||||
badframe:
|
||||
signal_fault(regs, frame, "32bit sigreturn");
|
||||
@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
|
||||
struct pt_regs *regs = current_pt_regs();
|
||||
struct rt_sigframe_ia32 __user *frame;
|
||||
sigset_t set;
|
||||
unsigned int ax;
|
||||
|
||||
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
|
||||
|
||||
@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
|
||||
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
|
||||
goto badframe;
|
||||
|
||||
if (compat_restore_altstack(&frame->uc.uc_stack))
|
||||
goto badframe;
|
||||
|
||||
return ax;
|
||||
return regs->ax;
|
||||
|
||||
badframe:
|
||||
signal_fault(regs, frame, "32bit rt sigreturn");
|
||||
|
@ -30,24 +30,13 @@
|
||||
|
||||
.section .entry.text, "ax"
|
||||
|
||||
.macro IA32_ARG_FIXUP noebp=0
|
||||
movl %edi,%r8d
|
||||
.if \noebp
|
||||
.else
|
||||
movl %ebp,%r9d
|
||||
.endif
|
||||
xchg %ecx,%esi
|
||||
movl %ebx,%edi
|
||||
movl %edx,%edx /* zero extension */
|
||||
.endm
|
||||
|
||||
/* clobbers %eax */
|
||||
.macro CLEAR_RREGS offset=0, _r9=rax
|
||||
/* clobbers %rax */
|
||||
.macro CLEAR_RREGS _r9=rax
|
||||
xorl %eax,%eax
|
||||
movq %rax,\offset+R11(%rsp)
|
||||
movq %rax,\offset+R10(%rsp)
|
||||
movq %\_r9,\offset+R9(%rsp)
|
||||
movq %rax,\offset+R8(%rsp)
|
||||
movq %rax,R11(%rsp)
|
||||
movq %rax,R10(%rsp)
|
||||
movq %\_r9,R9(%rsp)
|
||||
movq %rax,R8(%rsp)
|
||||
.endm
|
||||
|
||||
/*
|
||||
@ -60,14 +49,14 @@
|
||||
* If it's -1 to make us punt the syscall, then (u32)-1 is still
|
||||
* an appropriately invalid value.
|
||||
*/
|
||||
.macro LOAD_ARGS32 offset, _r9=0
|
||||
.macro LOAD_ARGS32 _r9=0
|
||||
.if \_r9
|
||||
movl \offset+16(%rsp),%r9d
|
||||
movl R9(%rsp),%r9d
|
||||
.endif
|
||||
movl \offset+40(%rsp),%ecx
|
||||
movl \offset+48(%rsp),%edx
|
||||
movl \offset+56(%rsp),%esi
|
||||
movl \offset+64(%rsp),%edi
|
||||
movl RCX(%rsp),%ecx
|
||||
movl RDX(%rsp),%edx
|
||||
movl RSI(%rsp),%esi
|
||||
movl RDI(%rsp),%edi
|
||||
movl %eax,%eax /* zero extension */
|
||||
.endm
|
||||
|
||||
@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
|
||||
/*
|
||||
* 32bit SYSENTER instruction entry.
|
||||
*
|
||||
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
|
||||
* IF and VM in rflags are cleared (IOW: interrupts are off).
|
||||
* SYSENTER does not save anything on the stack,
|
||||
* and does not save old rip (!!!) and rflags.
|
||||
*
|
||||
* Arguments:
|
||||
* %eax System call number.
|
||||
* %ebx Arg1
|
||||
* %ecx Arg2
|
||||
* %edx Arg3
|
||||
* %esi Arg4
|
||||
* %edi Arg5
|
||||
* %ebp user stack
|
||||
* 0(%ebp) Arg6
|
||||
*
|
||||
* Interrupts off.
|
||||
*
|
||||
* eax system call number
|
||||
* ebx arg1
|
||||
* ecx arg2
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp user stack
|
||||
* 0(%ebp) arg6
|
||||
*
|
||||
* This is purely a fast path. For anything complicated we use the int 0x80
|
||||
* path below. Set up a complete hardware stack frame to share code
|
||||
* path below. We set up a complete hardware stack frame to share code
|
||||
* with the int 0x80 path.
|
||||
*/
|
||||
*/
|
||||
ENTRY(ia32_sysenter_target)
|
||||
CFI_STARTPROC32 simple
|
||||
CFI_SIGNAL_FRAME
|
||||
CFI_DEF_CFA rsp,0
|
||||
CFI_REGISTER rsp,rbp
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movq PER_CPU_VAR(kernel_stack), %rsp
|
||||
addq $(KERNEL_STACK_OFFSET),%rsp
|
||||
|
||||
/*
|
||||
* No need to follow this irqs on/off section: the syscall
|
||||
* disabled irqs, here we enable it straight after entry:
|
||||
* Interrupts are off on entry.
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
movl %ebp,%ebp /* zero extension */
|
||||
pushq_cfi $__USER32_DS
|
||||
/*CFI_REL_OFFSET ss,0*/
|
||||
pushq_cfi %rbp
|
||||
CFI_REL_OFFSET rsp,0
|
||||
pushfq_cfi
|
||||
/*CFI_REL_OFFSET rflags,0*/
|
||||
movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
|
||||
CFI_REGISTER rip,r10
|
||||
pushq_cfi $__USER32_CS
|
||||
/*CFI_REL_OFFSET cs,0*/
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
movl %ebp, %ebp
|
||||
movl %eax, %eax
|
||||
pushq_cfi %r10
|
||||
CFI_REL_OFFSET rip,0
|
||||
pushq_cfi %rax
|
||||
|
||||
movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
|
||||
CFI_REGISTER rip,r10
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq_cfi $__USER32_DS /* pt_regs->ss */
|
||||
pushq_cfi %rbp /* pt_regs->sp */
|
||||
CFI_REL_OFFSET rsp,0
|
||||
pushfq_cfi /* pt_regs->flags */
|
||||
pushq_cfi $__USER32_CS /* pt_regs->cs */
|
||||
pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */
|
||||
CFI_REL_OFFSET rip,0
|
||||
pushq_cfi_reg rax /* pt_regs->orig_ax */
|
||||
pushq_cfi_reg rdi /* pt_regs->di */
|
||||
pushq_cfi_reg rsi /* pt_regs->si */
|
||||
pushq_cfi_reg rdx /* pt_regs->dx */
|
||||
pushq_cfi_reg rcx /* pt_regs->cx */
|
||||
pushq_cfi_reg rax /* pt_regs->ax */
|
||||
cld
|
||||
SAVE_ARGS 0,1,0
|
||||
/* no need to do an access_ok check here because rbp has been
|
||||
32bit zero extended */
|
||||
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
|
||||
CFI_ADJUST_CFA_OFFSET 10*8
|
||||
|
||||
/*
|
||||
* no need to do an access_ok check here because rbp has been
|
||||
* 32bit zero extended
|
||||
*/
|
||||
ASM_STAC
|
||||
1: movl (%rbp),%ebp
|
||||
_ASM_EXTABLE(1b,ia32_badarg)
|
||||
@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
|
||||
* ourselves. To save a few cycles, we can check whether
|
||||
* NT was set instead of doing an unconditional popfq.
|
||||
*/
|
||||
testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
|
||||
testl $X86_EFLAGS_NT,EFLAGS(%rsp)
|
||||
jnz sysenter_fix_flags
|
||||
sysenter_flags_fixed:
|
||||
|
||||
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
CFI_REMEMBER_STATE
|
||||
jnz sysenter_tracesys
|
||||
cmpq $(IA32_NR_syscalls-1),%rax
|
||||
ja ia32_badsys
|
||||
sysenter_do_call:
|
||||
IA32_ARG_FIXUP
|
||||
/* 32bit syscall -> 64bit C ABI argument conversion */
|
||||
movl %edi,%r8d /* arg5 */
|
||||
movl %ebp,%r9d /* arg6 */
|
||||
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
|
||||
movl %ebx,%edi /* arg1 */
|
||||
movl %edx,%edx /* arg3 (zero extension) */
|
||||
sysenter_dispatch:
|
||||
call *ia32_sys_call_table(,%rax,8)
|
||||
movq %rax,RAX-ARGOFFSET(%rsp)
|
||||
movq %rax,RAX(%rsp)
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz sysexit_audit
|
||||
sysexit_from_sys_call:
|
||||
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
/* clear IF, that popfq doesn't enable interrupts early */
|
||||
andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
|
||||
movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
|
||||
CFI_REGISTER rip,rdx
|
||||
RESTORE_ARGS 0,24,0,0,0,0
|
||||
/*
|
||||
* NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
|
||||
* NMI between STI and SYSEXIT has poorly specified behavior,
|
||||
* and an NMI followed by an IRQ with usergs is fatal. So
|
||||
* we just pretend we're using SYSEXIT but we really use
|
||||
* SYSRETL instead.
|
||||
*
|
||||
* This code path is still called 'sysexit' because it pairs
|
||||
* with 'sysenter' and it uses the SYSENTER calling convention.
|
||||
*/
|
||||
andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
movl RIP(%rsp),%ecx /* User %eip */
|
||||
CFI_REGISTER rip,rcx
|
||||
RESTORE_RSI_RDI
|
||||
xorl %edx,%edx /* avoid info leaks */
|
||||
xorq %r8,%r8
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
popfq_cfi
|
||||
movl EFLAGS(%rsp),%r11d /* User eflags */
|
||||
/*CFI_RESTORE rflags*/
|
||||
popq_cfi %rcx /* User %esp */
|
||||
CFI_REGISTER rsp,rcx
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS_SYSEXIT32
|
||||
|
||||
/*
|
||||
* SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
|
||||
* since it avoids a dicey window with interrupts enabled.
|
||||
*/
|
||||
movl RSP(%rsp),%esp
|
||||
|
||||
/*
|
||||
* USERGS_SYSRET32 does:
|
||||
* gsbase = user's gs base
|
||||
* eip = ecx
|
||||
* rflags = r11
|
||||
* cs = __USER32_CS
|
||||
* ss = __USER_DS
|
||||
*
|
||||
* The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
|
||||
*
|
||||
* pop %ebp
|
||||
* pop %edx
|
||||
* pop %ecx
|
||||
*
|
||||
* Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
|
||||
* avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
|
||||
* address (already known to user code), and R12-R15 are
|
||||
* callee-saved and therefore don't contain any interesting
|
||||
* kernel data.
|
||||
*/
|
||||
USERGS_SYSRET32
|
||||
|
||||
CFI_RESTORE_STATE
|
||||
|
||||
@ -205,18 +247,18 @@ sysexit_from_sys_call:
|
||||
movl %ebx,%esi /* 2nd arg: 1st syscall arg */
|
||||
movl %eax,%edi /* 1st arg: syscall number */
|
||||
call __audit_syscall_entry
|
||||
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
|
||||
movl RAX(%rsp),%eax /* reload syscall number */
|
||||
cmpq $(IA32_NR_syscalls-1),%rax
|
||||
ja ia32_badsys
|
||||
movl %ebx,%edi /* reload 1st syscall arg */
|
||||
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
|
||||
movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
|
||||
movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
|
||||
movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
|
||||
movl RCX(%rsp),%esi /* reload 2nd syscall arg */
|
||||
movl RDX(%rsp),%edx /* reload 3rd syscall arg */
|
||||
movl RSI(%rsp),%ecx /* reload 4th syscall arg */
|
||||
movl RDI(%rsp),%r8d /* reload 5th syscall arg */
|
||||
.endm
|
||||
|
||||
.macro auditsys_exit exit
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz ia32_ret_from_sys_call
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
@ -227,13 +269,13 @@ sysexit_from_sys_call:
|
||||
1: setbe %al /* 1 if error, 0 if not */
|
||||
movzbl %al,%edi /* zero-extend that into %edi */
|
||||
call __audit_syscall_exit
|
||||
movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
|
||||
movq RAX(%rsp),%rax /* reload syscall return value */
|
||||
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jz \exit
|
||||
CLEAR_RREGS -ARGOFFSET
|
||||
CLEAR_RREGS
|
||||
jmp int_with_check
|
||||
.endm
|
||||
|
||||
@ -253,16 +295,16 @@ sysenter_fix_flags:
|
||||
|
||||
sysenter_tracesys:
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jz sysenter_auditsys
|
||||
#endif
|
||||
SAVE_REST
|
||||
SAVE_EXTRA_REGS
|
||||
CLEAR_RREGS
|
||||
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
|
||||
movq %rsp,%rdi /* &pt_regs -> arg1 */
|
||||
call syscall_trace_enter
|
||||
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
|
||||
RESTORE_REST
|
||||
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
|
||||
RESTORE_EXTRA_REGS
|
||||
cmpq $(IA32_NR_syscalls-1),%rax
|
||||
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
|
||||
jmp sysenter_do_call
|
||||
@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
|
||||
/*
|
||||
* 32bit SYSCALL instruction entry.
|
||||
*
|
||||
* 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
|
||||
* then loads new ss, cs, and rip from previously programmed MSRs.
|
||||
* rflags gets masked by a value from another MSR (so CLD and CLAC
|
||||
* are not needed). SYSCALL does not save anything on the stack
|
||||
* and does not change rsp.
|
||||
*
|
||||
* Note: rflags saving+masking-with-MSR happens only in Long mode
|
||||
* (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
|
||||
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
|
||||
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
|
||||
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
|
||||
*
|
||||
* Arguments:
|
||||
* %eax System call number.
|
||||
* %ebx Arg1
|
||||
* %ecx return EIP
|
||||
* %edx Arg3
|
||||
* %esi Arg4
|
||||
* %edi Arg5
|
||||
* %ebp Arg2 [note: not saved in the stack frame, should not be touched]
|
||||
* %esp user stack
|
||||
* 0(%esp) Arg6
|
||||
*
|
||||
* Interrupts off.
|
||||
*
|
||||
* eax system call number
|
||||
* ecx return address
|
||||
* ebx arg1
|
||||
* ebp arg2 (note: not saved in the stack frame, should not be touched)
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* esp user stack
|
||||
* 0(%esp) arg6
|
||||
*
|
||||
* This is purely a fast path. For anything complicated we use the int 0x80
|
||||
* path below. Set up a complete hardware stack frame to share code
|
||||
* with the int 0x80 path.
|
||||
*/
|
||||
* path below. We set up a complete hardware stack frame to share code
|
||||
* with the int 0x80 path.
|
||||
*/
|
||||
ENTRY(ia32_cstar_target)
|
||||
CFI_STARTPROC32 simple
|
||||
CFI_SIGNAL_FRAME
|
||||
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
|
||||
CFI_DEF_CFA rsp,0
|
||||
CFI_REGISTER rip,rcx
|
||||
/*CFI_REGISTER rflags,r11*/
|
||||
|
||||
/*
|
||||
* Interrupts are off on entry.
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movl %esp,%r8d
|
||||
CFI_REGISTER rsp,r8
|
||||
movq PER_CPU_VAR(kernel_stack),%rsp
|
||||
/*
|
||||
* No need to follow this irqs on/off section: the syscall
|
||||
* disabled irqs and here we enable it straight after entry:
|
||||
*/
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
SAVE_ARGS 8,0,0
|
||||
movl %eax,%eax /* zero extension */
|
||||
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
|
||||
movq %rcx,RIP-ARGOFFSET(%rsp)
|
||||
CFI_REL_OFFSET rip,RIP-ARGOFFSET
|
||||
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
movl %eax,%eax
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq_cfi $__USER32_DS /* pt_regs->ss */
|
||||
pushq_cfi %r8 /* pt_regs->sp */
|
||||
CFI_REL_OFFSET rsp,0
|
||||
pushq_cfi %r11 /* pt_regs->flags */
|
||||
pushq_cfi $__USER32_CS /* pt_regs->cs */
|
||||
pushq_cfi %rcx /* pt_regs->ip */
|
||||
CFI_REL_OFFSET rip,0
|
||||
pushq_cfi_reg rax /* pt_regs->orig_ax */
|
||||
pushq_cfi_reg rdi /* pt_regs->di */
|
||||
pushq_cfi_reg rsi /* pt_regs->si */
|
||||
pushq_cfi_reg rdx /* pt_regs->dx */
|
||||
pushq_cfi_reg rbp /* pt_regs->cx */
|
||||
movl %ebp,%ecx
|
||||
movq $__USER32_CS,CS-ARGOFFSET(%rsp)
|
||||
movq $__USER32_DS,SS-ARGOFFSET(%rsp)
|
||||
movq %r11,EFLAGS-ARGOFFSET(%rsp)
|
||||
/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
|
||||
movq %r8,RSP-ARGOFFSET(%rsp)
|
||||
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
|
||||
/* no need to do an access_ok check here because r8 has been
|
||||
32bit zero extended */
|
||||
/* hardware stack frame is complete now */
|
||||
pushq_cfi_reg rax /* pt_regs->ax */
|
||||
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
|
||||
CFI_ADJUST_CFA_OFFSET 10*8
|
||||
|
||||
/*
|
||||
* no need to do an access_ok check here because r8 has been
|
||||
* 32bit zero extended
|
||||
*/
|
||||
ASM_STAC
|
||||
1: movl (%r8),%r9d
|
||||
_ASM_EXTABLE(1b,ia32_badarg)
|
||||
ASM_CLAC
|
||||
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
CFI_REMEMBER_STATE
|
||||
jnz cstar_tracesys
|
||||
cmpq $IA32_NR_syscalls-1,%rax
|
||||
ja ia32_badsys
|
||||
cstar_do_call:
|
||||
IA32_ARG_FIXUP 1
|
||||
/* 32bit syscall -> 64bit C ABI argument conversion */
|
||||
movl %edi,%r8d /* arg5 */
|
||||
/* r9 already loaded */ /* arg6 */
|
||||
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
|
||||
movl %ebx,%edi /* arg1 */
|
||||
movl %edx,%edx /* arg3 (zero extension) */
|
||||
cstar_dispatch:
|
||||
call *ia32_sys_call_table(,%rax,8)
|
||||
movq %rax,RAX-ARGOFFSET(%rsp)
|
||||
movq %rax,RAX(%rsp)
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz sysretl_audit
|
||||
sysretl_from_sys_call:
|
||||
andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
RESTORE_ARGS 0,-ARG_SKIP,0,0,0
|
||||
movl RIP-ARGOFFSET(%rsp),%ecx
|
||||
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
RESTORE_RSI_RDI_RDX
|
||||
movl RIP(%rsp),%ecx
|
||||
CFI_REGISTER rip,rcx
|
||||
movl EFLAGS-ARGOFFSET(%rsp),%r11d
|
||||
movl EFLAGS(%rsp),%r11d
|
||||
/*CFI_REGISTER rflags,r11*/
|
||||
xorq %r10,%r10
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
TRACE_IRQS_ON
|
||||
movl RSP-ARGOFFSET(%rsp),%esp
|
||||
movl RSP(%rsp),%esp
|
||||
CFI_RESTORE rsp
|
||||
/*
|
||||
* 64bit->32bit SYSRET restores eip from ecx,
|
||||
* eflags from r11 (but RF and VM bits are forced to 0),
|
||||
* cs and ss are loaded from MSRs.
|
||||
* (Note: 32bit->32bit SYSRET is different: since r11
|
||||
* does not exist, it merely sets eflags.IF=1).
|
||||
*/
|
||||
USERGS_SYSRET32
|
||||
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
cstar_auditsys:
|
||||
CFI_RESTORE_STATE
|
||||
movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
|
||||
movl %r9d,R9(%rsp) /* register to be clobbered by call */
|
||||
auditsys_entry_common
|
||||
movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
|
||||
movl R9(%rsp),%r9d /* reload 6th syscall arg */
|
||||
jmp cstar_dispatch
|
||||
|
||||
sysretl_audit:
|
||||
@ -368,17 +444,17 @@ sysretl_audit:
|
||||
|
||||
cstar_tracesys:
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jz cstar_auditsys
|
||||
#endif
|
||||
xchgl %r9d,%ebp
|
||||
SAVE_REST
|
||||
CLEAR_RREGS 0, r9
|
||||
SAVE_EXTRA_REGS
|
||||
CLEAR_RREGS r9
|
||||
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
|
||||
movq %rsp,%rdi /* &pt_regs -> arg1 */
|
||||
call syscall_trace_enter
|
||||
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
|
||||
RESTORE_REST
|
||||
LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
|
||||
RESTORE_EXTRA_REGS
|
||||
xchgl %ebp,%r9d
|
||||
cmpq $(IA32_NR_syscalls-1),%rax
|
||||
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
|
||||
@ -391,78 +467,94 @@ ia32_badarg:
|
||||
jmp ia32_sysret
|
||||
CFI_ENDPROC
|
||||
|
||||
/*
|
||||
* Emulated IA32 system calls via int 0x80.
|
||||
/*
|
||||
* Emulated IA32 system calls via int 0x80.
|
||||
*
|
||||
* Arguments:
|
||||
* %eax System call number.
|
||||
* %ebx Arg1
|
||||
* %ecx Arg2
|
||||
* %edx Arg3
|
||||
* %esi Arg4
|
||||
* %edi Arg5
|
||||
* %ebp Arg6 [note: not saved in the stack frame, should not be touched]
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
* ebx arg1
|
||||
* ecx arg2
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp arg6 (note: not saved in the stack frame, should not be touched)
|
||||
*
|
||||
* Notes:
|
||||
* Uses the same stack frame as the x86-64 version.
|
||||
* All registers except %eax must be saved (but ptrace may violate that)
|
||||
* Uses the same stack frame as the x86-64 version.
|
||||
* All registers except eax must be saved (but ptrace may violate that).
|
||||
* Arguments are zero extended. For system calls that want sign extension and
|
||||
* take long arguments a wrapper is needed. Most calls can just be called
|
||||
* directly.
|
||||
* Assumes it is only called from user space and entered with interrupts off.
|
||||
*/
|
||||
* Assumes it is only called from user space and entered with interrupts off.
|
||||
*/
|
||||
|
||||
ENTRY(ia32_syscall)
|
||||
CFI_STARTPROC32 simple
|
||||
CFI_SIGNAL_FRAME
|
||||
CFI_DEF_CFA rsp,SS+8-RIP
|
||||
/*CFI_REL_OFFSET ss,SS-RIP*/
|
||||
CFI_REL_OFFSET rsp,RSP-RIP
|
||||
/*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
|
||||
/*CFI_REL_OFFSET cs,CS-RIP*/
|
||||
CFI_REL_OFFSET rip,RIP-RIP
|
||||
CFI_DEF_CFA rsp,5*8
|
||||
/*CFI_REL_OFFSET ss,4*8 */
|
||||
CFI_REL_OFFSET rsp,3*8
|
||||
/*CFI_REL_OFFSET rflags,2*8 */
|
||||
/*CFI_REL_OFFSET cs,1*8 */
|
||||
CFI_REL_OFFSET rip,0*8
|
||||
|
||||
/*
|
||||
* Interrupts are off on entry.
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
SWAPGS
|
||||
/*
|
||||
* No need to follow this irqs on/off section: the syscall
|
||||
* disabled irqs and here we enable it straight after entry:
|
||||
*/
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
movl %eax,%eax
|
||||
pushq_cfi %rax
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
movl %eax,%eax
|
||||
|
||||
/* Construct struct pt_regs on stack (iret frame is already on stack) */
|
||||
pushq_cfi_reg rax /* pt_regs->orig_ax */
|
||||
pushq_cfi_reg rdi /* pt_regs->di */
|
||||
pushq_cfi_reg rsi /* pt_regs->si */
|
||||
pushq_cfi_reg rdx /* pt_regs->dx */
|
||||
pushq_cfi_reg rcx /* pt_regs->cx */
|
||||
pushq_cfi_reg rax /* pt_regs->ax */
|
||||
cld
|
||||
/* note the registers are not zero extended to the sf.
|
||||
this could be a problem. */
|
||||
SAVE_ARGS 0,1,0
|
||||
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
|
||||
sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
|
||||
CFI_ADJUST_CFA_OFFSET 10*8
|
||||
|
||||
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz ia32_tracesys
|
||||
cmpq $(IA32_NR_syscalls-1),%rax
|
||||
ja ia32_badsys
|
||||
ia32_do_call:
|
||||
IA32_ARG_FIXUP
|
||||
/* 32bit syscall -> 64bit C ABI argument conversion */
|
||||
movl %edi,%r8d /* arg5 */
|
||||
movl %ebp,%r9d /* arg6 */
|
||||
xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
|
||||
movl %ebx,%edi /* arg1 */
|
||||
movl %edx,%edx /* arg3 (zero extension) */
|
||||
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
|
||||
ia32_sysret:
|
||||
movq %rax,RAX-ARGOFFSET(%rsp)
|
||||
movq %rax,RAX(%rsp)
|
||||
ia32_ret_from_sys_call:
|
||||
CLEAR_RREGS -ARGOFFSET
|
||||
jmp int_ret_from_sys_call
|
||||
CLEAR_RREGS
|
||||
jmp int_ret_from_sys_call
|
||||
|
||||
ia32_tracesys:
|
||||
SAVE_REST
|
||||
ia32_tracesys:
|
||||
SAVE_EXTRA_REGS
|
||||
CLEAR_RREGS
|
||||
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
|
||||
movq %rsp,%rdi /* &pt_regs -> arg1 */
|
||||
call syscall_trace_enter
|
||||
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
|
||||
RESTORE_REST
|
||||
LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
|
||||
RESTORE_EXTRA_REGS
|
||||
cmpq $(IA32_NR_syscalls-1),%rax
|
||||
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
|
||||
jmp ia32_do_call
|
||||
END(ia32_syscall)
|
||||
|
||||
ia32_badsys:
|
||||
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
|
||||
movq $0,ORIG_RAX(%rsp)
|
||||
movq $-ENOSYS,%rax
|
||||
jmp ia32_sysret
|
||||
|
||||
@ -479,8 +571,6 @@ GLOBAL(\label)
|
||||
|
||||
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
|
||||
PTREGSCALL stub32_sigreturn, sys32_sigreturn
|
||||
PTREGSCALL stub32_execve, compat_sys_execve
|
||||
PTREGSCALL stub32_execveat, compat_sys_execveat
|
||||
PTREGSCALL stub32_fork, sys_fork
|
||||
PTREGSCALL stub32_vfork, sys_vfork
|
||||
|
||||
@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
|
||||
|
||||
ALIGN
|
||||
ia32_ptregs_common:
|
||||
popq %r11
|
||||
CFI_ENDPROC
|
||||
CFI_STARTPROC32 simple
|
||||
CFI_SIGNAL_FRAME
|
||||
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
|
||||
CFI_REL_OFFSET rax,RAX-ARGOFFSET
|
||||
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
|
||||
CFI_REL_OFFSET rdx,RDX-ARGOFFSET
|
||||
CFI_REL_OFFSET rsi,RSI-ARGOFFSET
|
||||
CFI_REL_OFFSET rdi,RDI-ARGOFFSET
|
||||
CFI_REL_OFFSET rip,RIP-ARGOFFSET
|
||||
/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
|
||||
/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
|
||||
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
|
||||
/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
|
||||
SAVE_REST
|
||||
CFI_DEF_CFA rsp,SIZEOF_PTREGS
|
||||
CFI_REL_OFFSET rax,RAX
|
||||
CFI_REL_OFFSET rcx,RCX
|
||||
CFI_REL_OFFSET rdx,RDX
|
||||
CFI_REL_OFFSET rsi,RSI
|
||||
CFI_REL_OFFSET rdi,RDI
|
||||
CFI_REL_OFFSET rip,RIP
|
||||
/* CFI_REL_OFFSET cs,CS*/
|
||||
/* CFI_REL_OFFSET rflags,EFLAGS*/
|
||||
CFI_REL_OFFSET rsp,RSP
|
||||
/* CFI_REL_OFFSET ss,SS*/
|
||||
SAVE_EXTRA_REGS 8
|
||||
call *%rax
|
||||
RESTORE_REST
|
||||
jmp ia32_sysret /* misbalances the return cache */
|
||||
RESTORE_EXTRA_REGS 8
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
END(ia32_ptregs_common)
|
||||
|
@ -1,7 +0,0 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
long compat_ni_syscall(void)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
|
||||
advice);
|
||||
}
|
||||
|
||||
long sys32_vm86_warning(void)
|
||||
{
|
||||
struct task_struct *me = current;
|
||||
static char lastcomm[sizeof(me->comm)];
|
||||
|
||||
if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
|
||||
compat_printk(KERN_INFO
|
||||
"%s: vm86 mode not supported on 64 bit kernel\n",
|
||||
me->comm);
|
||||
strncpy(lastcomm, me->comm, sizeof(lastcomm));
|
||||
}
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
|
||||
size_t count)
|
||||
{
|
||||
|
@ -1,25 +0,0 @@
|
||||
/* System call table for ia32 emulation. */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/sys.h>
|
||||
#include <linux/cache.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
|
||||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL_I386
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
|
||||
|
||||
typedef void (*sys_call_ptr_t)(void);
|
||||
|
||||
extern void compat_ni_syscall(void);
|
||||
|
||||
const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
|
||||
/*
|
||||
* Smells like a compiler bug -- it doesn't work
|
||||
* when the & below is removed.
|
||||
*/
|
||||
[0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
@ -18,12 +18,63 @@
|
||||
.endm
|
||||
#endif
|
||||
|
||||
.macro altinstruction_entry orig alt feature orig_len alt_len
|
||||
.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
|
||||
.long \orig - .
|
||||
.long \alt - .
|
||||
.word \feature
|
||||
.byte \orig_len
|
||||
.byte \alt_len
|
||||
.byte \pad_len
|
||||
.endm
|
||||
|
||||
.macro ALTERNATIVE oldinstr, newinstr, feature
|
||||
140:
|
||||
\oldinstr
|
||||
141:
|
||||
.skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
|
||||
142:
|
||||
|
||||
.pushsection .altinstructions,"a"
|
||||
altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
|
||||
.popsection
|
||||
|
||||
.pushsection .altinstr_replacement,"ax"
|
||||
143:
|
||||
\newinstr
|
||||
144:
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
#define old_len 141b-140b
|
||||
#define new_len1 144f-143f
|
||||
#define new_len2 145f-144f
|
||||
|
||||
/*
|
||||
* max without conditionals. Idea adapted from:
|
||||
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
|
||||
*/
|
||||
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
|
||||
|
||||
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
|
||||
140:
|
||||
\oldinstr
|
||||
141:
|
||||
.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
|
||||
(alt_max_short(new_len1, new_len2) - (old_len)),0x90
|
||||
142:
|
||||
|
||||
.pushsection .altinstructions,"a"
|
||||
altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
|
||||
altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
|
||||
.popsection
|
||||
|
||||
.pushsection .altinstr_replacement,"ax"
|
||||
143:
|
||||
\newinstr1
|
||||
144:
|
||||
\newinstr2
|
||||
145:
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
@ -48,8 +48,9 @@ struct alt_instr {
|
||||
s32 repl_offset; /* offset to replacement instruction */
|
||||
u16 cpuid; /* cpuid bit set for replacement */
|
||||
u8 instrlen; /* length of original instruction */
|
||||
u8 replacementlen; /* length of new instruction, <= instrlen */
|
||||
};
|
||||
u8 replacementlen; /* length of new instruction */
|
||||
u8 padlen; /* length of build-time padding */
|
||||
} __packed;
|
||||
|
||||
extern void alternative_instructions(void);
|
||||
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
|
||||
@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
|
||||
#define b_replacement(num) "664"#num
|
||||
#define e_replacement(num) "665"#num
|
||||
|
||||
#define b_replacement(number) "663"#number
|
||||
#define e_replacement(number) "664"#number
|
||||
#define alt_end_marker "663"
|
||||
#define alt_slen "662b-661b"
|
||||
#define alt_pad_len alt_end_marker"b-662b"
|
||||
#define alt_total_slen alt_end_marker"b-661b"
|
||||
#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
|
||||
|
||||
#define alt_slen "662b-661b"
|
||||
#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
|
||||
#define __OLDINSTR(oldinstr, num) \
|
||||
"661:\n\t" oldinstr "\n662:\n" \
|
||||
".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
|
||||
"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
|
||||
|
||||
#define ALTINSTR_ENTRY(feature, number) \
|
||||
#define OLDINSTR(oldinstr, num) \
|
||||
__OLDINSTR(oldinstr, num) \
|
||||
alt_end_marker ":\n"
|
||||
|
||||
/*
|
||||
* max without conditionals. Idea adapted from:
|
||||
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
|
||||
*
|
||||
* The additional "-" is needed because gas works with s32s.
|
||||
*/
|
||||
#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
|
||||
|
||||
/*
|
||||
* Pad the second replacement alternative with additional NOPs if it is
|
||||
* additionally longer than the first replacement alternative.
|
||||
*/
|
||||
#define OLDINSTR_2(oldinstr, num1, num2) \
|
||||
"661:\n\t" oldinstr "\n662:\n" \
|
||||
".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
|
||||
"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
|
||||
alt_end_marker ":\n"
|
||||
|
||||
#define ALTINSTR_ENTRY(feature, num) \
|
||||
" .long 661b - .\n" /* label */ \
|
||||
" .long " b_replacement(number)"f - .\n" /* new instruction */ \
|
||||
" .long " b_replacement(num)"f - .\n" /* new instruction */ \
|
||||
" .word " __stringify(feature) "\n" /* feature bit */ \
|
||||
" .byte " alt_slen "\n" /* source len */ \
|
||||
" .byte " alt_rlen(number) "\n" /* replacement len */
|
||||
" .byte " alt_total_slen "\n" /* source len */ \
|
||||
" .byte " alt_rlen(num) "\n" /* replacement len */ \
|
||||
" .byte " alt_pad_len "\n" /* pad len */
|
||||
|
||||
#define DISCARD_ENTRY(number) /* rlen <= slen */ \
|
||||
" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
|
||||
|
||||
#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
|
||||
b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
|
||||
#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
|
||||
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
|
||||
|
||||
/* alternative assembly primitive: */
|
||||
#define ALTERNATIVE(oldinstr, newinstr, feature) \
|
||||
OLDINSTR(oldinstr) \
|
||||
OLDINSTR(oldinstr, 1) \
|
||||
".pushsection .altinstructions,\"a\"\n" \
|
||||
ALTINSTR_ENTRY(feature, 1) \
|
||||
".popsection\n" \
|
||||
".pushsection .discard,\"aw\",@progbits\n" \
|
||||
DISCARD_ENTRY(1) \
|
||||
".popsection\n" \
|
||||
".pushsection .altinstr_replacement, \"ax\"\n" \
|
||||
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
|
||||
".popsection"
|
||||
|
||||
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
|
||||
OLDINSTR(oldinstr) \
|
||||
OLDINSTR_2(oldinstr, 1, 2) \
|
||||
".pushsection .altinstructions,\"a\"\n" \
|
||||
ALTINSTR_ENTRY(feature1, 1) \
|
||||
ALTINSTR_ENTRY(feature2, 2) \
|
||||
".popsection\n" \
|
||||
".pushsection .discard,\"aw\",@progbits\n" \
|
||||
DISCARD_ENTRY(1) \
|
||||
DISCARD_ENTRY(2) \
|
||||
".popsection\n" \
|
||||
".pushsection .altinstr_replacement, \"ax\"\n" \
|
||||
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
|
||||
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
|
||||
@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
|
||||
#define alternative(oldinstr, newinstr, feature) \
|
||||
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
|
||||
|
||||
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
|
||||
asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
|
||||
|
||||
/*
|
||||
* Alternative inline assembly with input.
|
||||
*
|
||||
|
@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
|
||||
{
|
||||
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
|
||||
|
||||
alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
|
||||
alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
|
||||
ASM_OUTPUT2("=r" (v), "=m" (*addr)),
|
||||
ASM_OUTPUT2("0" (v), "m" (*addr)));
|
||||
}
|
||||
|
@ -95,13 +95,11 @@ do { \
|
||||
* Stop RDTSC speculation. This is needed when you need to use RDTSC
|
||||
* (or get_cycles or vread that possibly accesses the TSC) in a defined
|
||||
* code region.
|
||||
*
|
||||
* (Could use an alternative three way for this if there was one.)
|
||||
*/
|
||||
static __always_inline void rdtsc_barrier(void)
|
||||
{
|
||||
alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
|
||||
alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
|
||||
alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
|
||||
"lfence", X86_FEATURE_LFENCE_RDTSC);
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_BARRIER_H */
|
||||
|
@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
* for assembly code:
|
||||
*/
|
||||
|
||||
#define R15 0
|
||||
#define R14 8
|
||||
#define R13 16
|
||||
#define R12 24
|
||||
#define RBP 32
|
||||
#define RBX 40
|
||||
/* The layout forms the "struct pt_regs" on the stack: */
|
||||
/*
|
||||
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
|
||||
* unless syscall needs a complete, fully filled "struct pt_regs".
|
||||
*/
|
||||
#define R15 0*8
|
||||
#define R14 1*8
|
||||
#define R13 2*8
|
||||
#define R12 3*8
|
||||
#define RBP 4*8
|
||||
#define RBX 5*8
|
||||
/* These regs are callee-clobbered. Always saved on kernel entry. */
|
||||
#define R11 6*8
|
||||
#define R10 7*8
|
||||
#define R9 8*8
|
||||
#define R8 9*8
|
||||
#define RAX 10*8
|
||||
#define RCX 11*8
|
||||
#define RDX 12*8
|
||||
#define RSI 13*8
|
||||
#define RDI 14*8
|
||||
/*
|
||||
* On syscall entry, this is syscall#. On CPU exception, this is error code.
|
||||
* On hw interrupt, it's IRQ number:
|
||||
*/
|
||||
#define ORIG_RAX 15*8
|
||||
/* Return frame for iretq */
|
||||
#define RIP 16*8
|
||||
#define CS 17*8
|
||||
#define EFLAGS 18*8
|
||||
#define RSP 19*8
|
||||
#define SS 20*8
|
||||
|
||||
/* arguments: interrupts/non tracing syscalls only save up to here: */
|
||||
#define R11 48
|
||||
#define R10 56
|
||||
#define R9 64
|
||||
#define R8 72
|
||||
#define RAX 80
|
||||
#define RCX 88
|
||||
#define RDX 96
|
||||
#define RSI 104
|
||||
#define RDI 112
|
||||
#define ORIG_RAX 120 /* + error_code */
|
||||
/* end of arguments */
|
||||
|
||||
/* cpu exception frame or undefined in case of fast syscall: */
|
||||
#define RIP 128
|
||||
#define CS 136
|
||||
#define EFLAGS 144
|
||||
#define RSP 152
|
||||
#define SS 160
|
||||
|
||||
#define ARGOFFSET R11
|
||||
|
||||
.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
|
||||
subq $9*8+\addskip, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 9*8+\addskip
|
||||
movq_cfi rdi, 8*8
|
||||
movq_cfi rsi, 7*8
|
||||
movq_cfi rdx, 6*8
|
||||
|
||||
.if \save_rcx
|
||||
movq_cfi rcx, 5*8
|
||||
.endif
|
||||
|
||||
.if \rax_enosys
|
||||
movq $-ENOSYS, 4*8(%rsp)
|
||||
.else
|
||||
movq_cfi rax, 4*8
|
||||
.endif
|
||||
|
||||
.if \save_r891011
|
||||
movq_cfi r8, 3*8
|
||||
movq_cfi r9, 2*8
|
||||
movq_cfi r10, 1*8
|
||||
movq_cfi r11, 0*8
|
||||
.endif
|
||||
#define SIZEOF_PTREGS 21*8
|
||||
|
||||
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
|
||||
subq $15*8+\addskip, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 15*8+\addskip
|
||||
.endm
|
||||
|
||||
#define ARG_SKIP (9*8)
|
||||
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
|
||||
.if \r11
|
||||
movq_cfi r11, 6*8+\offset
|
||||
.endif
|
||||
.if \r8910
|
||||
movq_cfi r10, 7*8+\offset
|
||||
movq_cfi r9, 8*8+\offset
|
||||
movq_cfi r8, 9*8+\offset
|
||||
.endif
|
||||
.if \rax
|
||||
movq_cfi rax, 10*8+\offset
|
||||
.endif
|
||||
.if \rcx
|
||||
movq_cfi rcx, 11*8+\offset
|
||||
.endif
|
||||
movq_cfi rdx, 12*8+\offset
|
||||
movq_cfi rsi, 13*8+\offset
|
||||
movq_cfi rdi, 14*8+\offset
|
||||
.endm
|
||||
.macro SAVE_C_REGS offset=0
|
||||
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
|
||||
.endm
|
||||
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
|
||||
SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
|
||||
.endm
|
||||
.macro SAVE_C_REGS_EXCEPT_R891011
|
||||
SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
|
||||
.endm
|
||||
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
|
||||
SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
|
||||
.endm
|
||||
.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
|
||||
SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
|
||||
rstor_r8910=1, rstor_rdx=1
|
||||
.macro SAVE_EXTRA_REGS offset=0
|
||||
movq_cfi r15, 0*8+\offset
|
||||
movq_cfi r14, 1*8+\offset
|
||||
movq_cfi r13, 2*8+\offset
|
||||
movq_cfi r12, 3*8+\offset
|
||||
movq_cfi rbp, 4*8+\offset
|
||||
movq_cfi rbx, 5*8+\offset
|
||||
.endm
|
||||
.macro SAVE_EXTRA_REGS_RBP offset=0
|
||||
movq_cfi rbp, 4*8+\offset
|
||||
.endm
|
||||
|
||||
.macro RESTORE_EXTRA_REGS offset=0
|
||||
movq_cfi_restore 0*8+\offset, r15
|
||||
movq_cfi_restore 1*8+\offset, r14
|
||||
movq_cfi_restore 2*8+\offset, r13
|
||||
movq_cfi_restore 3*8+\offset, r12
|
||||
movq_cfi_restore 4*8+\offset, rbp
|
||||
movq_cfi_restore 5*8+\offset, rbx
|
||||
.endm
|
||||
|
||||
.macro ZERO_EXTRA_REGS
|
||||
xorl %r15d, %r15d
|
||||
xorl %r14d, %r14d
|
||||
xorl %r13d, %r13d
|
||||
xorl %r12d, %r12d
|
||||
xorl %ebp, %ebp
|
||||
xorl %ebx, %ebx
|
||||
.endm
|
||||
|
||||
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
|
||||
.if \rstor_r11
|
||||
movq_cfi_restore 0*8, r11
|
||||
movq_cfi_restore 6*8, r11
|
||||
.endif
|
||||
|
||||
.if \rstor_r8910
|
||||
movq_cfi_restore 1*8, r10
|
||||
movq_cfi_restore 2*8, r9
|
||||
movq_cfi_restore 3*8, r8
|
||||
movq_cfi_restore 7*8, r10
|
||||
movq_cfi_restore 8*8, r9
|
||||
movq_cfi_restore 9*8, r8
|
||||
.endif
|
||||
|
||||
.if \rstor_rax
|
||||
movq_cfi_restore 4*8, rax
|
||||
movq_cfi_restore 10*8, rax
|
||||
.endif
|
||||
|
||||
.if \rstor_rcx
|
||||
movq_cfi_restore 5*8, rcx
|
||||
movq_cfi_restore 11*8, rcx
|
||||
.endif
|
||||
|
||||
.if \rstor_rdx
|
||||
movq_cfi_restore 6*8, rdx
|
||||
.endif
|
||||
|
||||
movq_cfi_restore 7*8, rsi
|
||||
movq_cfi_restore 8*8, rdi
|
||||
|
||||
.if ARG_SKIP+\addskip > 0
|
||||
addq $ARG_SKIP+\addskip, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
|
||||
movq_cfi_restore 12*8, rdx
|
||||
.endif
|
||||
movq_cfi_restore 13*8, rsi
|
||||
movq_cfi_restore 14*8, rdi
|
||||
.endm
|
||||
.macro RESTORE_C_REGS
|
||||
RESTORE_C_REGS_HELPER 1,1,1,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_RAX
|
||||
RESTORE_C_REGS_HELPER 0,1,1,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_RCX
|
||||
RESTORE_C_REGS_HELPER 1,0,1,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_R11
|
||||
RESTORE_C_REGS_HELPER 1,1,0,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
RESTORE_C_REGS_HELPER 1,0,0,1,1
|
||||
.endm
|
||||
.macro RESTORE_RSI_RDI
|
||||
RESTORE_C_REGS_HELPER 0,0,0,0,0
|
||||
.endm
|
||||
.macro RESTORE_RSI_RDI_RDX
|
||||
RESTORE_C_REGS_HELPER 0,0,0,0,1
|
||||
.endm
|
||||
|
||||
.macro LOAD_ARGS offset, skiprax=0
|
||||
movq \offset(%rsp), %r11
|
||||
movq \offset+8(%rsp), %r10
|
||||
movq \offset+16(%rsp), %r9
|
||||
movq \offset+24(%rsp), %r8
|
||||
movq \offset+40(%rsp), %rcx
|
||||
movq \offset+48(%rsp), %rdx
|
||||
movq \offset+56(%rsp), %rsi
|
||||
movq \offset+64(%rsp), %rdi
|
||||
.if \skiprax
|
||||
.else
|
||||
movq \offset+72(%rsp), %rax
|
||||
.endif
|
||||
.endm
|
||||
|
||||
#define REST_SKIP (6*8)
|
||||
|
||||
.macro SAVE_REST
|
||||
subq $REST_SKIP, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET REST_SKIP
|
||||
movq_cfi rbx, 5*8
|
||||
movq_cfi rbp, 4*8
|
||||
movq_cfi r12, 3*8
|
||||
movq_cfi r13, 2*8
|
||||
movq_cfi r14, 1*8
|
||||
movq_cfi r15, 0*8
|
||||
.endm
|
||||
|
||||
.macro RESTORE_REST
|
||||
movq_cfi_restore 0*8, r15
|
||||
movq_cfi_restore 1*8, r14
|
||||
movq_cfi_restore 2*8, r13
|
||||
movq_cfi_restore 3*8, r12
|
||||
movq_cfi_restore 4*8, rbp
|
||||
movq_cfi_restore 5*8, rbx
|
||||
addq $REST_SKIP, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
|
||||
.endm
|
||||
|
||||
.macro SAVE_ALL
|
||||
SAVE_ARGS
|
||||
SAVE_REST
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ALL addskip=0
|
||||
RESTORE_REST
|
||||
RESTORE_ARGS 1, \addskip
|
||||
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
|
||||
addq $15*8+\addskip, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
|
||||
.endm
|
||||
|
||||
.macro icebp
|
||||
@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
*/
|
||||
|
||||
.macro SAVE_ALL
|
||||
pushl_cfi %eax
|
||||
CFI_REL_OFFSET eax, 0
|
||||
pushl_cfi %ebp
|
||||
CFI_REL_OFFSET ebp, 0
|
||||
pushl_cfi %edi
|
||||
CFI_REL_OFFSET edi, 0
|
||||
pushl_cfi %esi
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl_cfi %edx
|
||||
CFI_REL_OFFSET edx, 0
|
||||
pushl_cfi %ecx
|
||||
CFI_REL_OFFSET ecx, 0
|
||||
pushl_cfi %ebx
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl_cfi_reg eax
|
||||
pushl_cfi_reg ebp
|
||||
pushl_cfi_reg edi
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg edx
|
||||
pushl_cfi_reg ecx
|
||||
pushl_cfi_reg ebx
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ALL
|
||||
popl_cfi %ebx
|
||||
CFI_RESTORE ebx
|
||||
popl_cfi %ecx
|
||||
CFI_RESTORE ecx
|
||||
popl_cfi %edx
|
||||
CFI_RESTORE edx
|
||||
popl_cfi %esi
|
||||
CFI_RESTORE esi
|
||||
popl_cfi %edi
|
||||
CFI_RESTORE edi
|
||||
popl_cfi %ebp
|
||||
CFI_RESTORE ebp
|
||||
popl_cfi %eax
|
||||
CFI_RESTORE eax
|
||||
popl_cfi_reg ebx
|
||||
popl_cfi_reg ecx
|
||||
popl_cfi_reg edx
|
||||
popl_cfi_reg esi
|
||||
popl_cfi_reg edi
|
||||
popl_cfi_reg ebp
|
||||
popl_cfi_reg eax
|
||||
.endm
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
|
||||
sp = task_pt_regs(current)->sp;
|
||||
} else {
|
||||
/* -128 for the x32 ABI redzone */
|
||||
sp = this_cpu_read(old_rsp) - 128;
|
||||
sp = task_pt_regs(current)->sp - 128;
|
||||
}
|
||||
|
||||
return (void __user *)round_down(sp - len, 16);
|
||||
|
@ -231,7 +231,9 @@
|
||||
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
|
||||
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
|
||||
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
|
||||
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
|
||||
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
||||
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
||||
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
||||
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
||||
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
||||
@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
|
||||
" .word %P0\n" /* 1: do replace */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 0\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
/* skipping size check since replacement size = 0 */
|
||||
: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
|
||||
@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
|
||||
" .word %P0\n" /* feature bit */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 0\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
/* skipping size check since replacement size = 0 */
|
||||
: : "i" (bit) : : t_no);
|
||||
@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
|
||||
" .word %P1\n" /* feature bit */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 4f - 3f\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .discard,\"aw\",@progbits\n"
|
||||
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
|
||||
@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
|
||||
static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
|
||||
{
|
||||
#ifdef CC_HAVE_ASM_GOTO
|
||||
/*
|
||||
* We need to spell the jumps to the compiler because, depending on the offset,
|
||||
* the replacement jump can be bigger than the original jump, and this we cannot
|
||||
* have. Thus, we force the jump to the widest, 4-byte, signed relative
|
||||
* offset even though the last would often fit in less bytes.
|
||||
*/
|
||||
asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
|
||||
asm_volatile_goto("1: jmp %l[t_dynamic]\n"
|
||||
"2:\n"
|
||||
".skip -(((5f-4f) - (2b-1b)) > 0) * "
|
||||
"((5f-4f) - (2b-1b)),0x90\n"
|
||||
"3:\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n" /* src offset */
|
||||
" .long 3f - .\n" /* repl offset */
|
||||
" .long 4f - .\n" /* repl offset */
|
||||
" .word %P1\n" /* always replace */
|
||||
" .byte 2b - 1b\n" /* src len */
|
||||
" .byte 4f - 3f\n" /* repl len */
|
||||
" .byte 3b - 1b\n" /* src len */
|
||||
" .byte 5f - 4f\n" /* repl len */
|
||||
" .byte 3b - 2b\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .altinstr_replacement,\"ax\"\n"
|
||||
"3: .byte 0xe9\n .long %l[t_no] - 2b\n"
|
||||
"4:\n"
|
||||
"4: jmp %l[t_no]\n"
|
||||
"5:\n"
|
||||
".previous\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n" /* src offset */
|
||||
" .long 0\n" /* no replacement */
|
||||
" .word %P0\n" /* feature bit */
|
||||
" .byte 2b - 1b\n" /* src len */
|
||||
" .byte 3b - 1b\n" /* src len */
|
||||
" .byte 0\n" /* repl len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
: : "i" (bit), "i" (X86_FEATURE_ALWAYS)
|
||||
: : t_dynamic, t_no);
|
||||
@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
|
||||
" .word %P2\n" /* always replace */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 4f - 3f\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .discard,\"aw\",@progbits\n"
|
||||
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
|
||||
@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
|
||||
" .word %P1\n" /* feature bit */
|
||||
" .byte 4b - 3b\n" /* src len */
|
||||
" .byte 6f - 5f\n" /* repl len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .discard,\"aw\",@progbits\n"
|
||||
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
|
||||
|
@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
|
||||
* Pentium F0 0F bugfix can have resulted in the mapped
|
||||
* IDT being write-protected.
|
||||
*/
|
||||
#define set_intr_gate(n, addr) \
|
||||
#define set_intr_gate_notrace(n, addr) \
|
||||
do { \
|
||||
BUG_ON((unsigned)n > 0xFF); \
|
||||
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
|
||||
__KERNEL_CS); \
|
||||
} while (0)
|
||||
|
||||
#define set_intr_gate(n, addr) \
|
||||
do { \
|
||||
set_intr_gate_notrace(n, addr); \
|
||||
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
|
||||
0, 0, __KERNEL_CS); \
|
||||
} while (0)
|
||||
|
@ -86,11 +86,23 @@
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
.endm
|
||||
|
||||
.macro pushq_cfi_reg reg
|
||||
pushq %\reg
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
CFI_REL_OFFSET \reg, 0
|
||||
.endm
|
||||
|
||||
.macro popq_cfi reg
|
||||
popq \reg
|
||||
CFI_ADJUST_CFA_OFFSET -8
|
||||
.endm
|
||||
|
||||
.macro popq_cfi_reg reg
|
||||
popq %\reg
|
||||
CFI_ADJUST_CFA_OFFSET -8
|
||||
CFI_RESTORE \reg
|
||||
.endm
|
||||
|
||||
.macro pushfq_cfi
|
||||
pushfq
|
||||
CFI_ADJUST_CFA_OFFSET 8
|
||||
@ -116,11 +128,23 @@
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
.endm
|
||||
|
||||
.macro pushl_cfi_reg reg
|
||||
pushl %\reg
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET \reg, 0
|
||||
.endm
|
||||
|
||||
.macro popl_cfi reg
|
||||
popl \reg
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
.endm
|
||||
|
||||
.macro popl_cfi_reg reg
|
||||
popl %\reg
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE \reg
|
||||
.endm
|
||||
|
||||
.macro pushfl_cfi
|
||||
pushfl
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
|
@ -171,10 +171,11 @@ do { \
|
||||
static inline void elf_common_init(struct thread_struct *t,
|
||||
struct pt_regs *regs, const u16 ds)
|
||||
{
|
||||
regs->ax = regs->bx = regs->cx = regs->dx = 0;
|
||||
regs->si = regs->di = regs->bp = 0;
|
||||
/* Commented-out registers are cleared in stub_execve */
|
||||
/*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
|
||||
regs->si = regs->di /*= regs->bp*/ = 0;
|
||||
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
|
||||
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
|
||||
/*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
|
||||
t->fs = t->gs = 0;
|
||||
t->fsindex = t->gsindex = 0;
|
||||
t->ds = t->es = ds;
|
||||
|
@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
|
||||
extern __visible void smp_invalidate_interrupt(struct pt_regs *);
|
||||
#endif
|
||||
|
||||
extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
|
||||
- FIRST_EXTERNAL_VECTOR])(void);
|
||||
extern char irq_entries_start[];
|
||||
#ifdef CONFIG_TRACING
|
||||
#define trace_interrupt interrupt
|
||||
#define trace_irq_entries_start irq_entries_start
|
||||
#endif
|
||||
|
||||
#define VECTOR_UNDEFINED (-1)
|
||||
|
@ -69,7 +69,7 @@ struct insn {
|
||||
const insn_byte_t *next_byte;
|
||||
};
|
||||
|
||||
#define MAX_INSN_SIZE 16
|
||||
#define MAX_INSN_SIZE 15
|
||||
|
||||
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
|
||||
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
|
||||
|
@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
|
||||
#define USERGS_SYSRET32 \
|
||||
swapgs; \
|
||||
sysretl
|
||||
#define ENABLE_INTERRUPTS_SYSEXIT32 \
|
||||
swapgs; \
|
||||
sti; \
|
||||
sysexit
|
||||
|
||||
#else
|
||||
#define INTERRUPT_RETURN iret
|
||||
@ -163,33 +159,9 @@ static inline int arch_irqs_disabled(void)
|
||||
|
||||
return arch_irqs_disabled_flags(flags);
|
||||
}
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#else
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
|
||||
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
|
||||
TRACE_IRQS_ON; \
|
||||
sti; \
|
||||
SAVE_REST; \
|
||||
LOCKDEP_SYS_EXIT; \
|
||||
RESTORE_REST; \
|
||||
cli; \
|
||||
TRACE_IRQS_OFF;
|
||||
|
||||
#else
|
||||
#define ARCH_LOCKDEP_SYS_EXIT \
|
||||
pushl %eax; \
|
||||
pushl %ecx; \
|
||||
pushl %edx; \
|
||||
call lockdep_sys_exit; \
|
||||
popl %edx; \
|
||||
popl %ecx; \
|
||||
popl %eax;
|
||||
|
||||
#define ARCH_LOCKDEP_SYS_EXIT_IRQ
|
||||
#endif
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
|
||||
# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
|
||||
@ -198,12 +170,29 @@ static inline int arch_irqs_disabled(void)
|
||||
# define TRACE_IRQS_OFF
|
||||
#endif
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
|
||||
# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
|
||||
# else
|
||||
# ifdef CONFIG_X86_64
|
||||
# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
|
||||
# define LOCKDEP_SYS_EXIT_IRQ \
|
||||
TRACE_IRQS_ON; \
|
||||
sti; \
|
||||
call lockdep_sys_exit_thunk; \
|
||||
cli; \
|
||||
TRACE_IRQS_OFF;
|
||||
# else
|
||||
# define LOCKDEP_SYS_EXIT \
|
||||
pushl %eax; \
|
||||
pushl %ecx; \
|
||||
pushl %edx; \
|
||||
call lockdep_sys_exit; \
|
||||
popl %edx; \
|
||||
popl %ecx; \
|
||||
popl %eax;
|
||||
# define LOCKDEP_SYS_EXIT_IRQ
|
||||
# endif
|
||||
#else
|
||||
# define LOCKDEP_SYS_EXIT
|
||||
# define LOCKDEP_SYS_EXIT_IRQ
|
||||
# endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif
|
||||
|
@ -976,11 +976,6 @@ extern void default_banner(void);
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
|
||||
CLBR_NONE, \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
|
||||
|
||||
#define ENABLE_INTERRUPTS_SYSEXIT32 \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
|
||||
CLBR_NONE, \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
@ -210,8 +210,23 @@ struct x86_hw_tss {
|
||||
unsigned long sp0;
|
||||
unsigned short ss0, __ss0h;
|
||||
unsigned long sp1;
|
||||
/* ss1 caches MSR_IA32_SYSENTER_CS: */
|
||||
unsigned short ss1, __ss1h;
|
||||
|
||||
/*
|
||||
* We don't use ring 1, so ss1 is a convenient scratch space in
|
||||
* the same cacheline as sp0. We use ss1 to cache the value in
|
||||
* MSR_IA32_SYSENTER_CS. When we context switch
|
||||
* MSR_IA32_SYSENTER_CS, we first check if the new value being
|
||||
* written matches ss1, and, if it's not, then we wrmsr the new
|
||||
* value and update ss1.
|
||||
*
|
||||
* The only reason we context switch MSR_IA32_SYSENTER_CS is
|
||||
* that we set it to zero in vm86 tasks to avoid corrupting the
|
||||
* stack if we were to go through the sysenter path from vm86
|
||||
* mode.
|
||||
*/
|
||||
unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
|
||||
|
||||
unsigned short __ss1h;
|
||||
unsigned long sp2;
|
||||
unsigned short ss2, __ss2h;
|
||||
unsigned long __cr3;
|
||||
@ -276,13 +291,17 @@ struct tss_struct {
|
||||
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
|
||||
|
||||
/*
|
||||
* .. and then another 0x100 bytes for the emergency kernel stack:
|
||||
* Space for the temporary SYSENTER stack:
|
||||
*/
|
||||
unsigned long stack[64];
|
||||
unsigned long SYSENTER_stack[64];
|
||||
|
||||
} ____cacheline_aligned;
|
||||
|
||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
|
||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Save the original ist values for checking stack pointers during debugging
|
||||
@ -474,7 +493,6 @@ struct thread_struct {
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long sysenter_cs;
|
||||
#else
|
||||
unsigned long usersp; /* Copy from PDA */
|
||||
unsigned short es;
|
||||
unsigned short ds;
|
||||
unsigned short fsindex;
|
||||
@ -564,6 +582,16 @@ static inline void native_swapgs(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Return the address this CPU currently records as the top of the
 * kernel stack.
 *
 * The value is fetched with this_cpu_read_stable() from a per-CPU
 * location that depends on the build:
 *   - CONFIG_X86_64: the sp0 field of the per-CPU cpu_tss.x86_tss;
 *   - otherwise:     the separate per-CPU cpu_current_top_of_stack.
 */
static inline unsigned long current_top_of_stack(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
|
||||
#else
|
||||
/* sp0 on x86_32 is special in and around vm86 mode. */
|
||||
/* ... so 32-bit reads a dedicated per-CPU variable instead of sp0. */
return this_cpu_read_stable(cpu_current_top_of_stack);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/paravirt.h>
|
||||
#else
|
||||
@ -761,10 +789,10 @@ extern char ignore_fpu_irq;
|
||||
#define ARCH_HAS_SPINLOCK_PREFETCH
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
# define BASE_PREFETCH ASM_NOP4
|
||||
# define BASE_PREFETCH ""
|
||||
# define ARCH_HAS_PREFETCH
|
||||
#else
|
||||
# define BASE_PREFETCH "prefetcht0 (%1)"
|
||||
# define BASE_PREFETCH "prefetcht0 %P1"
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -775,10 +803,9 @@ extern char ignore_fpu_irq;
|
||||
*/
|
||||
static inline void prefetch(const void *x)
|
||||
{
|
||||
alternative_input(BASE_PREFETCH,
|
||||
"prefetchnta (%1)",
|
||||
alternative_input(BASE_PREFETCH, "prefetchnta %P1",
|
||||
X86_FEATURE_XMM,
|
||||
"r" (x));
|
||||
"m" (*(const char *)x));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
|
||||
*/
|
||||
static inline void prefetchw(const void *x)
|
||||
{
|
||||
alternative_input(BASE_PREFETCH,
|
||||
"prefetchw (%1)",
|
||||
X86_FEATURE_3DNOW,
|
||||
"r" (x));
|
||||
alternative_input(BASE_PREFETCH, "prefetchw %P1",
|
||||
X86_FEATURE_3DNOWPREFETCH,
|
||||
"m" (*(const char *)x));
|
||||
}
|
||||
|
||||
static inline void spin_lock_prefetch(const void *x)
|
||||
@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
|
||||
prefetchw(x);
|
||||
}
|
||||
|
||||
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
|
||||
TOP_OF_KERNEL_STACK_PADDING)
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* User space process size: 3GB (default).
|
||||
@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
|
||||
#define STACK_TOP_MAX STACK_TOP
|
||||
|
||||
#define INIT_THREAD { \
|
||||
.sp0 = sizeof(init_stack) + (long)&init_stack, \
|
||||
.sp0 = TOP_OF_INIT_STACK, \
|
||||
.vm86_info = NULL, \
|
||||
.sysenter_cs = __KERNEL_CS, \
|
||||
.io_bitmap_ptr = NULL, \
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that the .io_bitmap member must be extra-big. This is because
|
||||
* the CPU will access an additional byte beyond the end of the IO
|
||||
* permission bitmap. The extra byte must be all 1 bits, and must
|
||||
* be within the limit.
|
||||
*/
|
||||
#define INIT_TSS { \
|
||||
.x86_tss = { \
|
||||
.sp0 = sizeof(init_stack) + (long)&init_stack, \
|
||||
.ss0 = __KERNEL_DS, \
|
||||
.ss1 = __KERNEL_CS, \
|
||||
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
|
||||
}, \
|
||||
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
|
||||
}
|
||||
|
||||
extern unsigned long thread_saved_pc(struct task_struct *tsk);
|
||||
|
||||
#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
|
||||
#define KSTK_TOP(info) \
|
||||
({ \
|
||||
unsigned long *__ptr = (unsigned long *)(info); \
|
||||
(unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
|
||||
})
|
||||
|
||||
/*
|
||||
* The below -8 is to reserve 8 bytes on top of the ring0 stack.
|
||||
* TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
|
||||
* This is necessary to guarantee that the entire "struct pt_regs"
|
||||
* is accessible even if the CPU hasn't stored the SS/ESP registers
|
||||
* on the stack (interrupt gate does not save these registers
|
||||
@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
|
||||
* "struct pt_regs" is possible, but they may contain the
|
||||
* completely wrong values.
|
||||
*/
|
||||
#define task_pt_regs(task) \
|
||||
({ \
|
||||
struct pt_regs *__regs__; \
|
||||
__regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
|
||||
__regs__ - 1; \
|
||||
#define task_pt_regs(task) \
|
||||
({ \
|
||||
unsigned long __ptr = (unsigned long)task_stack_page(task); \
|
||||
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
|
||||
((struct pt_regs *)__ptr) - 1; \
|
||||
})
|
||||
|
||||
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
|
||||
@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
|
||||
#define STACK_TOP_MAX TASK_SIZE_MAX
|
||||
|
||||
#define INIT_THREAD { \
|
||||
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
|
||||
}
|
||||
|
||||
#define INIT_TSS { \
|
||||
.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
|
||||
.sp0 = TOP_OF_INIT_STACK \
|
||||
}
|
||||
|
||||
/*
|
||||
@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
|
||||
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
|
||||
extern unsigned long KSTK_ESP(struct task_struct *task);
|
||||
|
||||
/*
|
||||
* User space RSP while inside the SYSCALL fast path
|
||||
*/
|
||||
DECLARE_PER_CPU(unsigned long, old_rsp);
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
|
||||
|
@ -31,13 +31,17 @@ struct pt_regs {
|
||||
#else /* __i386__ */
|
||||
|
||||
struct pt_regs {
|
||||
/*
|
||||
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
|
||||
* unless syscall needs a complete, fully filled "struct pt_regs".
|
||||
*/
|
||||
unsigned long r15;
|
||||
unsigned long r14;
|
||||
unsigned long r13;
|
||||
unsigned long r12;
|
||||
unsigned long bp;
|
||||
unsigned long bx;
|
||||
/* arguments: non interrupts/non tracing syscalls only save up to here*/
|
||||
/* These regs are callee-clobbered. Always saved on kernel entry. */
|
||||
unsigned long r11;
|
||||
unsigned long r10;
|
||||
unsigned long r9;
|
||||
@ -47,9 +51,12 @@ struct pt_regs {
|
||||
unsigned long dx;
|
||||
unsigned long si;
|
||||
unsigned long di;
|
||||
/*
|
||||
* On syscall entry, this is syscall#. On CPU exception, this is error code.
|
||||
* On hw interrupt, it's IRQ number:
|
||||
*/
|
||||
unsigned long orig_ax;
|
||||
/* end of arguments */
|
||||
/* cpu exception frame or undefined */
|
||||
/* Return frame for iretq */
|
||||
unsigned long ip;
|
||||
unsigned long cs;
|
||||
unsigned long flags;
|
||||
@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
|
||||
}
|
||||
|
||||
/*
|
||||
* user_mode_vm(regs) determines whether a register set came from user mode.
|
||||
* This is true if V8086 mode was enabled OR if the register set was from
|
||||
* protected mode with RPL-3 CS value. This tricky test checks that with
|
||||
* one comparison. Many places in the kernel can bypass this full check
|
||||
* if they have already ruled out V8086 mode, so user_mode(regs) can be used.
|
||||
* user_mode(regs) determines whether a register set came from user
|
||||
* mode. On x86_32, this is true if V8086 mode was enabled OR if the
|
||||
* register set was from protected mode with RPL-3 CS value. This
|
||||
* tricky test checks that with one comparison.
|
||||
*
|
||||
* On x86_64, vm86 mode is mercifully nonexistent, and we don't need
|
||||
* the extra check.
|
||||
*/
|
||||
static inline int user_mode(struct pt_regs *regs)
|
||||
{
|
||||
@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Return nonzero if @regs was captured while executing user code.
 *
 * On CONFIG_X86_32 the check is done with a single comparison: the RPL
 * bits of the saved CS (regs->cs & SEGMENT_RPL_MASK) are OR-ed with the
 * saved flags masked by X86_VM_MASK, and the result is compared against
 * USER_RPL.  The result is >= USER_RPL when CS has RPL 3, or when the
 * masked flags contribute a value >= USER_RPL (presumably the EFLAGS.VM
 * bit, i.e. vm86 mode -- X86_VM_MASK is defined elsewhere; confirm).
 *
 * On other builds this simply forwards to user_mode().
 */
static inline int user_mode_vm(struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
|
||||
USER_RPL;
|
||||
#else
|
||||
return user_mode(regs);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int v8086_mode(struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||
#endif
|
||||
}
|
||||
|
||||
#define current_user_stack_pointer() this_cpu_read(old_rsp)
|
||||
/* ia32 vs. x32 difference */
|
||||
#define compat_user_stack_pointer() \
|
||||
(test_thread_flag(TIF_IA32) \
|
||||
? current_pt_regs()->sp \
|
||||
: this_cpu_read(old_rsp))
|
||||
#define current_user_stack_pointer() current_pt_regs()->sp
|
||||
#define compat_user_stack_pointer() current_pt_regs()->sp
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
|
||||
*/
|
||||
#define arch_ptrace_stop_needed(code, info) \
|
||||
({ \
|
||||
set_thread_flag(TIF_NOTIFY_RESUME); \
|
||||
force_iret(); \
|
||||
false; \
|
||||
})
|
||||
|
||||
|
@ -3,8 +3,10 @@
|
||||
|
||||
#include <linux/const.h>
|
||||
|
||||
/* Constructor for a conventional segment GDT (or LDT) entry */
|
||||
/* This is a macro so it can be used in initializers */
|
||||
/*
|
||||
* Constructor for a conventional segment GDT (or LDT) entry.
|
||||
* This is a macro so it can be used in initializers.
|
||||
*/
|
||||
#define GDT_ENTRY(flags, base, limit) \
|
||||
((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
|
||||
(((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
|
||||
@ -12,198 +14,228 @@
|
||||
(((base) & _AC(0x00ffffff,ULL)) << 16) | \
|
||||
(((limit) & _AC(0x0000ffff,ULL))))
|
||||
|
||||
/* Simple and small GDT entries for booting only */
|
||||
/* Simple and small GDT entries for booting only: */
|
||||
|
||||
#define GDT_ENTRY_BOOT_CS 2
|
||||
#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
|
||||
#define GDT_ENTRY_BOOT_DS 3
|
||||
#define GDT_ENTRY_BOOT_TSS 4
|
||||
#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
|
||||
#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
|
||||
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
|
||||
|
||||
#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
|
||||
#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
|
||||
/*
|
||||
* Bottom two bits of selector give the ring
|
||||
* privilege level
|
||||
*/
|
||||
#define SEGMENT_RPL_MASK 0x3
|
||||
|
||||
#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2)
|
||||
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8)
|
||||
/* User mode is privilege level 3: */
|
||||
#define USER_RPL 0x3
|
||||
|
||||
#define SEGMENT_RPL_MASK 0x3 /*
|
||||
* Bottom two bits of selector give the ring
|
||||
* privilege level
|
||||
*/
|
||||
#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */
|
||||
#define USER_RPL 0x3 /* User mode is privilege level 3 */
|
||||
#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
|
||||
#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */
|
||||
/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
|
||||
#define SEGMENT_TI_MASK 0x4
|
||||
/* LDT segment has TI set ... */
|
||||
#define SEGMENT_LDT 0x4
|
||||
/* ... GDT has it cleared */
|
||||
#define SEGMENT_GDT 0x0
|
||||
|
||||
#define GDT_ENTRY_INVALID_SEG 0
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* The layout of the per-CPU GDT under Linux:
|
||||
*
|
||||
* 0 - null
|
||||
* 0 - null <=== cacheline #1
|
||||
* 1 - reserved
|
||||
* 2 - reserved
|
||||
* 3 - reserved
|
||||
*
|
||||
* 4 - unused <==== new cacheline
|
||||
* 4 - unused <=== cacheline #2
|
||||
* 5 - unused
|
||||
*
|
||||
* ------- start of TLS (Thread-Local Storage) segments:
|
||||
*
|
||||
* 6 - TLS segment #1 [ glibc's TLS segment ]
|
||||
* 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
|
||||
* 8 - TLS segment #3
|
||||
* 8 - TLS segment #3 <=== cacheline #3
|
||||
* 9 - reserved
|
||||
* 10 - reserved
|
||||
* 11 - reserved
|
||||
*
|
||||
* ------- start of kernel segments:
|
||||
*
|
||||
* 12 - kernel code segment <==== new cacheline
|
||||
* 12 - kernel code segment <=== cacheline #4
|
||||
* 13 - kernel data segment
|
||||
* 14 - default user CS
|
||||
* 15 - default user DS
|
||||
* 16 - TSS
|
||||
* 16 - TSS <=== cacheline #5
|
||||
* 17 - LDT
|
||||
* 18 - PNPBIOS support (16->32 gate)
|
||||
* 19 - PNPBIOS support
|
||||
* 20 - PNPBIOS support
|
||||
* 20 - PNPBIOS support <=== cacheline #6
|
||||
* 21 - PNPBIOS support
|
||||
* 22 - PNPBIOS support
|
||||
* 23 - APM BIOS support
|
||||
* 24 - APM BIOS support
|
||||
* 24 - APM BIOS support <=== cacheline #7
|
||||
* 25 - APM BIOS support
|
||||
*
|
||||
* 26 - ESPFIX small SS
|
||||
* 27 - per-cpu [ offset to per-cpu data area ]
|
||||
* 28 - stack_canary-20 [ for stack protector ]
|
||||
* 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
|
||||
* 29 - unused
|
||||
* 30 - unused
|
||||
* 31 - TSS for double fault handler
|
||||
*/
|
||||
#define GDT_ENTRY_TLS_MIN 6
|
||||
#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
|
||||
#define GDT_ENTRY_TLS_MIN 6
|
||||
#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
|
||||
|
||||
#define GDT_ENTRY_KERNEL_CS 12
|
||||
#define GDT_ENTRY_KERNEL_DS 13
|
||||
#define GDT_ENTRY_DEFAULT_USER_CS 14
|
||||
|
||||
#define GDT_ENTRY_DEFAULT_USER_DS 15
|
||||
#define GDT_ENTRY_TSS 16
|
||||
#define GDT_ENTRY_LDT 17
|
||||
#define GDT_ENTRY_PNPBIOS_CS32 18
|
||||
#define GDT_ENTRY_PNPBIOS_CS16 19
|
||||
#define GDT_ENTRY_PNPBIOS_DS 20
|
||||
#define GDT_ENTRY_PNPBIOS_TS1 21
|
||||
#define GDT_ENTRY_PNPBIOS_TS2 22
|
||||
#define GDT_ENTRY_APMBIOS_BASE 23
|
||||
|
||||
#define GDT_ENTRY_KERNEL_BASE (12)
|
||||
|
||||
#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0)
|
||||
|
||||
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1)
|
||||
|
||||
#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4)
|
||||
#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5)
|
||||
|
||||
#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6)
|
||||
#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11)
|
||||
|
||||
#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14)
|
||||
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
|
||||
|
||||
#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
|
||||
#ifdef CONFIG_SMP
|
||||
#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
|
||||
#else
|
||||
#define __KERNEL_PERCPU 0
|
||||
#endif
|
||||
|
||||
#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
|
||||
#else
|
||||
#define __KERNEL_STACK_CANARY 0
|
||||
#endif
|
||||
#define GDT_ENTRY_ESPFIX_SS 26
|
||||
#define GDT_ENTRY_PERCPU 27
|
||||
#define GDT_ENTRY_STACK_CANARY 28
|
||||
|
||||
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
|
||||
|
||||
/*
|
||||
* The GDT has 32 entries
|
||||
* Number of entries in the GDT table:
|
||||
*/
|
||||
#define GDT_ENTRIES 32
|
||||
|
||||
/* The PnP BIOS entries in the GDT */
|
||||
#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
|
||||
#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
|
||||
#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
|
||||
#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
|
||||
#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
|
||||
|
||||
/* The PnP BIOS selectors */
|
||||
#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
|
||||
#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
|
||||
#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
|
||||
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
|
||||
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
|
||||
|
||||
#define GDT_ENTRIES 32
|
||||
|
||||
/*
|
||||
* Matching rules for certain types of segments.
|
||||
* Segment selector values corresponding to the above entries:
|
||||
*/
|
||||
|
||||
/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
|
||||
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
|
||||
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
|
||||
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
|
||||
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
|
||||
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
|
||||
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
|
||||
|
||||
/* segment for calling fn: */
|
||||
#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
|
||||
/* code segment for BIOS: */
|
||||
#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
|
||||
|
||||
/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
|
||||
#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
|
||||
|
||||
/* data segment for BIOS: */
|
||||
#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
|
||||
/* transfer data segment: */
|
||||
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
|
||||
/* another data segment: */
|
||||
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
|
||||
#else
|
||||
# define __KERNEL_PERCPU 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
|
||||
#else
|
||||
# define __KERNEL_STACK_CANARY 0
|
||||
#endif
|
||||
|
||||
#else /* 64-bit: */
|
||||
|
||||
#include <asm/cache.h>
|
||||
|
||||
#define GDT_ENTRY_KERNEL32_CS 1
|
||||
#define GDT_ENTRY_KERNEL_CS 2
|
||||
#define GDT_ENTRY_KERNEL_DS 3
|
||||
|
||||
#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
|
||||
#define GDT_ENTRY_KERNEL32_CS 1
|
||||
#define GDT_ENTRY_KERNEL_CS 2
|
||||
#define GDT_ENTRY_KERNEL_DS 3
|
||||
|
||||
/*
|
||||
* we cannot use the same code segment descriptor for user and kernel
|
||||
* -- not even in the long flat mode, because of different DPL /kkeil
|
||||
* The segment offset needs to contain a RPL. Grr. -AK
|
||||
* GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
|
||||
* We cannot use the same code segment descriptor for user and kernel mode,
|
||||
* not even in long flat mode, because of different DPL.
|
||||
*
|
||||
* GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
|
||||
* selectors:
|
||||
*
|
||||
* if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
|
||||
* if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
|
||||
*
|
||||
* ss = STAR.SYSRET_CS+8 (in either case)
|
||||
*
|
||||
* thus USER_DS should be between 32-bit and 64-bit code selectors:
|
||||
*/
|
||||
#define GDT_ENTRY_DEFAULT_USER32_CS 4
|
||||
#define GDT_ENTRY_DEFAULT_USER_DS 5
|
||||
#define GDT_ENTRY_DEFAULT_USER_CS 6
|
||||
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
|
||||
#define __USER32_DS __USER_DS
|
||||
#define GDT_ENTRY_DEFAULT_USER32_CS 4
|
||||
#define GDT_ENTRY_DEFAULT_USER_DS 5
|
||||
#define GDT_ENTRY_DEFAULT_USER_CS 6
|
||||
|
||||
#define GDT_ENTRY_TSS 8 /* needs two entries */
|
||||
#define GDT_ENTRY_LDT 10 /* needs two entries */
|
||||
#define GDT_ENTRY_TLS_MIN 12
|
||||
#define GDT_ENTRY_TLS_MAX 14
|
||||
/* Needs two entries */
|
||||
#define GDT_ENTRY_TSS 8
|
||||
/* Needs two entries */
|
||||
#define GDT_ENTRY_LDT 10
|
||||
|
||||
#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
|
||||
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
|
||||
#define GDT_ENTRY_TLS_MIN 12
|
||||
#define GDT_ENTRY_TLS_MAX 14
|
||||
|
||||
/* TLS indexes for 64bit - hardcoded in arch_prctl */
|
||||
#define FS_TLS 0
|
||||
#define GS_TLS 1
|
||||
/* Abused to load per CPU data from limit */
|
||||
#define GDT_ENTRY_PER_CPU 15
|
||||
|
||||
#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
|
||||
#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
|
||||
/*
|
||||
* Number of entries in the GDT table:
|
||||
*/
|
||||
#define GDT_ENTRIES 16
|
||||
|
||||
#define GDT_ENTRIES 16
|
||||
/*
|
||||
* Segment selector values corresponding to the above entries:
|
||||
*
|
||||
* Note, selectors also need to have a correct RPL,
|
||||
* expressed with the +3 value for user-space selectors:
|
||||
*/
|
||||
#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
|
||||
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
|
||||
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
|
||||
#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
|
||||
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
|
||||
#define __USER32_DS __USER_DS
|
||||
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
|
||||
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
|
||||
|
||||
/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
|
||||
#define FS_TLS 0
|
||||
#define GS_TLS 1
|
||||
|
||||
#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
|
||||
#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
|
||||
|
||||
#endif
|
||||
|
||||
#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
|
||||
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
|
||||
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
|
||||
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
|
||||
#ifndef CONFIG_PARAVIRT
|
||||
#define get_kernel_rpl() 0
|
||||
# define get_kernel_rpl() 0
|
||||
#endif
|
||||
|
||||
#define IDT_ENTRIES 256
|
||||
#define NUM_EXCEPTION_VECTORS 32
|
||||
/* Bitmask of exception vectors which push an error code on the stack */
|
||||
#define EXCEPTION_ERRCODE_MASK 0x00027d00
|
||||
#define GDT_SIZE (GDT_ENTRIES * 8)
|
||||
#define GDT_ENTRY_TLS_ENTRIES 3
|
||||
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
|
||||
#define IDT_ENTRIES 256
|
||||
#define NUM_EXCEPTION_VECTORS 32
|
||||
|
||||
/* Bitmask of exception vectors which push an error code on the stack: */
|
||||
#define EXCEPTION_ERRCODE_MASK 0x00027d00
|
||||
|
||||
#define GDT_SIZE (GDT_ENTRIES*8)
|
||||
#define GDT_ENTRY_TLS_ENTRIES 3
|
||||
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
|
||||
#ifdef CONFIG_TRACING
|
||||
#define trace_early_idt_handlers early_idt_handlers
|
||||
# define trace_early_idt_handlers early_idt_handlers
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -228,37 +260,30 @@ do { \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Save a segment register away
|
||||
* Save a segment register away:
|
||||
*/
|
||||
#define savesegment(seg, value) \
|
||||
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
|
||||
|
||||
/*
|
||||
* x86_32 user gs accessors.
|
||||
* x86-32 user GS accessors:
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
#ifdef CONFIG_X86_32_LAZY_GS
|
||||
#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
|
||||
#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
|
||||
#define task_user_gs(tsk) ((tsk)->thread.gs)
|
||||
#define lazy_save_gs(v) savesegment(gs, (v))
|
||||
#define lazy_load_gs(v) loadsegment(gs, (v))
|
||||
#else /* X86_32_LAZY_GS */
|
||||
#define get_user_gs(regs) (u16)((regs)->gs)
|
||||
#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
|
||||
#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
|
||||
#define lazy_save_gs(v) do { } while (0)
|
||||
#define lazy_load_gs(v) do { } while (0)
|
||||
#endif /* X86_32_LAZY_GS */
|
||||
# ifdef CONFIG_X86_32_LAZY_GS
|
||||
# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
|
||||
# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
|
||||
# define task_user_gs(tsk) ((tsk)->thread.gs)
|
||||
# define lazy_save_gs(v) savesegment(gs, (v))
|
||||
# define lazy_load_gs(v) loadsegment(gs, (v))
|
||||
# else /* X86_32_LAZY_GS */
|
||||
# define get_user_gs(regs) (u16)((regs)->gs)
|
||||
# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
|
||||
# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
|
||||
# define lazy_save_gs(v) do { } while (0)
|
||||
# define lazy_load_gs(v) do { } while (0)
|
||||
# endif /* X86_32_LAZY_GS */
|
||||
#endif /* X86_32 */
|
||||
|
||||
/*
 * Return the value loaded by the LSL instruction for selector @segment,
 * plus one.  LSL yields the segment limit, so limit + 1 is the segment
 * size in bytes.
 *
 * NOTE(review): the asm does not test ZF afterwards.  Per the ISA
 * reference, LSL leaves the destination unmodified when the selector
 * is not valid/visible, in which case __limit would be uninitialized
 * here -- callers presumably only pass known-good selectors; confirm.
 */
static inline unsigned long get_limit(unsigned long segment)
|
||||
{
|
||||
unsigned long __limit;
|
||||
/* %0 = output register (__limit), %1 = input register (segment). */
asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
|
||||
return __limit + 1;
|
||||
}
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
|
@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
|
||||
*/
|
||||
extern struct boot_params boot_params;
|
||||
|
||||
/*
 * Report whether KASLR_FLAG is set in boot_params.hdr.loadflags.
 *
 * Returns true if the bit is set, false otherwise; the "!!" collapses
 * the masked value to 0/1 before it is returned as bool.
 */
static inline bool kaslr_enabled(void)
|
||||
{
|
||||
return !!(boot_params.hdr.loadflags & KASLR_FLAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do NOT EVER look at the BIOS memory size location.
|
||||
* It does not work on many machines.
|
||||
|
@ -57,9 +57,9 @@ struct sigcontext {
|
||||
unsigned long ip;
|
||||
unsigned long flags;
|
||||
unsigned short cs;
|
||||
unsigned short gs;
|
||||
unsigned short fs;
|
||||
unsigned short __pad0;
|
||||
unsigned short __pad2; /* Was called gs, but was always zero. */
|
||||
unsigned short __pad1; /* Was called fs, but was always zero. */
|
||||
unsigned short ss;
|
||||
unsigned long err;
|
||||
unsigned long trapno;
|
||||
unsigned long oldmask;
|
||||
|
@ -13,9 +13,7 @@
|
||||
X86_EFLAGS_CF | X86_EFLAGS_RF)
|
||||
|
||||
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
|
||||
|
||||
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
|
||||
unsigned long *pax);
|
||||
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
|
||||
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
|
||||
struct pt_regs *regs, unsigned long mask);
|
||||
|
||||
|
@ -27,23 +27,11 @@
|
||||
|
||||
#ifdef CONFIG_X86_SMAP
|
||||
|
||||
#define ASM_CLAC \
|
||||
661: ASM_NOP3 ; \
|
||||
.pushsection .altinstr_replacement, "ax" ; \
|
||||
662: __ASM_CLAC ; \
|
||||
.popsection ; \
|
||||
.pushsection .altinstructions, "a" ; \
|
||||
altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
|
||||
.popsection
|
||||
#define ASM_CLAC \
|
||||
ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
|
||||
|
||||
#define ASM_STAC \
|
||||
661: ASM_NOP3 ; \
|
||||
.pushsection .altinstr_replacement, "ax" ; \
|
||||
662: __ASM_STAC ; \
|
||||
.popsection ; \
|
||||
.pushsection .altinstructions, "a" ; \
|
||||
altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
|
||||
.popsection
|
||||
#define ASM_STAC \
|
||||
ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
|
||||
|
||||
#else /* CONFIG_X86_SMAP */
|
||||
|
||||
@ -61,20 +49,20 @@
|
||||
static __always_inline void clac(void)
|
||||
{
|
||||
/* Note: a barrier is implicit in alternative() */
|
||||
alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
|
||||
alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
|
||||
}
|
||||
|
||||
static __always_inline void stac(void)
|
||||
{
|
||||
/* Note: a barrier is implicit in alternative() */
|
||||
alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
|
||||
alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
|
||||
}
|
||||
|
||||
/* These macros can be used in asm() statements */
|
||||
#define ASM_CLAC \
|
||||
ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
|
||||
ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
|
||||
#define ASM_STAC \
|
||||
ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
|
||||
ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
|
||||
|
||||
#else /* CONFIG_X86_SMAP */
|
||||
|
||||
|
@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
|
||||
void native_smp_prepare_boot_cpu(void);
|
||||
void native_smp_prepare_cpus(unsigned int max_cpus);
|
||||
void native_smp_cpus_done(unsigned int max_cpus);
|
||||
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||
int native_cpu_disable(void);
|
||||
void native_cpu_die(unsigned int cpu);
|
||||
|
@ -4,6 +4,8 @@
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#include <asm/nops.h>
|
||||
|
||||
static inline void native_clts(void)
|
||||
{
|
||||
asm volatile("clts");
|
||||
@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
|
||||
"+m" (*(volatile char __force *)__p));
|
||||
}
|
||||
|
||||
/*
 * Issue a cache-line write-back/flush instruction for the line at @__p,
 * choosing the instruction through ALTERNATIVE_2():
 *
 *   - default:                 clflush, preceded by a NOP_DS_PREFIX byte
 *   - X86_FEATURE_CLFLUSHOPT:  0x66-prefixed clflush (clflushopt)
 *   - X86_FEATURE_CLWB:        raw byte encoding of clwb (%rax)
 *
 * The address is passed in the "a" register ([pax]) because the clwb
 * variant is emitted as fixed bytes that encode (%rax) as the operand.
 * The 64-byte anonymous struct behind the "+m" operand tells the
 * compiler that the 64 bytes starting at @__p are read and written by
 * the asm.
 *
 * NOTE(review): the extra prefix byte on the default clflush presumably
 * keeps all three variants the same length -- confirm against the
 * ALTERNATIVE_2() definition, which is not visible here.
 */
static inline void clwb(volatile void *__p)
|
||||
{
|
||||
volatile struct { char x[64]; } *p = __p;
|
||||
|
||||
asm volatile(ALTERNATIVE_2(
|
||||
".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
|
||||
".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
|
||||
X86_FEATURE_CLFLUSHOPT,
|
||||
".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
|
||||
X86_FEATURE_CLWB)
|
||||
: [p] "+m" (*p)
|
||||
: [pax] "a" (p));
|
||||
}
|
||||
|
||||
/*
 * Emit "pcommit; sfence" when the CPU advertises X86_FEATURE_PCOMMIT.
 *
 * The default instruction handed to alternative() is ASM_NOP7; the
 * replacement is the raw 4-byte pcommit encoding followed by sfence.
 * NOTE(review): the 7-byte NOP presumably matches the replacement's
 * length (4 bytes + 3-byte sfence) -- confirm.
 */
static inline void pcommit_sfence(void)
|
||||
{
|
||||
alternative(ASM_NOP7,
|
||||
".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
|
||||
"sfence",
|
||||
X86_FEATURE_PCOMMIT);
|
||||
}
|
||||
|
||||
#define nop() asm volatile ("nop")
|
||||
|
||||
|
||||
|
@ -12,6 +12,33 @@
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/types.h>
|
||||
|
||||
/*
|
||||
* TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
|
||||
* reserve at the top of the kernel stack. We do it because of a nasty
|
||||
* 32-bit corner case. On x86_32, the hardware stack frame is
|
||||
* variable-length. Except for vm86 mode, struct pt_regs assumes a
|
||||
* maximum-length frame. If we enter from CPL 0, the top 8 bytes of
|
||||
* pt_regs don't actually exist. Ordinarily this doesn't matter, but it
|
||||
* does in at least one case:
|
||||
*
|
||||
* If we take an NMI early enough in SYSENTER, then we can end up with
|
||||
* pt_regs that extends above sp0. On the way out, in the espfix code,
|
||||
* we can read the saved SS value, but that value will be above sp0.
|
||||
* Without this offset, that can result in a page fault. (We are
|
||||
* careful that, in this case, the value we read doesn't matter.)
|
||||
*
|
||||
* In vm86 mode, the hardware frame is much longer still, but we neither
|
||||
* access the extra members from NMI context, nor do we write such a
|
||||
* frame at sp0 at all.
|
||||
*
|
||||
* x86_64 has a fixed-length stack frame.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
# define TOP_OF_KERNEL_STACK_PADDING 8
|
||||
#else
|
||||
# define TOP_OF_KERNEL_STACK_PADDING 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* low level task data that entry.S needs immediate access to
|
||||
* - this struct should fit entirely inside of one cache line
|
||||
@ -145,7 +172,6 @@ struct thread_info {
|
||||
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
|
||||
|
||||
#define STACK_WARN (THREAD_SIZE/8)
|
||||
#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
|
||||
|
||||
/*
|
||||
* macros/functions for gaining access to the thread information structure
|
||||
@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
|
||||
|
||||
static inline struct thread_info *current_thread_info(void)
|
||||
{
|
||||
struct thread_info *ti;
|
||||
ti = (void *)(this_cpu_read_stable(kernel_stack) +
|
||||
KERNEL_STACK_OFFSET - THREAD_SIZE);
|
||||
return ti;
|
||||
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
|
||||
}
|
||||
|
||||
static inline unsigned long current_stack_pointer(void)
|
||||
@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
|
||||
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
/* how to get the thread information struct from ASM */
|
||||
/* Load thread_info address into "reg" */
|
||||
#define GET_THREAD_INFO(reg) \
|
||||
_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
|
||||
_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
|
||||
_ASM_SUB $(THREAD_SIZE),reg ;
|
||||
|
||||
/*
|
||||
* Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
|
||||
* a certain register (to be used in assembler memory operands).
|
||||
* ASM operand which evaluates to a 'thread_info' address of
|
||||
* the current task, if it is known that "reg" is exactly "off"
|
||||
* bytes below the top of the stack currently.
|
||||
*
|
||||
* ( The kernel stack's size is known at build time, it is usually
|
||||
* 2 or 4 pages, and the bottom of the kernel stack contains
|
||||
* the thread_info structure. So to access the thread_info very
|
||||
* quickly from assembly code we can calculate down from the
|
||||
* top of the kernel stack to the bottom, using constant,
|
||||
* build-time calculations only. )
|
||||
*
|
||||
* For example, to fetch the current thread_info->flags value into %eax
|
||||
* on x86-64 defconfig kernels, in syscall entry code where RSP is
|
||||
* currently at exactly SIZEOF_PTREGS bytes away from the top of the
|
||||
* stack:
|
||||
*
|
||||
* mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
|
||||
*
|
||||
* will translate to:
|
||||
*
|
||||
* 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
|
||||
*
|
||||
* which is below the current RSP by almost 16K.
|
||||
*/
|
||||
#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
|
||||
#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
|
||||
|
||||
#endif
|
||||
|
||||
@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Force syscall return via IRET by making it look as if there was
|
||||
* some work pending. IRET is our most capable (but slowest) syscall
|
||||
* return path, which is able to restore modified SS, CS and certain
|
||||
* EFLAGS values that other (fast) syscall return instructions
|
||||
* are not able to restore properly.
|
||||
*/
|
||||
#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
/* loadflags */
|
||||
#define LOADED_HIGH (1<<0)
|
||||
#define KASLR_FLAG (1<<1)
|
||||
#define QUIET_FLAG (1<<5)
|
||||
#define KEEP_SEGMENTS (1<<6)
|
||||
#define CAN_USE_HEAP (1<<7)
|
||||
|
@ -25,13 +25,17 @@
|
||||
#else /* __i386__ */
|
||||
|
||||
#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
|
||||
/*
|
||||
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
|
||||
* unless syscall needs a complete, fully filled "struct pt_regs".
|
||||
*/
|
||||
#define R15 0
|
||||
#define R14 8
|
||||
#define R13 16
|
||||
#define R12 24
|
||||
#define RBP 32
|
||||
#define RBX 40
|
||||
/* arguments: interrupts/non tracing syscalls only save up to here*/
|
||||
/* These regs are callee-clobbered. Always saved on kernel entry. */
|
||||
#define R11 48
|
||||
#define R10 56
|
||||
#define R9 64
|
||||
@ -41,15 +45,17 @@
|
||||
#define RDX 96
|
||||
#define RSI 104
|
||||
#define RDI 112
|
||||
#define ORIG_RAX 120 /* = ERROR */
|
||||
/* end of arguments */
|
||||
/* cpu exception frame or undefined in case of fast syscall. */
|
||||
/*
|
||||
* On syscall entry, this is syscall#. On CPU exception, this is error code.
|
||||
* On hw interrupt, it's IRQ number:
|
||||
*/
|
||||
#define ORIG_RAX 120
|
||||
/* Return frame for iretq */
|
||||
#define RIP 128
|
||||
#define CS 136
|
||||
#define EFLAGS 144
|
||||
#define RSP 152
|
||||
#define SS 160
|
||||
#define ARGOFFSET R11
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
/* top of stack page */
|
||||
|
@ -41,13 +41,17 @@ struct pt_regs {
|
||||
#ifndef __KERNEL__
|
||||
|
||||
struct pt_regs {
|
||||
/*
|
||||
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
|
||||
* unless syscall needs a complete, fully filled "struct pt_regs".
|
||||
*/
|
||||
unsigned long r15;
|
||||
unsigned long r14;
|
||||
unsigned long r13;
|
||||
unsigned long r12;
|
||||
unsigned long rbp;
|
||||
unsigned long rbx;
|
||||
/* arguments: non interrupts/non tracing syscalls only save up to here*/
|
||||
/* These regs are callee-clobbered. Always saved on kernel entry. */
|
||||
unsigned long r11;
|
||||
unsigned long r10;
|
||||
unsigned long r9;
|
||||
@ -57,9 +61,12 @@ struct pt_regs {
|
||||
unsigned long rdx;
|
||||
unsigned long rsi;
|
||||
unsigned long rdi;
|
||||
/*
|
||||
* On syscall entry, this is syscall#. On CPU exception, this is error code.
|
||||
* On hw interrupt, it's IRQ number:
|
||||
*/
|
||||
unsigned long orig_rax;
|
||||
/* end of arguments */
|
||||
/* cpu exception frame or undefined */
|
||||
/* Return frame for iretq */
|
||||
unsigned long rip;
|
||||
unsigned long cs;
|
||||
unsigned long eflags;
|
||||
|
@ -177,9 +177,24 @@ struct sigcontext {
|
||||
__u64 rip;
|
||||
__u64 eflags; /* RFLAGS */
|
||||
__u16 cs;
|
||||
__u16 gs;
|
||||
__u16 fs;
|
||||
__u16 __pad0;
|
||||
|
||||
/*
|
||||
* Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
|
||||
* Linux saved and restored fs and gs in these slots. This
|
||||
* was counterproductive, as fsbase and gsbase were never
|
||||
* saved, so arch_prctl was presumably unreliable.
|
||||
*
|
||||
* If these slots are ever needed for any other purpose, there
|
||||
* is some risk that very old 64-bit binaries could get
|
||||
* confused. I doubt that many such binaries still work,
|
||||
* though, since the same patch in 2.5.64 also removed the
|
||||
* 64-bit set_thread_area syscall, so it appears that there is
|
||||
* no TLS API that works in both pre- and post-2.5.64 kernels.
|
||||
*/
|
||||
__u16 __pad2; /* Was gs. */
|
||||
__u16 __pad1; /* Was fs. */
|
||||
|
||||
__u16 ss;
|
||||
__u64 err;
|
||||
__u64 trapno;
|
||||
__u64 oldmask;
|
||||
|
@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
|
||||
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
|
||||
obj-$(CONFIG_X86_64) += mcount_64.o
|
||||
obj-y += syscall_$(BITS).o vsyscall_gtod.o
|
||||
obj-$(CONFIG_IA32_EMULATION) += syscall_32.o
|
||||
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
|
||||
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
|
||||
obj-$(CONFIG_SYSFS) += ksysfs.o
|
||||
|
@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
|
||||
__setup("noreplace-paravirt", setup_noreplace_paravirt);
|
||||
#endif
|
||||
|
||||
#define DPRINTK(fmt, ...) \
|
||||
do { \
|
||||
if (debug_alternative) \
|
||||
printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
|
||||
#define DPRINTK(fmt, args...) \
|
||||
do { \
|
||||
if (debug_alternative) \
|
||||
printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
|
||||
} while (0)
|
||||
|
||||
#define DUMP_BYTES(buf, len, fmt, args...) \
|
||||
do { \
|
||||
if (unlikely(debug_alternative)) { \
|
||||
int j; \
|
||||
\
|
||||
if (!(len)) \
|
||||
break; \
|
||||
\
|
||||
printk(KERN_DEBUG fmt, ##args); \
|
||||
for (j = 0; j < (len) - 1; j++) \
|
||||
printk(KERN_CONT "%02hhx ", buf[j]); \
|
||||
printk(KERN_CONT "%02hhx\n", buf[j]); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
|
||||
extern s32 __smp_locks[], __smp_locks_end[];
|
||||
void *text_poke_early(void *addr, const void *opcode, size_t len);
|
||||
|
||||
/* Replace instructions with better alternatives for this CPU type.
|
||||
This runs before SMP is initialized to avoid SMP problems with
|
||||
self modifying code. This implies that asymmetric systems where
|
||||
APs have less capabilities than the boot processor are not handled.
|
||||
Tough. Make sure you disable such features by hand. */
|
||||
/*
|
||||
* Are we looking at a near JMP with a 1 or 4-byte displacement?
|
||||
*/
|
||||
static inline bool is_jmp(const u8 opcode)
|
||||
{
|
||||
return opcode == 0xeb || opcode == 0xe9;
|
||||
}
|
||||
|
||||
static void __init_or_module
|
||||
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
|
||||
{
|
||||
u8 *next_rip, *tgt_rip;
|
||||
s32 n_dspl, o_dspl;
|
||||
int repl_len;
|
||||
|
||||
if (a->replacementlen != 5)
|
||||
return;
|
||||
|
||||
o_dspl = *(s32 *)(insnbuf + 1);
|
||||
|
||||
/* next_rip of the replacement JMP */
|
||||
next_rip = repl_insn + a->replacementlen;
|
||||
/* target rip of the replacement JMP */
|
||||
tgt_rip = next_rip + o_dspl;
|
||||
n_dspl = tgt_rip - orig_insn;
|
||||
|
||||
DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
|
||||
|
||||
if (tgt_rip - orig_insn >= 0) {
|
||||
if (n_dspl - 2 <= 127)
|
||||
goto two_byte_jmp;
|
||||
else
|
||||
goto five_byte_jmp;
|
||||
/* negative offset */
|
||||
} else {
|
||||
if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
|
||||
goto two_byte_jmp;
|
||||
else
|
||||
goto five_byte_jmp;
|
||||
}
|
||||
|
||||
two_byte_jmp:
|
||||
n_dspl -= 2;
|
||||
|
||||
insnbuf[0] = 0xeb;
|
||||
insnbuf[1] = (s8)n_dspl;
|
||||
add_nops(insnbuf + 2, 3);
|
||||
|
||||
repl_len = 2;
|
||||
goto done;
|
||||
|
||||
five_byte_jmp:
|
||||
n_dspl -= 5;
|
||||
|
||||
insnbuf[0] = 0xe9;
|
||||
*(s32 *)&insnbuf[1] = n_dspl;
|
||||
|
||||
repl_len = 5;
|
||||
|
||||
done:
|
||||
|
||||
DPRINTK("final displ: 0x%08x, JMP 0x%lx",
|
||||
n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
|
||||
}
|
||||
|
||||
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
|
||||
{
|
||||
if (instr[0] != 0x90)
|
||||
return;
|
||||
|
||||
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
|
||||
|
||||
DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
|
||||
instr, a->instrlen - a->padlen, a->padlen);
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace instructions with better alternatives for this CPU type. This runs
|
||||
* before SMP is initialized to avoid SMP problems with self modifying code.
|
||||
* This implies that asymmetric systems where APs have fewer capabilities than
|
||||
* the boot processor are not handled. Tough. Make sure you disable such
|
||||
* features by hand.
|
||||
*/
|
||||
void __init_or_module apply_alternatives(struct alt_instr *start,
|
||||
struct alt_instr *end)
|
||||
{
|
||||
@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
|
||||
u8 *instr, *replacement;
|
||||
u8 insnbuf[MAX_PATCH_LEN];
|
||||
|
||||
DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
|
||||
DPRINTK("alt table %p -> %p", start, end);
|
||||
/*
|
||||
* The scan order should be from start to end. A later scanned
|
||||
* alternative code can overwrite a previous scanned alternative code.
|
||||
* alternative code can overwrite previously scanned alternative code.
|
||||
* Some kernel functions (e.g. memcpy, memset, etc) use this order to
|
||||
* patch code.
|
||||
*
|
||||
@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
|
||||
* order.
|
||||
*/
|
||||
for (a = start; a < end; a++) {
|
||||
int insnbuf_sz = 0;
|
||||
|
||||
instr = (u8 *)&a->instr_offset + a->instr_offset;
|
||||
replacement = (u8 *)&a->repl_offset + a->repl_offset;
|
||||
BUG_ON(a->replacementlen > a->instrlen);
|
||||
BUG_ON(a->instrlen > sizeof(insnbuf));
|
||||
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
|
||||
if (!boot_cpu_has(a->cpuid))
|
||||
if (!boot_cpu_has(a->cpuid)) {
|
||||
if (a->padlen > 1)
|
||||
optimize_nops(a, instr);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
|
||||
a->cpuid >> 5,
|
||||
a->cpuid & 0x1f,
|
||||
instr, a->instrlen,
|
||||
replacement, a->replacementlen, a->padlen);
|
||||
|
||||
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
|
||||
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
|
||||
|
||||
memcpy(insnbuf, replacement, a->replacementlen);
|
||||
insnbuf_sz = a->replacementlen;
|
||||
|
||||
/* 0xe8 is a relative CALL; fix the offset. */
|
||||
if (*insnbuf == 0xe8 && a->replacementlen == 5)
|
||||
*(s32 *)(insnbuf + 1) += replacement - instr;
|
||||
if (*insnbuf == 0xe8 && a->replacementlen == 5) {
|
||||
*(s32 *)(insnbuf + 1) += replacement - instr;
|
||||
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
|
||||
*(s32 *)(insnbuf + 1),
|
||||
(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
|
||||
}
|
||||
|
||||
add_nops(insnbuf + a->replacementlen,
|
||||
a->instrlen - a->replacementlen);
|
||||
if (a->replacementlen && is_jmp(replacement[0]))
|
||||
recompute_jump(a, instr, replacement, insnbuf);
|
||||
|
||||
text_poke_early(instr, insnbuf, a->instrlen);
|
||||
if (a->instrlen > a->replacementlen) {
|
||||
add_nops(insnbuf + a->replacementlen,
|
||||
a->instrlen - a->replacementlen);
|
||||
insnbuf_sz += a->instrlen - a->replacementlen;
|
||||
}
|
||||
DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
|
||||
|
||||
text_poke_early(instr, insnbuf, insnbuf_sz);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static void alternatives_smp_lock(const s32 *start, const s32 *end,
|
||||
u8 *text, u8 *text_end)
|
||||
{
|
||||
@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
|
||||
smp->locks_end = locks_end;
|
||||
smp->text = text;
|
||||
smp->text_end = text_end;
|
||||
DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
|
||||
__func__, smp->locks, smp->locks_end,
|
||||
DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
|
||||
smp->locks, smp->locks_end,
|
||||
smp->text, smp->text_end, smp->name);
|
||||
|
||||
list_add_tail(&smp->next, &smp_alt_modules);
|
||||
@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
|
||||
@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
|
||||
if (likely(!bp_patching_in_progress))
|
||||
return 0;
|
||||
|
||||
if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
|
||||
if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
|
||||
return 0;
|
||||
|
||||
/* set up the specified breakpoint handler */
|
||||
|
@ -68,7 +68,7 @@ void foo(void)
|
||||
|
||||
/* Offset from the sysenter stack to tss.sp0 */
|
||||
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
|
||||
sizeof(struct tss_struct));
|
||||
offsetofend(struct tss_struct, SYSENTER_stack));
|
||||
|
||||
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
|
||||
BLANK();
|
||||
|
@ -81,6 +81,7 @@ int main(void)
|
||||
#undef ENTRY
|
||||
|
||||
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
||||
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
|
||||
BLANK();
|
||||
|
||||
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
|
||||
|
@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
|
||||
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
|
||||
|
||||
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
|
||||
|
||||
/* 3DNow or LM implies PREFETCHW */
|
||||
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
|
||||
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
|
||||
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
/* May not be __init: called during resume */
|
||||
static void syscall32_cpu_init(void)
|
||||
{
|
||||
/* Load these always in case some future AMD CPU supports
|
||||
SYSENTER from compat mode too. */
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
|
||||
|
||||
wrmsrl(MSR_CSTAR, ia32_cstar_target);
|
||||
}
|
||||
#endif /* CONFIG_IA32_EMULATION */
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
|
||||
* on 32-bit kernels:
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
void enable_sep_cpu(void)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
struct tss_struct *tss;
|
||||
int cpu;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_SEP)) {
|
||||
put_cpu();
|
||||
return;
|
||||
}
|
||||
cpu = get_cpu();
|
||||
tss = &per_cpu(cpu_tss, cpu);
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_SEP))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
|
||||
* see the big comment in struct x86_hw_tss's definition.
|
||||
*/
|
||||
|
||||
tss->x86_tss.ss1 = __KERNEL_CS;
|
||||
tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
|
||||
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP,
|
||||
(unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
|
||||
0);
|
||||
|
||||
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
|
||||
|
||||
out:
|
||||
put_cpu();
|
||||
}
|
||||
#endif
|
||||
@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
|
||||
__setup("clearcpuid=", setup_disablecpuid);
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, kernel_stack) =
|
||||
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
|
||||
(unsigned long)&init_thread_union + THREAD_SIZE;
|
||||
EXPORT_PER_CPU_SYMBOL(kernel_stack);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||
irq_stack_union) __aligned(PAGE_SIZE) __visible;
|
||||
|
||||
/*
|
||||
* The following four percpu variables are hot. Align current_task to
|
||||
* cacheline size such that all four fall in the same cacheline.
|
||||
* The following percpu variables are hot. Align current_task to
|
||||
* cacheline size such that they fall in the same cacheline.
|
||||
*/
|
||||
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
|
||||
&init_task;
|
||||
@ -1171,10 +1170,23 @@ void syscall_init(void)
|
||||
*/
|
||||
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
||||
wrmsrl(MSR_LSTAR, system_call);
|
||||
wrmsrl(MSR_CSTAR, ignore_sysret);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
syscall32_cpu_init();
|
||||
wrmsrl(MSR_CSTAR, ia32_cstar_target);
|
||||
/*
|
||||
* This only works on Intel CPUs.
|
||||
* On AMD CPUs these MSRs are 32-bit, so the CPU truncates MSR_IA32_SYSENTER_EIP.
|
||||
* This does not cause SYSENTER to jump to the wrong location, because
|
||||
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
|
||||
*/
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
|
||||
#else
|
||||
wrmsrl(MSR_CSTAR, ignore_sysret);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
|
||||
#endif
|
||||
|
||||
/* Flags to clear on syscall */
|
||||
@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||
EXPORT_PER_CPU_SYMBOL(__preempt_count);
|
||||
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
|
||||
|
||||
/*
|
||||
* On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
|
||||
* the top of the kernel stack. Use an extra percpu variable to track the
|
||||
* top of the kernel stack directly.
|
||||
*/
|
||||
DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
|
||||
(unsigned long)&init_thread_union + THREAD_SIZE;
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
|
||||
#endif
|
||||
@ -1307,7 +1328,7 @@ void cpu_init(void)
|
||||
*/
|
||||
load_ucode_ap();
|
||||
|
||||
t = &per_cpu(init_tss, cpu);
|
||||
t = &per_cpu(cpu_tss, cpu);
|
||||
oist = &per_cpu(orig_ist, cpu);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@ -1391,7 +1412,7 @@ void cpu_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct task_struct *curr = current;
|
||||
struct tss_struct *t = &per_cpu(init_tss, cpu);
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
struct thread_struct *thread = &curr->thread;
|
||||
|
||||
wait_for_master_cpu(cpu);
|
||||
|
@ -2146,6 +2146,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
*/
|
||||
static unsigned long code_segment_base(struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* For IA32 we look at the GDT/LDT segment base to convert the
|
||||
* effective IP to a linear address.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* If we are in VM86 mode, add the segment offset to convert to a
|
||||
* linear address.
|
||||
@ -2153,18 +2159,12 @@ static unsigned long code_segment_base(struct pt_regs *regs)
|
||||
if (regs->flags & X86_VM_MASK)
|
||||
return 0x10 * regs->cs;
|
||||
|
||||
/*
|
||||
* For IA32 we look at the GDT/LDT segment base to convert the
|
||||
* effective IP to a linear address.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
if (user_mode(regs) && regs->cs != __USER_CS)
|
||||
return get_segment_base(regs->cs);
|
||||
#else
|
||||
if (test_thread_flag(TIF_IA32)) {
|
||||
if (user_mode(regs) && regs->cs != __USER32_CS)
|
||||
return get_segment_base(regs->cs);
|
||||
}
|
||||
if (user_mode(regs) && !user_64bit_mode(regs) &&
|
||||
regs->cs != __USER32_CS)
|
||||
return get_segment_base(regs->cs);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
|
||||
#ifdef CONFIG_X86_32
|
||||
struct pt_regs fixed_regs;
|
||||
|
||||
if (!user_mode_vm(regs)) {
|
||||
if (!user_mode(regs)) {
|
||||
crash_fixup_ss_esp(&fixed_regs, regs);
|
||||
regs = &fixed_regs;
|
||||
}
|
||||
|
@ -278,7 +278,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
|
||||
print_modules();
|
||||
show_regs(regs);
|
||||
#ifdef CONFIG_X86_32
|
||||
if (user_mode_vm(regs)) {
|
||||
if (user_mode(regs)) {
|
||||
sp = regs->sp;
|
||||
ss = regs->ss & 0xffff;
|
||||
} else {
|
||||
@ -307,7 +307,7 @@ void die(const char *str, struct pt_regs *regs, long err)
|
||||
unsigned long flags = oops_begin();
|
||||
int sig = SIGSEGV;
|
||||
|
||||
if (!user_mode_vm(regs))
|
||||
if (!user_mode(regs))
|
||||
report_bug(regs->ip, regs);
|
||||
|
||||
if (__die(str, regs, err))
|
||||
|
@ -123,13 +123,13 @@ void show_regs(struct pt_regs *regs)
|
||||
int i;
|
||||
|
||||
show_regs_print_info(KERN_EMERG);
|
||||
__show_regs(regs, !user_mode_vm(regs));
|
||||
__show_regs(regs, !user_mode(regs));
|
||||
|
||||
/*
|
||||
* When in-kernel, we also print out the stack and code at the
|
||||
* time of the fault.
|
||||
*/
|
||||
if (!user_mode_vm(regs)) {
|
||||
if (!user_mode(regs)) {
|
||||
unsigned int code_prologue = code_bytes * 43 / 64;
|
||||
unsigned int code_len = code_bytes;
|
||||
unsigned char c;
|
||||
|
@ -395,10 +395,13 @@ sysenter_past_esp:
|
||||
/*CFI_REL_OFFSET cs, 0*/
|
||||
/*
|
||||
* Push current_thread_info()->sysenter_return to the stack.
|
||||
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
|
||||
* pushed above; +8 corresponds to copy_thread's esp0 setting.
|
||||
* A tiny bit of offset fixup is necessary: TI_sysenter_return
|
||||
* is relative to thread_info, which is at the bottom of the
|
||||
* kernel stack page. 4*4 means the 4 words pushed above;
|
||||
* TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
|
||||
* and THREAD_SIZE takes us to the bottom.
|
||||
*/
|
||||
pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
|
||||
pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
|
||||
CFI_REL_OFFSET eip, 0
|
||||
|
||||
pushl_cfi %eax
|
||||
@ -432,7 +435,7 @@ sysenter_after_call:
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testl $_TIF_ALLWORK_MASK, %ecx
|
||||
jne sysexit_audit
|
||||
jnz sysexit_audit
|
||||
sysenter_exit:
|
||||
/* if something modifies registers it must also disable sysexit */
|
||||
movl PT_EIP(%esp), %edx
|
||||
@ -460,7 +463,7 @@ sysenter_audit:
|
||||
|
||||
sysexit_audit:
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
|
||||
jne syscall_exit_work
|
||||
jnz syscall_exit_work
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_ANY)
|
||||
movl %eax,%edx /* second arg, syscall return value */
|
||||
@ -472,7 +475,7 @@ sysexit_audit:
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
|
||||
jne syscall_exit_work
|
||||
jnz syscall_exit_work
|
||||
movl PT_EAX(%esp),%eax /* reload syscall return value */
|
||||
jmp sysenter_exit
|
||||
#endif
|
||||
@ -510,7 +513,7 @@ syscall_exit:
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testl $_TIF_ALLWORK_MASK, %ecx # current->work
|
||||
jne syscall_exit_work
|
||||
jnz syscall_exit_work
|
||||
|
||||
restore_all:
|
||||
TRACE_IRQS_IRET
|
||||
@ -612,7 +615,7 @@ work_notifysig: # deal with pending signals and
|
||||
#ifdef CONFIG_VM86
|
||||
testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
|
||||
movl %esp, %eax
|
||||
jne work_notifysig_v86 # returning to kernel-space or
|
||||
jnz work_notifysig_v86 # returning to kernel-space or
|
||||
# vm86-space
|
||||
1:
|
||||
#else
|
||||
@ -720,43 +723,22 @@ END(sysenter_badsys)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Build the entry stubs and pointer table with some assembler magic.
|
||||
* We pack 7 stubs into a single 32-byte chunk, which will fit in a
|
||||
* single cache line on all modern x86 implementations.
|
||||
* Build the entry stubs with some assembler magic.
|
||||
* We pack 1 stub into every 8-byte block.
|
||||
*/
|
||||
.section .init.rodata,"a"
|
||||
ENTRY(interrupt)
|
||||
.section .entry.text, "ax"
|
||||
.p2align 5
|
||||
.p2align CONFIG_X86_L1_CACHE_SHIFT
|
||||
.align 8
|
||||
ENTRY(irq_entries_start)
|
||||
RING0_INT_FRAME
|
||||
vector=FIRST_EXTERNAL_VECTOR
|
||||
.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
|
||||
.balign 32
|
||||
.rept 7
|
||||
.if vector < FIRST_SYSTEM_VECTOR
|
||||
.if vector <> FIRST_EXTERNAL_VECTOR
|
||||
vector=FIRST_EXTERNAL_VECTOR
|
||||
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
|
||||
pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
|
||||
vector=vector+1
|
||||
jmp common_interrupt
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
.endif
|
||||
1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
|
||||
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
|
||||
jmp 2f
|
||||
.endif
|
||||
.previous
|
||||
.long 1b
|
||||
.section .entry.text, "ax"
|
||||
vector=vector+1
|
||||
.endif
|
||||
.endr
|
||||
2: jmp common_interrupt
|
||||
.endr
|
||||
.align 8
|
||||
.endr
|
||||
END(irq_entries_start)
|
||||
|
||||
.previous
|
||||
END(interrupt)
|
||||
.previous
|
||||
|
||||
/*
|
||||
* the CPU automatically disables interrupts when executing an IRQ vector,
|
||||
* so IRQ-flags tracing has to follow that:
|
||||
@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
|
||||
pushl_cfi $0
|
||||
#ifdef CONFIG_X86_INVD_BUG
|
||||
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
|
||||
661: pushl_cfi $do_general_protection
|
||||
662:
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
|
||||
.previous
|
||||
.section .altinstr_replacement,"ax"
|
||||
663: pushl $do_simd_coprocessor_error
|
||||
664:
|
||||
.previous
|
||||
ALTERNATIVE "pushl_cfi $do_general_protection", \
|
||||
"pushl $do_simd_coprocessor_error", \
|
||||
X86_FEATURE_XMM
|
||||
#else
|
||||
pushl_cfi $do_simd_coprocessor_error
|
||||
#endif
|
||||
@ -1240,20 +1216,13 @@ error_code:
|
||||
/*CFI_REL_OFFSET es, 0*/
|
||||
pushl_cfi %ds
|
||||
/*CFI_REL_OFFSET ds, 0*/
|
||||
pushl_cfi %eax
|
||||
CFI_REL_OFFSET eax, 0
|
||||
pushl_cfi %ebp
|
||||
CFI_REL_OFFSET ebp, 0
|
||||
pushl_cfi %edi
|
||||
CFI_REL_OFFSET edi, 0
|
||||
pushl_cfi %esi
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl_cfi %edx
|
||||
CFI_REL_OFFSET edx, 0
|
||||
pushl_cfi %ecx
|
||||
CFI_REL_OFFSET ecx, 0
|
||||
pushl_cfi %ebx
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl_cfi_reg eax
|
||||
pushl_cfi_reg ebp
|
||||
pushl_cfi_reg edi
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg edx
|
||||
pushl_cfi_reg ecx
|
||||
pushl_cfi_reg ebx
|
||||
cld
|
||||
movl $(__KERNEL_PERCPU), %ecx
|
||||
movl %ecx, %fs
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/nops.h>
|
||||
#include <asm/bootparam.h>
|
||||
|
||||
/* Physical address */
|
||||
#define pa(X) ((X) - __PAGE_OFFSET)
|
||||
@ -90,7 +91,7 @@ ENTRY(startup_32)
|
||||
|
||||
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
|
||||
us to not reload segments */
|
||||
testb $(1<<6), BP_loadflags(%esi)
|
||||
testb $KEEP_SEGMENTS, BP_loadflags(%esi)
|
||||
jnz 2f
|
||||
|
||||
/*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
|
||||
* linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
|
||||
*
|
||||
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
|
||||
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
|
||||
@ -56,7 +56,7 @@ startup_64:
|
||||
* %rsi holds a physical pointer to real_mode_data.
|
||||
*
|
||||
* We come here either directly from a 64bit bootloader, or from
|
||||
* arch/x86_64/boot/compressed/head.S.
|
||||
* arch/x86/boot/compressed/head_64.S.
|
||||
*
|
||||
* We only come here initially at boot; nothing else comes here.
|
||||
*
|
||||
@ -146,7 +146,7 @@ startup_64:
|
||||
leaq level2_kernel_pgt(%rip), %rdi
|
||||
leaq 4096(%rdi), %r8
|
||||
/* See if it is a valid page table entry */
|
||||
1: testq $1, 0(%rdi)
|
||||
1: testb $1, 0(%rdi)
|
||||
jz 2f
|
||||
addq %rbp, 0(%rdi)
|
||||
/* Go to the next page */
|
||||
|
@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
|
||||
static inline bool interrupted_user_mode(void)
|
||||
{
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
return regs && user_mode_vm(regs);
|
||||
return regs && user_mode(regs);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
||||
* because the ->io_bitmap_max value must match the bitmap
|
||||
* contents:
|
||||
*/
|
||||
tss = &per_cpu(init_tss, get_cpu());
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
|
||||
if (turn_on)
|
||||
bitmap_clear(t->io_bitmap_ptr, from, num);
|
||||
|
@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
|
||||
if (unlikely(!desc))
|
||||
return false;
|
||||
|
||||
if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
|
||||
if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
|
||||
if (unlikely(overflow))
|
||||
print_stack_overflow();
|
||||
desc->handle_irq(irq, desc);
|
||||
|
@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
|
||||
u64 estack_top, estack_bottom;
|
||||
u64 curbase = (u64)task_stack_page(current);
|
||||
|
||||
if (user_mode_vm(regs))
|
||||
if (user_mode(regs))
|
||||
return;
|
||||
|
||||
if (regs->sp >= curbase + sizeof(struct thread_info) +
|
||||
|
@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
|
||||
#endif
|
||||
for_each_clear_bit_from(i, used_vectors, first_system_vector) {
|
||||
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
|
||||
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
|
||||
set_intr_gate(i, irq_entries_start +
|
||||
8 * (i - FIRST_EXTERNAL_VECTOR));
|
||||
}
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
|
||||
|
@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
|
||||
#ifdef CONFIG_X86_32
|
||||
switch (regno) {
|
||||
case GDB_SS:
|
||||
if (!user_mode_vm(regs))
|
||||
if (!user_mode(regs))
|
||||
*(unsigned long *)mem = __KERNEL_DS;
|
||||
break;
|
||||
case GDB_SP:
|
||||
if (!user_mode_vm(regs))
|
||||
if (!user_mode(regs))
|
||||
*(unsigned long *)mem = kernel_stack_pointer(regs);
|
||||
break;
|
||||
case GDB_GS:
|
||||
|
@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
|
||||
struct kprobe *p;
|
||||
struct kprobe_ctlblk *kcb;
|
||||
|
||||
if (user_mode_vm(regs))
|
||||
if (user_mode(regs))
|
||||
return 0;
|
||||
|
||||
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
|
||||
@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
|
||||
struct die_args *args = data;
|
||||
int ret = NOTIFY_DONE;
|
||||
|
||||
if (args->regs && user_mode_vm(args->regs))
|
||||
if (args->regs && user_mode(args->regs))
|
||||
return ret;
|
||||
|
||||
if (val == DIE_GPF) {
|
||||
|
@ -33,6 +33,7 @@
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#if 0
|
||||
#define DEBUGP(fmt, ...) \
|
||||
@ -47,21 +48,13 @@ do { \
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
static unsigned long module_load_offset;
|
||||
static int randomize_modules = 1;
|
||||
|
||||
/* Mutex protects the module_load_offset. */
|
||||
static DEFINE_MUTEX(module_kaslr_mutex);
|
||||
|
||||
static int __init parse_nokaslr(char *p)
|
||||
{
|
||||
randomize_modules = 0;
|
||||
return 0;
|
||||
}
|
||||
early_param("nokaslr", parse_nokaslr);
|
||||
|
||||
static unsigned long int get_module_load_offset(void)
|
||||
{
|
||||
if (randomize_modules) {
|
||||
if (kaslr_enabled()) {
|
||||
mutex_lock(&module_kaslr_mutex);
|
||||
/*
|
||||
* Calculate the module_load_offset the first time this
|
||||
|
@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
|
||||
}
|
||||
|
||||
/*
|
||||
* RIP, flags, and the argument registers are usually saved.
|
||||
* orig_ax is probably okay, too.
|
||||
* These registers are always saved on 64-bit syscall entry.
|
||||
* On 32-bit entry points, they are saved too except r8..r11.
|
||||
*/
|
||||
regs_user_copy->ip = user_regs->ip;
|
||||
regs_user_copy->ax = user_regs->ax;
|
||||
regs_user_copy->cx = user_regs->cx;
|
||||
regs_user_copy->dx = user_regs->dx;
|
||||
regs_user_copy->si = user_regs->si;
|
||||
@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
|
||||
regs_user_copy->r11 = user_regs->r11;
|
||||
regs_user_copy->orig_ax = user_regs->orig_ax;
|
||||
regs_user_copy->flags = user_regs->flags;
|
||||
regs_user_copy->sp = user_regs->sp;
|
||||
regs_user_copy->cs = user_regs->cs;
|
||||
regs_user_copy->ss = user_regs->ss;
|
||||
|
||||
/*
|
||||
* Don't even try to report the "rest" regs.
|
||||
* Most system calls don't save these registers, don't report them.
|
||||
*/
|
||||
regs_user_copy->bx = -1;
|
||||
regs_user_copy->bp = -1;
|
||||
@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
|
||||
|
||||
/*
|
||||
* For this to be at all useful, we need a reasonable guess for
|
||||
* sp and the ABI. Be careful: we're in NMI context, and we're
|
||||
* the ABI. Be careful: we're in NMI context, and we're
|
||||
* considering current to be the current task, so we should
|
||||
* be careful not to look at any other percpu variables that might
|
||||
* change during context switches.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
|
||||
task_thread_info(current)->status & TS_COMPAT) {
|
||||
/* Easy case: we're in a compat syscall. */
|
||||
regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
|
||||
regs_user_copy->sp = user_regs->sp;
|
||||
regs_user_copy->cs = user_regs->cs;
|
||||
regs_user_copy->ss = user_regs->ss;
|
||||
} else if (user_regs->orig_ax != -1) {
|
||||
/*
|
||||
* We're probably in a 64-bit syscall.
|
||||
* Warning: this code is severely racy. At least it's better
|
||||
* than just blindly copying user_regs.
|
||||
*/
|
||||
regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
|
||||
regs_user_copy->sp = this_cpu_read(old_rsp);
|
||||
regs_user_copy->cs = __USER_CS;
|
||||
regs_user_copy->ss = __USER_DS;
|
||||
regs_user_copy->cx = -1; /* usually contains garbage */
|
||||
} else {
|
||||
/* We're probably in an interrupt or exception. */
|
||||
regs_user->abi = user_64bit_mode(user_regs) ?
|
||||
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
|
||||
regs_user_copy->sp = user_regs->sp;
|
||||
regs_user_copy->cs = user_regs->cs;
|
||||
regs_user_copy->ss = user_regs->ss;
|
||||
}
|
||||
regs_user->abi = user_64bit_mode(user_regs) ?
|
||||
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
|
||||
|
||||
regs_user->regs = regs_user_copy;
|
||||
}
|
||||
|
@ -38,7 +38,26 @@
|
||||
* section. Since TSS's are completely CPU-local, we want them
|
||||
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
|
||||
*/
|
||||
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
|
||||
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
|
||||
.x86_tss = {
|
||||
.sp0 = TOP_OF_INIT_STACK,
|
||||
#ifdef CONFIG_X86_32
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ss1 = __KERNEL_CS,
|
||||
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
|
||||
#endif
|
||||
},
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Note that the .io_bitmap member must be extra-big. This is because
|
||||
* the CPU will access an additional byte beyond the end of the IO
|
||||
* permission bitmap. The extra byte must be all 1 bits, and must
|
||||
* be within the limit.
|
||||
*/
|
||||
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
|
||||
#endif
|
||||
};
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static DEFINE_PER_CPU(unsigned char, is_idle);
|
||||
@ -110,7 +129,7 @@ void exit_thread(void)
|
||||
unsigned long *bp = t->io_bitmap_ptr;
|
||||
|
||||
if (bp) {
|
||||
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
|
||||
|
||||
t->io_bitmap_ptr = NULL;
|
||||
clear_thread_flag(TIF_IO_BITMAP);
|
||||
|
@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
|
||||
unsigned long sp;
|
||||
unsigned short ss, gs;
|
||||
|
||||
if (user_mode_vm(regs)) {
|
||||
if (user_mode(regs)) {
|
||||
sp = regs->sp;
|
||||
ss = regs->ss & 0xffff;
|
||||
gs = get_user_gs(regs);
|
||||
@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
||||
regs->ip = new_ip;
|
||||
regs->sp = new_sp;
|
||||
regs->flags = X86_EFLAGS_IF;
|
||||
/*
|
||||
* force it to the iret return path by making it look as if there was
|
||||
* some work pending.
|
||||
*/
|
||||
set_thread_flag(TIF_NOTIFY_RESUME);
|
||||
force_iret();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(start_thread);
|
||||
|
||||
@ -248,18 +244,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
struct thread_struct *prev = &prev_p->thread,
|
||||
*next = &next_p->thread;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
|
||||
fpu_switch_t fpu;
|
||||
|
||||
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
|
||||
|
||||
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
|
||||
|
||||
/*
|
||||
* Reload esp0.
|
||||
*/
|
||||
load_sp0(tss, next);
|
||||
|
||||
/*
|
||||
* Save away %gs. No need to save %fs, as it was saved on the
|
||||
* stack on entry. No need to save %es and %ds, as those are
|
||||
@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
*/
|
||||
arch_end_context_switch(next_p);
|
||||
|
||||
/*
|
||||
* Reload esp0, kernel_stack, and current_top_of_stack. This changes
|
||||
* current_thread_info().
|
||||
*/
|
||||
load_sp0(tss, next);
|
||||
this_cpu_write(kernel_stack,
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE - KERNEL_STACK_OFFSET);
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE);
|
||||
this_cpu_write(cpu_current_top_of_stack,
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE);
|
||||
|
||||
/*
|
||||
* Restore %gs if needed (which is common)
|
||||
|
@ -52,7 +52,7 @@
|
||||
|
||||
asmlinkage extern void ret_from_fork(void);
|
||||
|
||||
__visible DEFINE_PER_CPU(unsigned long, old_rsp);
|
||||
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
|
||||
|
||||
/* Prints also some state that isn't saved in the pt_regs */
|
||||
void __show_regs(struct pt_regs *regs, int all)
|
||||
@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
|
||||
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
|
||||
childregs = task_pt_regs(p);
|
||||
p->thread.sp = (unsigned long) childregs;
|
||||
p->thread.usersp = me->thread.usersp;
|
||||
set_tsk_thread_flag(p, TIF_FORK);
|
||||
p->thread.io_bitmap_ptr = NULL;
|
||||
|
||||
@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
|
||||
*/
|
||||
if (clone_flags & CLONE_SETTLS) {
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
if (test_thread_flag(TIF_IA32))
|
||||
if (is_ia32_task())
|
||||
err = do_set_thread_area(p, -1,
|
||||
(struct user_desc __user *)childregs->si, 0);
|
||||
else
|
||||
@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
|
||||
loadsegment(es, _ds);
|
||||
loadsegment(ds, _ds);
|
||||
load_gs_index(0);
|
||||
current->thread.usersp = new_sp;
|
||||
regs->ip = new_ip;
|
||||
regs->sp = new_sp;
|
||||
this_cpu_write(old_rsp, new_sp);
|
||||
regs->cs = _cs;
|
||||
regs->ss = _ss;
|
||||
regs->flags = X86_EFLAGS_IF;
|
||||
force_iret();
|
||||
}
|
||||
|
||||
void
|
||||
@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
struct thread_struct *prev = &prev_p->thread;
|
||||
struct thread_struct *next = &next_p->thread;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
|
||||
unsigned fsindex, gsindex;
|
||||
fpu_switch_t fpu;
|
||||
|
||||
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
|
||||
|
||||
/* Reload esp0 and ss1. */
|
||||
load_sp0(tss, next);
|
||||
|
||||
/* We must save %fs and %gs before load_TLS() because
|
||||
* %fs and %gs may be cleared by load_TLS().
|
||||
*
|
||||
@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
/*
|
||||
* Switch the PDA and FPU contexts.
|
||||
*/
|
||||
prev->usersp = this_cpu_read(old_rsp);
|
||||
this_cpu_write(old_rsp, next->usersp);
|
||||
this_cpu_write(current_task, next_p);
|
||||
|
||||
/*
|
||||
@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
|
||||
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
|
||||
|
||||
/* Reload esp0 and ss1. This changes current_thread_info(). */
|
||||
load_sp0(tss, next);
|
||||
|
||||
this_cpu_write(kernel_stack,
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE - KERNEL_STACK_OFFSET);
|
||||
(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
|
||||
|
||||
/*
|
||||
* Now maybe reload the debug registers and handle I/O bitmaps
|
||||
@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
|
||||
|
||||
unsigned long KSTK_ESP(struct task_struct *task)
|
||||
{
|
||||
return (test_tsk_thread_flag(task, TIF_IA32)) ?
|
||||
(task_pt_regs(task)->sp) : ((task)->thread.usersp);
|
||||
return task_pt_regs(task)->sp;
|
||||
}
|
||||
|
@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
|
||||
case offsetof(struct user_regs_struct,cs):
|
||||
if (unlikely(value == 0))
|
||||
return -EIO;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
if (test_tsk_thread_flag(task, TIF_IA32))
|
||||
task_pt_regs(task)->cs = value;
|
||||
#endif
|
||||
task_pt_regs(task)->cs = value;
|
||||
break;
|
||||
case offsetof(struct user_regs_struct,ss):
|
||||
if (unlikely(value == 0))
|
||||
return -EIO;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
if (test_tsk_thread_flag(task, TIF_IA32))
|
||||
task_pt_regs(task)->ss = value;
|
||||
#endif
|
||||
task_pt_regs(task)->ss = value;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
|
||||
memset(info, 0, sizeof(*info));
|
||||
info->si_signo = SIGTRAP;
|
||||
info->si_code = si_code;
|
||||
info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
|
||||
info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
|
||||
}
|
||||
|
||||
void user_single_step_siginfo(struct task_struct *tsk,
|
||||
|
@ -226,23 +226,23 @@ swap_pages:
|
||||
movl (%ebx), %ecx
|
||||
addl $4, %ebx
|
||||
1:
|
||||
testl $0x1, %ecx /* is it a destination page */
|
||||
testb $0x1, %cl /* is it a destination page */
|
||||
jz 2f
|
||||
movl %ecx, %edi
|
||||
andl $0xfffff000, %edi
|
||||
jmp 0b
|
||||
2:
|
||||
testl $0x2, %ecx /* is it an indirection page */
|
||||
testb $0x2, %cl /* is it an indirection page */
|
||||
jz 2f
|
||||
movl %ecx, %ebx
|
||||
andl $0xfffff000, %ebx
|
||||
jmp 0b
|
||||
2:
|
||||
testl $0x4, %ecx /* is it the done indicator */
|
||||
testb $0x4, %cl /* is it the done indicator */
|
||||
jz 2f
|
||||
jmp 3f
|
||||
2:
|
||||
testl $0x8, %ecx /* is it the source indicator */
|
||||
testb $0x8, %cl /* is it the source indicator */
|
||||
jz 0b /* Ignore it otherwise */
|
||||
movl %ecx, %esi /* For every source page do a copy */
|
||||
andl $0xfffff000, %esi
|
||||
|
@ -123,7 +123,7 @@ identity_mapped:
|
||||
* Set cr4 to a known state:
|
||||
* - physical address extension enabled
|
||||
*/
|
||||
movq $X86_CR4_PAE, %rax
|
||||
movl $X86_CR4_PAE, %eax
|
||||
movq %rax, %cr4
|
||||
|
||||
jmp 1f
|
||||
@ -221,23 +221,23 @@ swap_pages:
|
||||
movq (%rbx), %rcx
|
||||
addq $8, %rbx
|
||||
1:
|
||||
testq $0x1, %rcx /* is it a destination page? */
|
||||
testb $0x1, %cl /* is it a destination page? */
|
||||
jz 2f
|
||||
movq %rcx, %rdi
|
||||
andq $0xfffffffffffff000, %rdi
|
||||
jmp 0b
|
||||
2:
|
||||
testq $0x2, %rcx /* is it an indirection page? */
|
||||
testb $0x2, %cl /* is it an indirection page? */
|
||||
jz 2f
|
||||
movq %rcx, %rbx
|
||||
andq $0xfffffffffffff000, %rbx
|
||||
jmp 0b
|
||||
2:
|
||||
testq $0x4, %rcx /* is it the done indicator? */
|
||||
testb $0x4, %cl /* is it the done indicator? */
|
||||
jz 2f
|
||||
jmp 3f
|
||||
2:
|
||||
testq $0x8, %rcx /* is it the source indicator? */
|
||||
testb $0x8, %cl /* is it the source indicator? */
|
||||
jz 0b /* Ignore it otherwise */
|
||||
movq %rcx, %rsi /* For ever source page do a copy */
|
||||
andq $0xfffffffffffff000, %rsi
|
||||
@ -246,17 +246,17 @@ swap_pages:
|
||||
movq %rsi, %rax
|
||||
|
||||
movq %r10, %rdi
|
||||
movq $512, %rcx
|
||||
movl $512, %ecx
|
||||
rep ; movsq
|
||||
|
||||
movq %rax, %rdi
|
||||
movq %rdx, %rsi
|
||||
movq $512, %rcx
|
||||
movl $512, %ecx
|
||||
rep ; movsq
|
||||
|
||||
movq %rdx, %rdi
|
||||
movq %r10, %rsi
|
||||
movq $512, %rcx
|
||||
movl $512, %ecx
|
||||
rep ; movsq
|
||||
|
||||
lea PAGE_SIZE(%rax), %rsi
|
||||
|
@ -832,10 +832,15 @@ static void __init trim_low_memory_range(void)
|
||||
static int
|
||||
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
|
||||
{
|
||||
pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
|
||||
"(relocation range: 0x%lx-0x%lx)\n",
|
||||
(unsigned long)&_text - __START_KERNEL, __START_KERNEL,
|
||||
__START_KERNEL_map, MODULES_VADDR-1);
|
||||
if (kaslr_enabled()) {
|
||||
pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
|
||||
(unsigned long)&_text - __START_KERNEL,
|
||||
__START_KERNEL,
|
||||
__START_KERNEL_map,
|
||||
MODULES_VADDR-1);
|
||||
} else {
|
||||
pr_emerg("Kernel Offset: disabled\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -61,8 +61,7 @@
|
||||
regs->seg = GET_SEG(seg) | 3; \
|
||||
} while (0)
|
||||
|
||||
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
|
||||
unsigned long *pax)
|
||||
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
|
||||
{
|
||||
void __user *buf;
|
||||
unsigned int tmpflags;
|
||||
@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
|
||||
COPY(dx); COPY(cx); COPY(ip);
|
||||
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
COPY(r8);
|
||||
@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
|
||||
COPY(r15);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
COPY_SEG_CPL3(cs);
|
||||
COPY_SEG_CPL3(ss);
|
||||
#else /* !CONFIG_X86_32 */
|
||||
/* Kernel saves and restores only the CS segment register on signals,
|
||||
* which is the bare minimum needed to allow mixed 32/64-bit code.
|
||||
* App's signal handler can save/restore other segments if needed. */
|
||||
COPY_SEG_CPL3(cs);
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
get_user_ex(tmpflags, &sc->flags);
|
||||
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
|
||||
regs->orig_ax = -1; /* disable syscall checks */
|
||||
|
||||
get_user_ex(buf, &sc->fpstate);
|
||||
|
||||
get_user_ex(*pax, &sc->ax);
|
||||
} get_user_catch(err);
|
||||
|
||||
err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
|
||||
|
||||
force_iret();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
|
||||
#else /* !CONFIG_X86_32 */
|
||||
put_user_ex(regs->flags, &sc->flags);
|
||||
put_user_ex(regs->cs, &sc->cs);
|
||||
put_user_ex(0, &sc->gs);
|
||||
put_user_ex(0, &sc->fs);
|
||||
put_user_ex(0, &sc->__pad2);
|
||||
put_user_ex(0, &sc->__pad1);
|
||||
put_user_ex(regs->ss, &sc->ss);
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
put_user_ex(fpstate, &sc->fpstate);
|
||||
@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
||||
|
||||
regs->sp = (unsigned long)frame;
|
||||
|
||||
/* Set up the CS register to run signal handlers in 64-bit mode,
|
||||
even if the handler happens to be interrupting 32-bit code. */
|
||||
/*
|
||||
* Set up the CS and SS registers to run signal handlers in
|
||||
* 64-bit mode, even if the handler happens to be interrupting
|
||||
* 32-bit or 16-bit code.
|
||||
*
|
||||
* SS is subtle. In 64-bit mode, we don't need any particular
|
||||
* SS descriptor, but we do need SS to be valid. It's possible
|
||||
* that the old SS is entirely bogus -- this can happen if the
|
||||
* signal we're trying to deliver is #GP or #SS caused by a bad
|
||||
* SS value.
|
||||
*/
|
||||
regs->cs = __USER_CS;
|
||||
regs->ss = __USER_DS;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
|
||||
{
|
||||
struct pt_regs *regs = current_pt_regs();
|
||||
struct sigframe __user *frame;
|
||||
unsigned long ax;
|
||||
sigset_t set;
|
||||
|
||||
frame = (struct sigframe __user *)(regs->sp - 8);
|
||||
@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (restore_sigcontext(regs, &frame->sc, &ax))
|
||||
if (restore_sigcontext(regs, &frame->sc))
|
||||
goto badframe;
|
||||
return ax;
|
||||
return regs->ax;
|
||||
|
||||
badframe:
|
||||
signal_fault(regs, frame, "sigreturn");
|
||||
@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
|
||||
{
|
||||
struct pt_regs *regs = current_pt_regs();
|
||||
struct rt_sigframe __user *frame;
|
||||
unsigned long ax;
|
||||
sigset_t set;
|
||||
|
||||
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
|
||||
@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void)
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
|
||||
goto badframe;
|
||||
|
||||
if (restore_altstack(&frame->uc.uc_stack))
|
||||
goto badframe;
|
||||
|
||||
return ax;
|
||||
return regs->ax;
|
||||
|
||||
badframe:
|
||||
signal_fault(regs, frame, "rt_sigreturn");
|
||||
@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
|
||||
struct pt_regs *regs = current_pt_regs();
|
||||
struct rt_sigframe_x32 __user *frame;
|
||||
sigset_t set;
|
||||
unsigned long ax;
|
||||
|
||||
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
|
||||
|
||||
@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
|
||||
goto badframe;
|
||||
|
||||
if (compat_restore_altstack(&frame->uc.uc_stack))
|
||||
goto badframe;
|
||||
|
||||
return ax;
|
||||
return regs->ax;
|
||||
|
||||
badframe:
|
||||
signal_fault(regs, frame, "x32 rt_sigreturn");
|
||||
|
@ -779,6 +779,26 @@ out:
|
||||
return boot_error;
|
||||
}
|
||||
|
||||
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
/* Just in case we booted with a single CPU. */
|
||||
alternatives_enable_smp();
|
||||
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Stack for startup_32 can be just as for start_secondary onwards */
|
||||
irq_ctx_init(cpu);
|
||||
per_cpu(cpu_current_top_of_stack, cpu) =
|
||||
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
|
||||
#else
|
||||
clear_tsk_thread_flag(idle, TIF_FORK);
|
||||
initial_gs = per_cpu_offset(cpu);
|
||||
#endif
|
||||
per_cpu(kernel_stack, cpu) =
|
||||
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
|
||||
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
|
||||
@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
|
||||
int cpu0_nmi_registered = 0;
|
||||
unsigned long timeout;
|
||||
|
||||
/* Just in case we booted with a single CPU. */
|
||||
alternatives_enable_smp();
|
||||
|
||||
idle->thread.sp = (unsigned long) (((struct pt_regs *)
|
||||
(THREAD_SIZE + task_stack_page(idle))) - 1);
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Stack for startup_32 can be just as for start_secondary onwards */
|
||||
irq_ctx_init(cpu);
|
||||
#else
|
||||
clear_tsk_thread_flag(idle, TIF_FORK);
|
||||
initial_gs = per_cpu_offset(cpu);
|
||||
#endif
|
||||
per_cpu(kernel_stack, cpu) =
|
||||
(unsigned long)task_stack_page(idle) -
|
||||
KERNEL_STACK_OFFSET + THREAD_SIZE;
|
||||
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
|
||||
initial_code = (unsigned long)start_secondary;
|
||||
stack_start = idle->thread.sp;
|
||||
@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
|
||||
/* the FPU context is blank, nobody can own it */
|
||||
__cpu_disable_lazy_restore(cpu);
|
||||
|
||||
common_cpu_up(cpu, tidle);
|
||||
|
||||
err = do_boot_cpu(apicid, cpu, tidle);
|
||||
if (err) {
|
||||
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
|
||||
|
@ -5,21 +5,29 @@
|
||||
#include <linux/cache.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
#define SYM(sym, compat) compat
|
||||
#else
|
||||
#define SYM(sym, compat) sym
|
||||
#define ia32_sys_call_table sys_call_table
|
||||
#define __NR_ia32_syscall_max __NR_syscall_max
|
||||
#endif
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
|
||||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL_I386
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
|
||||
|
||||
typedef asmlinkage void (*sys_call_ptr_t)(void);
|
||||
|
||||
extern asmlinkage void sys_ni_syscall(void);
|
||||
|
||||
__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
|
||||
__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
|
||||
/*
|
||||
* Smells like a compiler bug -- it doesn't work
|
||||
* when the & below is removed.
|
||||
*/
|
||||
[0 ... __NR_syscall_max] = &sys_ni_syscall,
|
||||
[0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
|
@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long pc = instruction_pointer(regs);
|
||||
|
||||
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
|
||||
if (!user_mode(regs) && in_lock_functions(pc)) {
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
return *(unsigned long *)(regs->bp + sizeof(long));
|
||||
#else
|
||||
|
@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
if (user_mode_vm(regs)) {
|
||||
if (user_mode(regs)) {
|
||||
/* Other than that, we're just an exception. */
|
||||
prev_state = exception_enter();
|
||||
} else {
|
||||
@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
|
||||
/* Must be before exception_exit. */
|
||||
preempt_count_sub(HARDIRQ_OFFSET);
|
||||
|
||||
if (user_mode_vm(regs))
|
||||
if (user_mode(regs))
|
||||
return exception_exit(prev_state);
|
||||
else
|
||||
rcu_nmi_exit();
|
||||
@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
|
||||
*
|
||||
* IST exception handlers normally cannot schedule. As a special
|
||||
* exception, if the exception interrupted userspace code (i.e.
|
||||
* user_mode_vm(regs) would return true) and the exception was not
|
||||
* user_mode(regs) would return true) and the exception was not
|
||||
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
|
||||
* begins a non-atomic section within an ist_enter()/ist_exit() region.
|
||||
* Callers are responsible for enabling interrupts themselves inside
|
||||
@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
|
||||
*/
|
||||
void ist_begin_non_atomic(struct pt_regs *regs)
|
||||
{
|
||||
BUG_ON(!user_mode_vm(regs));
|
||||
BUG_ON(!user_mode(regs));
|
||||
|
||||
/*
|
||||
* Sanity check: we need to be on the normal thread stack. This
|
||||
* will catch asm bugs and any attempt to use ist_preempt_enable
|
||||
* from double_fault.
|
||||
*/
|
||||
BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
|
||||
& ~(THREAD_SIZE - 1)) != 0);
|
||||
BUG_ON((unsigned long)(current_top_of_stack() -
|
||||
current_stack_pointer()) >= THREAD_SIZE);
|
||||
|
||||
preempt_count_sub(HARDIRQ_OFFSET);
|
||||
}
|
||||
@ -194,8 +194,7 @@ static nokprobe_inline int
|
||||
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
|
||||
struct pt_regs *regs, long error_code)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
if (regs->flags & X86_VM_MASK) {
|
||||
if (v8086_mode(regs)) {
|
||||
/*
|
||||
* Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
|
||||
* On nmi (interrupt 2), do_trap should not be called.
|
||||
@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!user_mode(regs)) {
|
||||
if (!fixup_exception(regs)) {
|
||||
tsk->thread.error_code = error_code;
|
||||
@ -384,7 +383,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
||||
goto exit;
|
||||
conditional_sti(regs);
|
||||
|
||||
if (!user_mode_vm(regs))
|
||||
if (!user_mode(regs))
|
||||
die("bounds", regs, error_code);
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
|
||||
@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
|
||||
prev_state = exception_enter();
|
||||
conditional_sti(regs);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
if (regs->flags & X86_VM_MASK) {
|
||||
if (v8086_mode(regs)) {
|
||||
local_irq_enable();
|
||||
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
|
||||
goto exit;
|
||||
}
|
||||
#endif
|
||||
|
||||
tsk = current;
|
||||
if (!user_mode(regs)) {
|
||||
@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
||||
/* Copy the remainder of the stack from the current stack. */
|
||||
memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
|
||||
|
||||
BUG_ON(!user_mode_vm(&new_stack->regs));
|
||||
BUG_ON(!user_mode(&new_stack->regs));
|
||||
return new_stack;
|
||||
}
|
||||
NOKPROBE_SYMBOL(fixup_bad_iret);
|
||||
@ -637,7 +634,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
* then it's very likely the result of an icebp/int01 trap.
|
||||
* User wants a sigtrap for that.
|
||||
*/
|
||||
if (!dr6 && user_mode_vm(regs))
|
||||
if (!dr6 && user_mode(regs))
|
||||
user_icebp = 1;
|
||||
|
||||
/* Catch kmemcheck conditions first of all! */
|
||||
@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
/* It's safe to allow irq's after DR6 has been saved */
|
||||
preempt_conditional_sti(regs);
|
||||
|
||||
if (regs->flags & X86_VM_MASK) {
|
||||
if (v8086_mode(regs)) {
|
||||
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
|
||||
X86_TRAP_DB);
|
||||
preempt_conditional_cli(regs);
|
||||
@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
|
||||
return;
|
||||
conditional_sti(regs);
|
||||
|
||||
if (!user_mode_vm(regs))
|
||||
if (!user_mode(regs))
|
||||
{
|
||||
if (!fixup_exception(regs)) {
|
||||
task->thread.error_code = error_code;
|
||||
@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
|
||||
/* Set of traps needed for early debugging. */
|
||||
void __init early_trap_init(void)
|
||||
{
|
||||
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
|
||||
/*
|
||||
* Don't use IST to set DEBUG_STACK as it doesn't work until TSS
|
||||
* is ready in cpu_init() <-- trap_init(). Before trap_init(),
|
||||
* CPU runs at ring 0 so it is impossible to hit an invalid
|
||||
* stack. Using the original stack works well enough at this
|
||||
* early stage. DEBUG_STACK will be equipped after cpu_init() in
|
||||
* trap_init().
|
||||
*
|
||||
* We don't need to set trace_idt_table like set_intr_gate(),
|
||||
* since we don't have trace_debug and it will be reset to
|
||||
* 'debug' in trap_init() by set_intr_gate_ist().
|
||||
*/
|
||||
set_intr_gate_notrace(X86_TRAP_DB, debug);
|
||||
/* int3 can be called from all */
|
||||
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
|
||||
set_system_intr_gate(X86_TRAP_BP, &int3);
|
||||
#ifdef CONFIG_X86_32
|
||||
set_intr_gate(X86_TRAP_PF, page_fault);
|
||||
#endif
|
||||
@ -1005,6 +1014,15 @@ void __init trap_init(void)
|
||||
*/
|
||||
cpu_init();
|
||||
|
||||
/*
|
||||
* X86_TRAP_DB and X86_TRAP_BP have been set
|
||||
* in early_trap_init(). However, ITS works only after
|
||||
* cpu_init() loads TSS. See comments in early_trap_init().
|
||||
*/
|
||||
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
|
||||
/* int3 can be called from all */
|
||||
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
|
||||
|
||||
x86_init.irqs.trap_init();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
|
||||
int ret = NOTIFY_DONE;
|
||||
|
||||
/* We are only interested in userspace traps */
|
||||
if (regs && !user_mode_vm(regs))
|
||||
if (regs && !user_mode(regs))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
switch (val) {
|
||||
|
@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
tss = &per_cpu(init_tss, get_cpu());
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
current->thread.sp0 = current->thread.saved_sp0;
|
||||
current->thread.sysenter_cs = __KERNEL_CS;
|
||||
load_sp0(tss, &current->thread);
|
||||
@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
|
||||
tsk->thread.saved_fs = info->regs32->fs;
|
||||
tsk->thread.saved_gs = get_user_gs(info->regs32);
|
||||
|
||||
tss = &per_cpu(init_tss, get_cpu());
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
|
||||
if (cpu_has_sep)
|
||||
tsk->thread.sysenter_cs = 0;
|
||||
|
@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
|
||||
/* Some systems map "vectors" to interrupts weirdly. Not us! */
|
||||
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
|
||||
if (i != SYSCALL_VECTOR)
|
||||
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
|
||||
set_intr_gate(i, irq_entries_start +
|
||||
8 * (i - FIRST_EXTERNAL_VECTOR));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
|
||||
{
|
||||
lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
|
||||
THREAD_SIZE / PAGE_SIZE);
|
||||
tss->x86_tss.sp0 = thread->sp0;
|
||||
}
|
||||
|
||||
/* Let's just say, I wouldn't do debugging under a Guest. */
|
||||
|
@ -13,16 +13,6 @@
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/dwarf2.h>
|
||||
|
||||
.macro SAVE reg
|
||||
pushl_cfi %\reg
|
||||
CFI_REL_OFFSET \reg, 0
|
||||
.endm
|
||||
|
||||
.macro RESTORE reg
|
||||
popl_cfi %\reg
|
||||
CFI_RESTORE \reg
|
||||
.endm
|
||||
|
||||
.macro read64 reg
|
||||
movl %ebx, %eax
|
||||
movl %ecx, %edx
|
||||
@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
|
||||
.macro addsub_return func ins insc
|
||||
ENTRY(atomic64_\func\()_return_cx8)
|
||||
CFI_STARTPROC
|
||||
SAVE ebp
|
||||
SAVE ebx
|
||||
SAVE esi
|
||||
SAVE edi
|
||||
pushl_cfi_reg ebp
|
||||
pushl_cfi_reg ebx
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg edi
|
||||
|
||||
movl %eax, %esi
|
||||
movl %edx, %edi
|
||||
@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
|
||||
10:
|
||||
movl %ebx, %eax
|
||||
movl %ecx, %edx
|
||||
RESTORE edi
|
||||
RESTORE esi
|
||||
RESTORE ebx
|
||||
RESTORE ebp
|
||||
popl_cfi_reg edi
|
||||
popl_cfi_reg esi
|
||||
popl_cfi_reg ebx
|
||||
popl_cfi_reg ebp
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(atomic64_\func\()_return_cx8)
|
||||
@ -104,7 +94,7 @@ addsub_return sub sub sbb
|
||||
.macro incdec_return func ins insc
|
||||
ENTRY(atomic64_\func\()_return_cx8)
|
||||
CFI_STARTPROC
|
||||
SAVE ebx
|
||||
pushl_cfi_reg ebx
|
||||
|
||||
read64 %esi
|
||||
1:
|
||||
@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
|
||||
10:
|
||||
movl %ebx, %eax
|
||||
movl %ecx, %edx
|
||||
RESTORE ebx
|
||||
popl_cfi_reg ebx
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(atomic64_\func\()_return_cx8)
|
||||
@ -130,7 +120,7 @@ incdec_return dec sub sbb
|
||||
|
||||
ENTRY(atomic64_dec_if_positive_cx8)
|
||||
CFI_STARTPROC
|
||||
SAVE ebx
|
||||
pushl_cfi_reg ebx
|
||||
|
||||
read64 %esi
|
||||
1:
|
||||
@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
|
||||
2:
|
||||
movl %ebx, %eax
|
||||
movl %ecx, %edx
|
||||
RESTORE ebx
|
||||
popl_cfi_reg ebx
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(atomic64_dec_if_positive_cx8)
|
||||
|
||||
ENTRY(atomic64_add_unless_cx8)
|
||||
CFI_STARTPROC
|
||||
SAVE ebp
|
||||
SAVE ebx
|
||||
pushl_cfi_reg ebp
|
||||
pushl_cfi_reg ebx
|
||||
/* these just push these two parameters on the stack */
|
||||
SAVE edi
|
||||
SAVE ecx
|
||||
pushl_cfi_reg edi
|
||||
pushl_cfi_reg ecx
|
||||
|
||||
movl %eax, %ebp
|
||||
movl %edx, %edi
|
||||
@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
|
||||
3:
|
||||
addl $8, %esp
|
||||
CFI_ADJUST_CFA_OFFSET -8
|
||||
RESTORE ebx
|
||||
RESTORE ebp
|
||||
popl_cfi_reg ebx
|
||||
popl_cfi_reg ebp
|
||||
ret
|
||||
4:
|
||||
cmpl %edx, 4(%esp)
|
||||
@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
|
||||
|
||||
ENTRY(atomic64_inc_not_zero_cx8)
|
||||
CFI_STARTPROC
|
||||
SAVE ebx
|
||||
pushl_cfi_reg ebx
|
||||
|
||||
read64 %esi
|
||||
1:
|
||||
@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
|
||||
|
||||
movl $1, %eax
|
||||
3:
|
||||
RESTORE ebx
|
||||
popl_cfi_reg ebx
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(atomic64_inc_not_zero_cx8)
|
||||
|
@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
|
||||
*/
|
||||
ENTRY(csum_partial)
|
||||
CFI_STARTPROC
|
||||
pushl_cfi %esi
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl_cfi %ebx
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg ebx
|
||||
movl 20(%esp),%eax # Function arg: unsigned int sum
|
||||
movl 16(%esp),%ecx # Function arg: int len
|
||||
movl 12(%esp),%esi # Function arg: unsigned char *buff
|
||||
@ -127,14 +125,12 @@ ENTRY(csum_partial)
|
||||
6: addl %ecx,%eax
|
||||
adcl $0, %eax
|
||||
7:
|
||||
testl $1, 12(%esp)
|
||||
testb $1, 12(%esp)
|
||||
jz 8f
|
||||
roll $8, %eax
|
||||
8:
|
||||
popl_cfi %ebx
|
||||
CFI_RESTORE ebx
|
||||
popl_cfi %esi
|
||||
CFI_RESTORE esi
|
||||
popl_cfi_reg ebx
|
||||
popl_cfi_reg esi
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial)
|
||||
@ -145,10 +141,8 @@ ENDPROC(csum_partial)
|
||||
|
||||
ENTRY(csum_partial)
|
||||
CFI_STARTPROC
|
||||
pushl_cfi %esi
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl_cfi %ebx
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg ebx
|
||||
movl 20(%esp),%eax # Function arg: unsigned int sum
|
||||
movl 16(%esp),%ecx # Function arg: int len
|
||||
movl 12(%esp),%esi # Function arg: const unsigned char *buf
|
||||
@ -251,14 +245,12 @@ ENTRY(csum_partial)
|
||||
addl %ebx,%eax
|
||||
adcl $0,%eax
|
||||
80:
|
||||
testl $1, 12(%esp)
|
||||
testb $1, 12(%esp)
|
||||
jz 90f
|
||||
roll $8, %eax
|
||||
90:
|
||||
popl_cfi %ebx
|
||||
CFI_RESTORE ebx
|
||||
popl_cfi %esi
|
||||
CFI_RESTORE esi
|
||||
popl_cfi_reg ebx
|
||||
popl_cfi_reg esi
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial)
|
||||
@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
|
||||
CFI_STARTPROC
|
||||
subl $4,%esp
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
pushl_cfi %edi
|
||||
CFI_REL_OFFSET edi, 0
|
||||
pushl_cfi %esi
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl_cfi %ebx
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl_cfi_reg edi
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg ebx
|
||||
movl ARGBASE+16(%esp),%eax # sum
|
||||
movl ARGBASE+12(%esp),%ecx # len
|
||||
movl ARGBASE+4(%esp),%esi # src
|
||||
@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
|
||||
|
||||
.previous
|
||||
|
||||
popl_cfi %ebx
|
||||
CFI_RESTORE ebx
|
||||
popl_cfi %esi
|
||||
CFI_RESTORE esi
|
||||
popl_cfi %edi
|
||||
CFI_RESTORE edi
|
||||
popl_cfi_reg ebx
|
||||
popl_cfi_reg esi
|
||||
popl_cfi_reg edi
|
||||
popl_cfi %ecx # equivalent to addl $4,%esp
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
|
||||
|
||||
ENTRY(csum_partial_copy_generic)
|
||||
CFI_STARTPROC
|
||||
pushl_cfi %ebx
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl_cfi %edi
|
||||
CFI_REL_OFFSET edi, 0
|
||||
pushl_cfi %esi
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl_cfi_reg ebx
|
||||
pushl_cfi_reg edi
|
||||
pushl_cfi_reg esi
|
||||
movl ARGBASE+4(%esp),%esi #src
|
||||
movl ARGBASE+8(%esp),%edi #dst
|
||||
movl ARGBASE+12(%esp),%ecx #len
|
||||
@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
|
||||
jmp 7b
|
||||
.previous
|
||||
|
||||
popl_cfi %esi
|
||||
CFI_RESTORE esi
|
||||
popl_cfi %edi
|
||||
CFI_RESTORE edi
|
||||
popl_cfi %ebx
|
||||
CFI_RESTORE ebx
|
||||
popl_cfi_reg esi
|
||||
popl_cfi_reg edi
|
||||
popl_cfi_reg ebx
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial_copy_generic)
|
||||
|
@ -1,31 +1,35 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
* Zero a page.
|
||||
* rdi page
|
||||
*/
|
||||
ENTRY(clear_page_c)
|
||||
* Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
|
||||
* recommended to use this when possible and we do use them by default.
|
||||
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
|
||||
* Otherwise, use original.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Zero a page.
|
||||
* %rdi - page
|
||||
*/
|
||||
ENTRY(clear_page)
|
||||
CFI_STARTPROC
|
||||
|
||||
ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
|
||||
"jmp clear_page_c_e", X86_FEATURE_ERMS
|
||||
|
||||
movl $4096/8,%ecx
|
||||
xorl %eax,%eax
|
||||
rep stosq
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(clear_page_c)
|
||||
ENDPROC(clear_page)
|
||||
|
||||
ENTRY(clear_page_c_e)
|
||||
ENTRY(clear_page_orig)
|
||||
CFI_STARTPROC
|
||||
movl $4096,%ecx
|
||||
xorl %eax,%eax
|
||||
rep stosb
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(clear_page_c_e)
|
||||
|
||||
ENTRY(clear_page)
|
||||
CFI_STARTPROC
|
||||
xorl %eax,%eax
|
||||
movl $4096/64,%ecx
|
||||
.p2align 4
|
||||
@ -45,29 +49,13 @@ ENTRY(clear_page)
|
||||
nop
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
.Lclear_page_end:
|
||||
ENDPROC(clear_page)
|
||||
ENDPROC(clear_page_orig)
|
||||
|
||||
/*
|
||||
* Some CPUs support enhanced REP MOVSB/STOSB instructions.
|
||||
* It is recommended to use this when possible.
|
||||
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
|
||||
* Otherwise, use original function.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
.section .altinstr_replacement,"ax"
|
||||
1: .byte 0xeb /* jmp <disp8> */
|
||||
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
|
||||
2: .byte 0xeb /* jmp <disp8> */
|
||||
.byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
|
||||
3:
|
||||
.previous
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
|
||||
.Lclear_page_end-clear_page, 2b-1b
|
||||
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
|
||||
.Lclear_page_end-clear_page,3b-2b
|
||||
.previous
|
||||
ENTRY(clear_page_c_e)
|
||||
CFI_STARTPROC
|
||||
movl $4096,%ecx
|
||||
xorl %eax,%eax
|
||||
rep stosb
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(clear_page_c_e)
|
||||
|
@ -2,23 +2,26 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
* Some CPUs run faster using the string copy instructions (sane microcode).
|
||||
* It is also a lot simpler. Use this when possible. But, don't use streaming
|
||||
* copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
|
||||
* prefetch distance based on SMP/UP.
|
||||
*/
|
||||
ALIGN
|
||||
copy_page_rep:
|
||||
ENTRY(copy_page)
|
||||
CFI_STARTPROC
|
||||
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
|
||||
movl $4096/8, %ecx
|
||||
rep movsq
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(copy_page_rep)
|
||||
ENDPROC(copy_page)
|
||||
|
||||
/*
|
||||
* Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
|
||||
* Could vary the prefetch distance based on SMP/UP.
|
||||
*/
|
||||
|
||||
ENTRY(copy_page)
|
||||
ENTRY(copy_page_regs)
|
||||
CFI_STARTPROC
|
||||
subq $2*8, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 2*8
|
||||
@ -90,21 +93,5 @@ ENTRY(copy_page)
|
||||
addq $2*8, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -2*8
|
||||
ret
|
||||
.Lcopy_page_end:
|
||||
CFI_ENDPROC
|
||||
ENDPROC(copy_page)
|
||||
|
||||
/* Some CPUs run faster using the string copy instructions.
|
||||
It is also a lot simpler. Use this when possible */
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
.section .altinstr_replacement,"ax"
|
||||
1: .byte 0xeb /* jmp <disp8> */
|
||||
.byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
|
||||
2:
|
||||
.previous
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
|
||||
.Lcopy_page_end-copy_page, 2b-1b
|
||||
.previous
|
||||
ENDPROC(copy_page_regs)
|
||||
|
@ -8,9 +8,6 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
|
||||
#define FIX_ALIGNMENT 1
|
||||
|
||||
#include <asm/current.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/thread_info.h>
|
||||
@ -19,33 +16,7 @@
|
||||
#include <asm/asm.h>
|
||||
#include <asm/smap.h>
|
||||
|
||||
/*
|
||||
* By placing feature2 after feature1 in altinstructions section, we logically
|
||||
* implement:
|
||||
* If CPU has feature2, jmp to alt2 is used
|
||||
* else if CPU has feature1, jmp to alt1 is used
|
||||
* else jmp to orig is used.
|
||||
*/
|
||||
.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
|
||||
0:
|
||||
.byte 0xe9 /* 32bit jump */
|
||||
.long \orig-1f /* by default jump to orig */
|
||||
1:
|
||||
.section .altinstr_replacement,"ax"
|
||||
2: .byte 0xe9 /* near jump with 32bit immediate */
|
||||
.long \alt1-1b /* offset */ /* or alternatively to alt1 */
|
||||
3: .byte 0xe9 /* near jump with 32bit immediate */
|
||||
.long \alt2-1b /* offset */ /* or alternatively to alt2 */
|
||||
.previous
|
||||
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry 0b,2b,\feature1,5,5
|
||||
altinstruction_entry 0b,3b,\feature2,5,5
|
||||
.previous
|
||||
.endm
|
||||
|
||||
.macro ALIGN_DESTINATION
|
||||
#ifdef FIX_ALIGNMENT
|
||||
/* check for bad alignment of destination */
|
||||
movl %edi,%ecx
|
||||
andl $7,%ecx
|
||||
@ -67,7 +38,6 @@
|
||||
|
||||
_ASM_EXTABLE(100b,103b)
|
||||
_ASM_EXTABLE(101b,103b)
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* Standard copy_to_user with segment limit checking */
|
||||
@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
|
||||
jc bad_to_user
|
||||
cmpq TI_addr_limit(%rax),%rcx
|
||||
ja bad_to_user
|
||||
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
|
||||
copy_user_generic_unrolled,copy_user_generic_string, \
|
||||
copy_user_enhanced_fast_string
|
||||
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
|
||||
"jmp copy_user_generic_string", \
|
||||
X86_FEATURE_REP_GOOD, \
|
||||
"jmp copy_user_enhanced_fast_string", \
|
||||
X86_FEATURE_ERMS
|
||||
CFI_ENDPROC
|
||||
ENDPROC(_copy_to_user)
|
||||
|
||||
@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
|
||||
jc bad_from_user
|
||||
cmpq TI_addr_limit(%rax),%rcx
|
||||
ja bad_from_user
|
||||
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
|
||||
copy_user_generic_unrolled,copy_user_generic_string, \
|
||||
copy_user_enhanced_fast_string
|
||||
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
|
||||
"jmp copy_user_generic_string", \
|
||||
X86_FEATURE_REP_GOOD, \
|
||||
"jmp copy_user_enhanced_fast_string", \
|
||||
X86_FEATURE_ERMS
|
||||
CFI_ENDPROC
|
||||
ENDPROC(_copy_from_user)
|
||||
|
||||
|
@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
|
||||
|
||||
/* handle last odd byte */
|
||||
.Lhandle_1:
|
||||
testl $1, %r10d
|
||||
testb $1, %r10b
|
||||
jz .Lende
|
||||
xorl %ebx, %ebx
|
||||
source
|
||||
|
@ -52,6 +52,13 @@
|
||||
*/
|
||||
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
|
||||
{
|
||||
/*
|
||||
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
|
||||
* even if the input buffer is long enough to hold them.
|
||||
*/
|
||||
if (buf_len > MAX_INSN_SIZE)
|
||||
buf_len = MAX_INSN_SIZE;
|
||||
|
||||
memset(insn, 0, sizeof(*insn));
|
||||
insn->kaddr = kaddr;
|
||||
insn->end_kaddr = kaddr + buf_len;
|
||||
@ -164,6 +171,12 @@ found:
|
||||
/* VEX.W overrides opnd_size */
|
||||
insn->opnd_bytes = 8;
|
||||
} else {
|
||||
/*
|
||||
* For VEX2, fake VEX3-like byte#2.
|
||||
* Makes it easier to decode vex.W, vex.vvvv,
|
||||
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
|
||||
*/
|
||||
insn->vex_prefix.bytes[2] = b2 & 0x7f;
|
||||
insn->vex_prefix.nbytes = 2;
|
||||
insn->next_byte += 2;
|
||||
}
|
||||
|
@ -1,11 +1,19 @@
|
||||
/* Copyright 2002 Andi Kleen */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
* We build a jump to memcpy_orig by default which gets NOPped out on
|
||||
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
|
||||
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
|
||||
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
|
||||
*/
|
||||
|
||||
.weak memcpy
|
||||
|
||||
/*
|
||||
* memcpy - Copy a memory block.
|
||||
*
|
||||
@ -17,15 +25,11 @@
|
||||
* Output:
|
||||
* rax original destination
|
||||
*/
|
||||
ENTRY(__memcpy)
|
||||
ENTRY(memcpy)
|
||||
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
|
||||
"jmp memcpy_erms", X86_FEATURE_ERMS
|
||||
|
||||
/*
|
||||
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
|
||||
*
|
||||
* This gets patched over the unrolled variant (below) via the
|
||||
* alternative instructions framework:
|
||||
*/
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemcpy_c:
|
||||
movq %rdi, %rax
|
||||
movq %rdx, %rcx
|
||||
shrq $3, %rcx
|
||||
@ -34,29 +38,21 @@
|
||||
movl %edx, %ecx
|
||||
rep movsb
|
||||
ret
|
||||
.Lmemcpy_e:
|
||||
.previous
|
||||
ENDPROC(memcpy)
|
||||
ENDPROC(__memcpy)
|
||||
|
||||
/*
|
||||
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
|
||||
* memcpy_c. Use memcpy_c_e when possible.
|
||||
*
|
||||
* This gets patched over the unrolled variant (below) via the
|
||||
* alternative instructions framework:
|
||||
* memcpy_erms() - enhanced fast string memcpy. This is faster and
|
||||
* simpler than memcpy. Use memcpy_erms when possible.
|
||||
*/
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemcpy_c_e:
|
||||
ENTRY(memcpy_erms)
|
||||
movq %rdi, %rax
|
||||
movq %rdx, %rcx
|
||||
rep movsb
|
||||
ret
|
||||
.Lmemcpy_e_e:
|
||||
.previous
|
||||
ENDPROC(memcpy_erms)
|
||||
|
||||
.weak memcpy
|
||||
|
||||
ENTRY(__memcpy)
|
||||
ENTRY(memcpy)
|
||||
ENTRY(memcpy_orig)
|
||||
CFI_STARTPROC
|
||||
movq %rdi, %rax
|
||||
|
||||
@ -183,26 +179,4 @@ ENTRY(memcpy)
|
||||
.Lend:
|
||||
retq
|
||||
CFI_ENDPROC
|
||||
ENDPROC(memcpy)
|
||||
ENDPROC(__memcpy)
|
||||
|
||||
/*
|
||||
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
|
||||
* If the feature is supported, memcpy_c_e() is the first choice.
|
||||
* If enhanced rep movsb copy is not available, use fast string copy
|
||||
* memcpy_c() when possible. This is faster and code is simpler than
|
||||
* original memcpy().
|
||||
* Otherwise, original memcpy() is used.
|
||||
* In .altinstructions section, ERMS feature is placed after REP_GOOD
|
||||
* feature to implement the right patch order.
|
||||
*
|
||||
* Replace only beginning, memcpy is used to apply alternatives,
|
||||
* so it is silly to overwrite itself with nops - reboot is the
|
||||
* only outcome...
|
||||
*/
|
||||
.section .altinstructions, "a"
|
||||
altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
|
||||
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
|
||||
altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
|
||||
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
|
||||
.previous
|
||||
ENDPROC(memcpy_orig)
|
||||
|
@ -5,7 +5,6 @@
|
||||
* This assembly file is re-written from memmove_64.c file.
|
||||
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
|
||||
*/
|
||||
#define _STRING_C
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/cpufeature.h>
|
||||
@ -44,6 +43,8 @@ ENTRY(__memmove)
|
||||
jg 2f
|
||||
|
||||
.Lmemmove_begin_forward:
|
||||
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
|
||||
|
||||
/*
|
||||
* The movsq instruction has a high startup latency,
|
||||
* so we handle small sizes with general-purpose registers.
|
||||
@ -207,21 +208,5 @@ ENTRY(__memmove)
|
||||
13:
|
||||
retq
|
||||
CFI_ENDPROC
|
||||
|
||||
.section .altinstr_replacement,"ax"
|
||||
.Lmemmove_begin_forward_efs:
|
||||
/* Forward moving data. */
|
||||
movq %rdx, %rcx
|
||||
rep movsb
|
||||
retq
|
||||
.Lmemmove_end_forward_efs:
|
||||
.previous
|
||||
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry .Lmemmove_begin_forward, \
|
||||
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
|
||||
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
|
||||
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
|
||||
.previous
|
||||
ENDPROC(__memmove)
|
||||
ENDPROC(memmove)
|
||||
|
@ -5,19 +5,30 @@
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
.weak memset
|
||||
|
||||
/*
|
||||
* ISO C memset - set a memory block to a byte value. This function uses fast
|
||||
* string to get better performance than the original function. The code is
|
||||
* simpler and shorter than the original function as well.
|
||||
*
|
||||
*
|
||||
* rdi destination
|
||||
* rsi value (char)
|
||||
* rdx count (bytes)
|
||||
*
|
||||
* rsi value (char)
|
||||
* rdx count (bytes)
|
||||
*
|
||||
* rax original destination
|
||||
*/
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemset_c:
|
||||
*/
|
||||
ENTRY(memset)
|
||||
ENTRY(__memset)
|
||||
/*
|
||||
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
|
||||
* to use it when possible. If not available, use fast string instructions.
|
||||
*
|
||||
* Otherwise, use original memset function.
|
||||
*/
|
||||
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
|
||||
"jmp memset_erms", X86_FEATURE_ERMS
|
||||
|
||||
movq %rdi,%r9
|
||||
movq %rdx,%rcx
|
||||
andl $7,%edx
|
||||
@ -31,8 +42,8 @@
|
||||
rep stosb
|
||||
movq %r9,%rax
|
||||
ret
|
||||
.Lmemset_e:
|
||||
.previous
|
||||
ENDPROC(memset)
|
||||
ENDPROC(__memset)
|
||||
|
||||
/*
|
||||
* ISO C memset - set a memory block to a byte value. This function uses
|
||||
@ -45,21 +56,16 @@
|
||||
*
|
||||
* rax original destination
|
||||
*/
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemset_c_e:
|
||||
ENTRY(memset_erms)
|
||||
movq %rdi,%r9
|
||||
movb %sil,%al
|
||||
movq %rdx,%rcx
|
||||
rep stosb
|
||||
movq %r9,%rax
|
||||
ret
|
||||
.Lmemset_e_e:
|
||||
.previous
|
||||
ENDPROC(memset_erms)
|
||||
|
||||
.weak memset
|
||||
|
||||
ENTRY(memset)
|
||||
ENTRY(__memset)
|
||||
ENTRY(memset_orig)
|
||||
CFI_STARTPROC
|
||||
movq %rdi,%r10
|
||||
|
||||
@ -134,23 +140,4 @@ ENTRY(__memset)
|
||||
jmp .Lafter_bad_alignment
|
||||
.Lfinal:
|
||||
CFI_ENDPROC
|
||||
ENDPROC(memset)
|
||||
ENDPROC(__memset)
|
||||
|
||||
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
|
||||
* It is recommended to use this when possible.
|
||||
*
|
||||
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
|
||||
* instructions.
|
||||
*
|
||||
* Otherwise, use original memset function.
|
||||
*
|
||||
* In .altinstructions section, ERMS feature is placed after REP_GOOD
|
||||
* feature to implement the right patch order.
|
||||
*/
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
|
||||
.Lfinal-__memset,.Lmemset_e-.Lmemset_c
|
||||
altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
|
||||
.Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
|
||||
.previous
|
||||
ENDPROC(memset_orig)
|
||||
|
@ -14,8 +14,8 @@
|
||||
.macro op_safe_regs op
|
||||
ENTRY(\op\()_safe_regs)
|
||||
CFI_STARTPROC
|
||||
pushq_cfi %rbx
|
||||
pushq_cfi %rbp
|
||||
pushq_cfi_reg rbx
|
||||
pushq_cfi_reg rbp
|
||||
movq %rdi, %r10 /* Save pointer */
|
||||
xorl %r11d, %r11d /* Return value */
|
||||
movl (%rdi), %eax
|
||||
@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
|
||||
movl %ebp, 20(%r10)
|
||||
movl %esi, 24(%r10)
|
||||
movl %edi, 28(%r10)
|
||||
popq_cfi %rbp
|
||||
popq_cfi %rbx
|
||||
popq_cfi_reg rbp
|
||||
popq_cfi_reg rbx
|
||||
ret
|
||||
3:
|
||||
CFI_RESTORE_STATE
|
||||
@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
|
||||
.macro op_safe_regs op
|
||||
ENTRY(\op\()_safe_regs)
|
||||
CFI_STARTPROC
|
||||
pushl_cfi %ebx
|
||||
pushl_cfi %ebp
|
||||
pushl_cfi %esi
|
||||
pushl_cfi %edi
|
||||
pushl_cfi_reg ebx
|
||||
pushl_cfi_reg ebp
|
||||
pushl_cfi_reg esi
|
||||
pushl_cfi_reg edi
|
||||
pushl_cfi $0 /* Return value */
|
||||
pushl_cfi %eax
|
||||
movl 4(%eax), %ecx
|
||||
@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
|
||||
movl %esi, 24(%eax)
|
||||
movl %edi, 28(%eax)
|
||||
popl_cfi %eax
|
||||
popl_cfi %edi
|
||||
popl_cfi %esi
|
||||
popl_cfi %ebp
|
||||
popl_cfi %ebx
|
||||
popl_cfi_reg edi
|
||||
popl_cfi_reg esi
|
||||
popl_cfi_reg ebp
|
||||
popl_cfi_reg ebx
|
||||
ret
|
||||
3:
|
||||
CFI_RESTORE_STATE
|
||||
|
@ -34,10 +34,10 @@
|
||||
*/
|
||||
|
||||
#define save_common_regs \
|
||||
pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
|
||||
pushl_cfi_reg ecx
|
||||
|
||||
#define restore_common_regs \
|
||||
popl_cfi %ecx; CFI_RESTORE ecx
|
||||
popl_cfi_reg ecx
|
||||
|
||||
/* Avoid uglifying the argument copying x86-64 needs to do. */
|
||||
.macro movq src, dst
|
||||
@ -64,22 +64,22 @@
|
||||
*/
|
||||
|
||||
#define save_common_regs \
|
||||
pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
|
||||
pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
|
||||
pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
|
||||
pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
|
||||
pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
|
||||
pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
|
||||
pushq_cfi %r11; CFI_REL_OFFSET r11, 0
|
||||
pushq_cfi_reg rdi; \
|
||||
pushq_cfi_reg rsi; \
|
||||
pushq_cfi_reg rcx; \
|
||||
pushq_cfi_reg r8; \
|
||||
pushq_cfi_reg r9; \
|
||||
pushq_cfi_reg r10; \
|
||||
pushq_cfi_reg r11
|
||||
|
||||
#define restore_common_regs \
|
||||
popq_cfi %r11; CFI_RESTORE r11; \
|
||||
popq_cfi %r10; CFI_RESTORE r10; \
|
||||
popq_cfi %r9; CFI_RESTORE r9; \
|
||||
popq_cfi %r8; CFI_RESTORE r8; \
|
||||
popq_cfi %rcx; CFI_RESTORE rcx; \
|
||||
popq_cfi %rsi; CFI_RESTORE rsi; \
|
||||
popq_cfi %rdi; CFI_RESTORE rdi
|
||||
popq_cfi_reg r11; \
|
||||
popq_cfi_reg r10; \
|
||||
popq_cfi_reg r9; \
|
||||
popq_cfi_reg r8; \
|
||||
popq_cfi_reg rcx; \
|
||||
popq_cfi_reg rsi; \
|
||||
popq_cfi_reg rdi
|
||||
|
||||
#endif
|
||||
|
||||
@ -87,12 +87,10 @@
|
||||
ENTRY(call_rwsem_down_read_failed)
|
||||
CFI_STARTPROC
|
||||
save_common_regs
|
||||
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
|
||||
CFI_REL_OFFSET __ASM_REG(dx), 0
|
||||
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
|
||||
movq %rax,%rdi
|
||||
call rwsem_down_read_failed
|
||||
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
|
||||
CFI_RESTORE __ASM_REG(dx)
|
||||
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
|
||||
restore_common_regs
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
|
||||
ENTRY(call_rwsem_downgrade_wake)
|
||||
CFI_STARTPROC
|
||||
save_common_regs
|
||||
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
|
||||
CFI_REL_OFFSET __ASM_REG(dx), 0
|
||||
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
|
||||
movq %rax,%rdi
|
||||
call rwsem_downgrade_wake
|
||||
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
|
||||
CFI_RESTORE __ASM_REG(dx)
|
||||
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
|
||||
restore_common_regs
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
|
@ -13,12 +13,9 @@
|
||||
.globl \name
|
||||
\name:
|
||||
CFI_STARTPROC
|
||||
pushl_cfi %eax
|
||||
CFI_REL_OFFSET eax, 0
|
||||
pushl_cfi %ecx
|
||||
CFI_REL_OFFSET ecx, 0
|
||||
pushl_cfi %edx
|
||||
CFI_REL_OFFSET edx, 0
|
||||
pushl_cfi_reg eax
|
||||
pushl_cfi_reg ecx
|
||||
pushl_cfi_reg edx
|
||||
|
||||
.if \put_ret_addr_in_eax
|
||||
/* Place EIP in the arg1 */
|
||||
@ -26,12 +23,9 @@
|
||||
.endif
|
||||
|
||||
call \func
|
||||
popl_cfi %edx
|
||||
CFI_RESTORE edx
|
||||
popl_cfi %ecx
|
||||
CFI_RESTORE ecx
|
||||
popl_cfi %eax
|
||||
CFI_RESTORE eax
|
||||
popl_cfi_reg edx
|
||||
popl_cfi_reg ecx
|
||||
popl_cfi_reg eax
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
_ASM_NOKPROBE(\name)
|
||||
|
@ -17,9 +17,18 @@
|
||||
CFI_STARTPROC
|
||||
|
||||
/* this one pushes 9 elems, the next one would be %rIP */
|
||||
SAVE_ARGS
|
||||
pushq_cfi_reg rdi
|
||||
pushq_cfi_reg rsi
|
||||
pushq_cfi_reg rdx
|
||||
pushq_cfi_reg rcx
|
||||
pushq_cfi_reg rax
|
||||
pushq_cfi_reg r8
|
||||
pushq_cfi_reg r9
|
||||
pushq_cfi_reg r10
|
||||
pushq_cfi_reg r11
|
||||
|
||||
.if \put_ret_addr_in_rdi
|
||||
/* 9*8(%rsp) is return addr on stack */
|
||||
movq_cfi_restore 9*8, rdi
|
||||
.endif
|
||||
|
||||
@ -45,11 +54,22 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) \
|
||||
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|
||||
|| defined(CONFIG_PREEMPT)
|
||||
CFI_STARTPROC
|
||||
SAVE_ARGS
|
||||
CFI_ADJUST_CFA_OFFSET 9*8
|
||||
restore:
|
||||
RESTORE_ARGS
|
||||
popq_cfi_reg r11
|
||||
popq_cfi_reg r10
|
||||
popq_cfi_reg r9
|
||||
popq_cfi_reg r8
|
||||
popq_cfi_reg rax
|
||||
popq_cfi_reg rcx
|
||||
popq_cfi_reg rdx
|
||||
popq_cfi_reg rsi
|
||||
popq_cfi_reg rdi
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
_ASM_NOKPROBE(restore)
|
||||
#endif
|
||||
|
@ -273,6 +273,9 @@ dd: ESC
|
||||
de: ESC
|
||||
df: ESC
|
||||
# 0xe0 - 0xef
|
||||
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
|
||||
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
|
||||
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
|
||||
e0: LOOPNE/LOOPNZ Jb (f64)
|
||||
e1: LOOPE/LOOPZ Jb (f64)
|
||||
e2: LOOP Jb (f64)
|
||||
@ -281,6 +284,10 @@ e4: IN AL,Ib
|
||||
e5: IN eAX,Ib
|
||||
e6: OUT Ib,AL
|
||||
e7: OUT Ib,eAX
|
||||
# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
|
||||
# in "near" jumps and calls is 16-bit. For CALL,
|
||||
# push of return address is 16-bit wide, RSP is decremented by 2
|
||||
# but is not truncated to 16 bits, unlike RIP.
|
||||
e8: CALL Jz (f64)
|
||||
e9: JMP-near Jz (f64)
|
||||
ea: JMP-far Ap (i64)
|
||||
@ -456,6 +463,7 @@ AVXcode: 1
|
||||
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
|
||||
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
|
||||
# 0x0f 0x80-0x8f
|
||||
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
|
||||
80: JO Jz (f64)
|
||||
81: JNO Jz (f64)
|
||||
82: JB/JC/JNAE Jz (f64)
|
||||
@ -842,6 +850,7 @@ EndTable
|
||||
GrpTable: Grp5
|
||||
0: INC Ev
|
||||
1: DEC Ev
|
||||
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
|
||||
2: CALLN Ev (f64)
|
||||
3: CALLF Ep
|
||||
4: JMPN Ev (f64)
|
||||
|
@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
|
||||
int ret = 0;
|
||||
|
||||
/* kprobe_running() needs smp_processor_id() */
|
||||
if (kprobes_built_in() && !user_mode_vm(regs)) {
|
||||
if (kprobes_built_in() && !user_mode(regs)) {
|
||||
preempt_disable();
|
||||
if (kprobe_running() && kprobe_fault_handler(regs, 14))
|
||||
ret = 1;
|
||||
@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
||||
instr = (void *)convert_ip_to_linear(current, regs);
|
||||
max_instr = instr + 15;
|
||||
|
||||
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
|
||||
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
|
||||
return 0;
|
||||
|
||||
while (instr < max_instr) {
|
||||
@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
|
||||
if (error_code & PF_USER)
|
||||
return false;
|
||||
|
||||
if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
|
||||
if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||
* User-mode registers count as a user access even for any
|
||||
* potential system fault or CPU buglet:
|
||||
*/
|
||||
if (user_mode_vm(regs)) {
|
||||
if (user_mode(regs)) {
|
||||
local_irq_enable();
|
||||
error_code |= PF_USER;
|
||||
flags |= FAULT_FLAG_USER;
|
||||
|
@ -179,7 +179,8 @@ static void __init probe_page_size_mask(void)
|
||||
if (cpu_has_pge) {
|
||||
cr4_set_bits_and_update_boot(X86_CR4_PGE);
|
||||
__supported_pte_mask |= _PAGE_GLOBAL;
|
||||
}
|
||||
} else
|
||||
__supported_pte_mask &= ~_PAGE_GLOBAL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
|
||||
{
|
||||
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
|
||||
|
||||
if (!user_mode_vm(regs)) {
|
||||
if (!user_mode(regs)) {
|
||||
unsigned long stack = kernel_stack_pointer(regs);
|
||||
if (depth)
|
||||
dump_trace(NULL, regs, (unsigned long *)stack, 0,
|
||||
|
@ -134,7 +134,7 @@ static void do_fpu_end(void)
|
||||
static void fix_processor_context(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *t = &per_cpu(init_tss, cpu);
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_struct *desc = get_cpu_gdt_table(cpu);
|
||||
tss_desc tss;
|
||||
|
@ -119,7 +119,7 @@
|
||||
110 i386 iopl sys_iopl
|
||||
111 i386 vhangup sys_vhangup
|
||||
112 i386 idle
|
||||
113 i386 vm86old sys_vm86old sys32_vm86_warning
|
||||
113 i386 vm86old sys_vm86old sys_ni_syscall
|
||||
114 i386 wait4 sys_wait4 compat_sys_wait4
|
||||
115 i386 swapoff sys_swapoff
|
||||
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
|
||||
@ -172,7 +172,7 @@
|
||||
163 i386 mremap sys_mremap
|
||||
164 i386 setresuid sys_setresuid16
|
||||
165 i386 getresuid sys_getresuid16
|
||||
166 i386 vm86 sys_vm86 sys32_vm86_warning
|
||||
166 i386 vm86 sys_vm86 sys_ni_syscall
|
||||
167 i386 query_module
|
||||
168 i386 poll sys_poll
|
||||
169 i386 nfsservctl
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user