s390: add support for BEAR enhancement facility

The Breaking-Event-Address-Register (BEAR) stores the address of the
last breaking event instruction. Breaking events are usually instructions
that change the program flow - for example branches, and instructions
that modify the address in the PSW like lpswe. This is useful for debugging
wild branches, because one could easily figure out where the wild branch
was originating from.

What is problematic is that lpswe is considered a breaking event, and
therefore overwrites BEAR on kernel exit. The BEAR enhancement facility
adds new instructions that allow to save/restore BEAR and also an lpswey
instruction that doesn't cause a breaking event. So we can save BEAR on
kernel entry and restore it on exit to user space.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
Sven Schnelle 2021-04-07 09:20:17 +02:00 committed by Vasily Gorbik
parent 5d17d4ed7e
commit 3b051e89da
11 changed files with 87 additions and 24 deletions

View File

@ -12,6 +12,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/jump_label.h>
struct cpuid
{
@ -21,5 +22,7 @@ struct cpuid
unsigned int unused : 16;
} __attribute__ ((packed, aligned(8)));
DECLARE_STATIC_KEY_FALSE(cpu_has_bear);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_CPU_H */

View File

@ -93,9 +93,10 @@ struct lowcore {
psw_t return_psw; /* 0x0290 */
psw_t return_mcck_psw; /* 0x02a0 */
__u64 last_break; /* 0x02b0 */
/* CPU accounting and timing values. */
__u64 sys_enter_timer; /* 0x02b0 */
__u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */
__u64 sys_enter_timer; /* 0x02b8 */
__u64 mcck_enter_timer; /* 0x02c0 */
__u64 exit_timer; /* 0x02c8 */
__u64 user_timer; /* 0x02d0 */
@ -188,7 +189,7 @@ struct lowcore {
__u32 tod_progreg_save_area; /* 0x1324 */
__u32 cpu_timer_save_area[2]; /* 0x1328 */
__u32 clock_comp_save_area[2]; /* 0x1330 */
__u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */
__u64 last_break_save_area; /* 0x1338 */
__u32 access_regs_save_area[16]; /* 0x1340 */
__u64 cregs_save_area[16]; /* 0x1380 */
__u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */

View File

@ -35,6 +35,7 @@ int main(void)
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
OFFSET(__PT_FLAGS, pt_regs, flags);
OFFSET(__PT_CR1, pt_regs, cr1);
OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
/* stack_frame offsets */
@ -127,6 +128,7 @@ int main(void)
OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
OFFSET(__LC_LAST_BREAK, lowcore, last_break);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
@ -140,6 +142,7 @@ int main(void)
OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area);
OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area);
OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area);
OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area);
OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area);
OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);

View File

@ -52,6 +52,22 @@ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
ALTERNATIVE "", ".insn s,0xb2010000,\address", 193
.endm
.macro LBEAR address
ALTERNATIVE "", ".insn s,0xb2000000,\address", 193
.endm
.macro LPSWEY address,lpswe
ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193
.endm
.macro MBEAR reg
ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
.endm
.macro CHECK_STACK savearea
#ifdef CONFIG_CHECK_STACK
tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
@ -302,6 +318,7 @@ ENTRY(system_call)
BPOFF
lghi %r14,0
.Lsysc_per:
STBEAR __LC_LAST_BREAK
lctlg %c1,%c1,__LC_KERNEL_ASCE
lg %r12,__LC_CURRENT
lg %r15,__LC_KERNEL_STACK
@ -321,14 +338,16 @@ ENTRY(system_call)
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
MBEAR %r2
lgr %r3,%r14
brasl %r14,__do_syscall
lctlg %c1,%c1,__LC_USER_ASCE
mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
stpt __LC_EXIT_TIMER
b __LC_RETURN_LPSWE
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
ENDPROC(system_call)
#
@ -340,9 +359,10 @@ ENTRY(ret_from_fork)
lctlg %c1,%c1,__LC_USER_ASCE
mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
stpt __LC_EXIT_TIMER
b __LC_RETURN_LPSWE
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
ENDPROC(ret_from_fork)
/*
@ -382,6 +402,7 @@ ENTRY(pgm_check_handler)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
stmg %r8,%r9,__PT_PSW(%r11)
# clear user controlled registers to prevent speculative use
@ -401,8 +422,9 @@ ENTRY(pgm_check_handler)
stpt __LC_EXIT_TIMER
.Lpgm_exit_kernel:
mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
b __LC_RETURN_LPSWE
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
#
# single stepped system call
@ -412,7 +434,8 @@ ENTRY(pgm_check_handler)
larl %r14,.Lsysc_per
stg %r14,__LC_RETURN_PSW+8
lghi %r14,1
lpswe __LC_RETURN_PSW # branch to .Lsysc_per
LBEAR __LC_PGM_LAST_BREAK
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
ENDPROC(pgm_check_handler)
/*
@ -422,6 +445,7 @@ ENDPROC(pgm_check_handler)
ENTRY(\name)
STCK __LC_INT_CLOCK
stpt __LC_SYS_ENTER_TIMER
STBEAR __LC_LAST_BREAK
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
@ -453,6 +477,7 @@ ENTRY(\name)
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
MBEAR %r11
stmg %r8,%r9,__PT_PSW(%r11)
tm %r8,0x0001 # coming from user space?
jno 1f
@ -465,8 +490,9 @@ ENTRY(\name)
lctlg %c1,%c1,__LC_USER_ASCE
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
2: lmg %r0,%r15,__PT_R0(%r11)
b __LC_RETURN_LPSWE
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
ENDPROC(\name)
.endm
@ -505,6 +531,7 @@ ENTRY(mcck_int_handler)
BPOFF
la %r1,4095 # validate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
lg %r12,__LC_CURRENT
lmg %r8,%r9,__LC_MCK_OLD_PSW
@ -591,8 +618,10 @@ ENTRY(mcck_int_handler)
jno 0f
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
b __LC_RETURN_MCCK_LPSWE
0: ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
.Lmcck_panic:
/*

View File

@ -140,8 +140,11 @@ void noinstr do_io_irq(struct pt_regs *regs)
irq_enter();
if (user_mode(regs))
if (user_mode(regs)) {
update_timer_sys();
if (static_branch_likely(&cpu_has_bear))
current->thread.last_break = regs->last_break;
}
from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
if (from_idle)
@ -171,8 +174,11 @@ void noinstr do_ext_irq(struct pt_regs *regs)
irq_enter();
if (user_mode(regs))
if (user_mode(regs)) {
update_timer_sys();
if (static_branch_likely(&cpu_has_bear))
current->thread.last_break = regs->last_break;
}
regs->int_code = S390_lowcore.ext_int_code_addr;
regs->int_parm = S390_lowcore.ext_params;

View File

@ -141,7 +141,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame->childregs.gprs[10] = arg;
frame->childregs.gprs[11] = (unsigned long)do_exit;
frame->childregs.orig_gpr2 = -1;
frame->childregs.last_break = 1;
return 0;
}
frame->childregs = *current_pt_regs();

View File

@ -174,6 +174,8 @@ unsigned long MODULES_END;
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
/*
* The Write Back bit position in the physaddr is given by the SLPC PCI.
* Leaving the mask zero always uses write through which is safe
@ -1038,6 +1040,9 @@ void __init setup_arch(char **cmdline_p)
smp_detect_cpus();
topology_init_early();
if (test_facility(193))
static_branch_enable(&cpu_has_bear);
/*
* Create kernel page tables and switch to virtual addressing.
*/

View File

@ -154,6 +154,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
regs->psw = S390_lowcore.svc_old_psw;
regs->int_code = S390_lowcore.svc_int_code;
update_timer_sys();
if (static_branch_likely(&cpu_has_bear))
current->thread.last_break = regs->last_break;
local_irq_enable();
regs->orig_gpr2 = regs->gprs[2];

View File

@ -300,7 +300,6 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
void noinstr __do_pgm_check(struct pt_regs *regs)
{
unsigned long last_break = S390_lowcore.pgm_last_break;
unsigned int trapnr;
irqentry_state_t state;
@ -311,10 +310,11 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
if (user_mode(regs)) {
update_timer_sys();
if (last_break < 4096)
last_break = 1;
current->thread.last_break = last_break;
regs->last_break = last_break;
if (!static_branch_likely(&cpu_has_bear)) {
if (regs->last_break < 4096)
regs->last_break = 1;
}
current->thread.last_break = regs->last_break;
}
if (S390_lowcore.pgm_code & 0x0200) {

View File

@ -8,6 +8,7 @@
#include <linux/kasan.h>
#include <asm/ptdump.h>
#include <asm/kasan.h>
#include <asm/nospec-branch.h>
#include <asm/sections.h>
static unsigned long max_addr;
@ -116,8 +117,13 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
return;
if (st->current_prot & _PAGE_NOEXEC)
return;
/* The first lowcore page is currently still W+X. */
if (addr == PAGE_SIZE)
/*
* The first lowcore page is W+X if spectre mitigations are using
* trampolines or the BEAR enhancements facility is not installed,
* in which case we have two lpswe instructions in lowcore that need
* to be executable.
*/
if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
return;
WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
(void *)st->start_address);
@ -203,7 +209,9 @@ void ptdump_check_wx(void)
if (st.wx_pages)
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
else
pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n");
pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
(nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
"unexpected " : "");
}
#endif /* CONFIG_DEBUG_WX */

View File

@ -13,6 +13,7 @@
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
@ -584,8 +585,13 @@ void __init vmem_map_init(void)
__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
/* we need lowcore executable for our LPSWE instructions */
if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) {
/*
* Lowcore must be executable for LPSWE
* and expoline trampoline branch instructions.
*/
set_memory_x(0, 1);
}
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);