// SPDX-License-Identifier: GPL-2.0
/*
 * This code fills the used part of the kernel stack with a poison value
 * before returning to userspace. It's part of the STACKLEAK feature
 * ported from grsecurity/PaX.
 *
 * Author: Alexander Popov <alex.popov@linux.com>
 *
 * STACKLEAK reduces the information which kernel stack leak bugs can
 * reveal and blocks some uninitialized stack variable attacks.
 */

#include <linux/stackleak.h>
#include <linux/kprobes.h>

#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>
#include <linux/init.h>
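/*
 * The bypass key is false by default, so stack erasing is active unless
 * it is disabled at run time via the "stack_erasing" sysctl below.
 */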
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
#ifdef CONFIG_SYSCTL
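/*
 * The sysctl value is the logical inverse of the bypass key:
 * "stack_erasing" == 1 means erasing happens, i.e. the bypass is off.
 */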
static int stack_erasing_sysctl(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = 0;
	int state = !static_branch_unlikely(&stack_erasing_bypass);
	int prev_state = state;

	table->data = &state;
	table->maxlen = sizeof(int);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	state = !!state;
	if (ret || !write || state == prev_state)
		return ret;

	if (state)
		static_branch_disable(&stack_erasing_bypass);
	else
		static_branch_enable(&stack_erasing_bypass);

	pr_warn("stackleak: kernel stack erasing is %s\n",
					state ? "enabled" : "disabled");
	return ret;
}
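/*
 * Example (assuming CONFIG_STACKLEAK_RUNTIME_DISABLE=y and CONFIG_SYSCTL=y),
 * toggling erasing from userspace as root:
 *
 *   # echo 0 > /proc/sys/kernel/stack_erasing	(disable)
 *   # echo 1 > /proc/sys/kernel/stack_erasing	(re-enable)
 *
 * Writes outside 0..1 are rejected by proc_dointvec_minmax() through the
 * SYSCTL_ZERO/SYSCTL_ONE bounds below.
 */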
static struct ctl_table stackleak_sysctls[] = {
	{
		.procname	= "stack_erasing",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= stack_erasing_sysctl,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{}
};
static int __init stackleak_sysctls_init(void)
{
	register_sysctl_init("kernel", stackleak_sysctls);
	return 0;
}
late_initcall(stackleak_sysctls_init);
#endif /* CONFIG_SYSCTL */
#define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
#else
#define skip_erasing()	false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
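/*
 * With CONFIG_STACKLEAK_RUNTIME_DISABLE=y, skip_erasing() is a static
 * branch: in the common case it costs a single patched NOP at the top of
 * stackleak_erase(). Without that option it is constant false and the
 * check vanishes entirely.
 */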
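/*
 * The erasing work lives in __stackleak_erase() so that the wrapper can
 * test skip_erasing() before touching anything else: the static branch
 * then sits at the very start of stackleak_erase(), ahead of the loads
 * from 'current'. __always_inline keeps this helper inlined into the
 * noinstr stackleak_erase() and therefore uninstrumented.
 */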
static __always_inline void __stackleak_erase(void)
{
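	/*
	 * stackleak_task_low_bound() gives the lowest erasable stack
	 * address: one word above the STACK_END_MAGIC canary at the end of
	 * the task stack, which must never be overwritten.
	 */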
	const unsigned long task_stack_low = stackleak_task_low_bound(current);
	unsigned long erase_low = current->lowest_stack;
	unsigned long erase_high;
	unsigned int poison_count = 0;
	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);

	/* Search for the poison value in the kernel stack */
	while (erase_low > task_stack_low && poison_count <= depth) {
		if (*(unsigned long *)erase_low == STACKLEAK_POISON)
			poison_count++;
		else
			poison_count = 0;

		erase_low -= sizeof(unsigned long);
	}
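	/*
	 * The search above only stops once more than 'depth' consecutive
	 * poison words have been seen (or the low bound is hit), so a lone
	 * stack variable that happens to hold STACKLEAK_POISON is not
	 * mistaken for the start of the unused region.
	 */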
#ifdef CONFIG_STACKLEAK_METRICS
	current->prev_lowest_stack = erase_low;
#endif
	/*
	 * Now write the poison value to the kernel stack between 'erase_low'
	 * and 'erase_high'. We assume that the stack pointer doesn't change
	 * when we write poison.
	 */
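	/*
	 * If this is running on a separate entry (trampoline) stack rather
	 * than the task stack, the task stack is unused right up to its
	 * top, so the poison can extend to current_top_of_stack().
	 */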
	if (on_thread_stack())
		erase_high = current_stack_pointer;
	else
		erase_high = current_top_of_stack();

	while (erase_low < erase_high) {
		*(unsigned long *)erase_low = STACKLEAK_POISON;
		erase_low += sizeof(unsigned long);
	}
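	/*
	 * The reset below places 'lowest_stack' THREAD_SIZE/64 bytes under
	 * the stack top: 256 bytes with 16 KiB stacks, 128 with 8 KiB ones.
	 */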
	/* Reset the 'lowest_stack' value for the next syscall */
	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}
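/*
 * Called from architecture entry code on the return-to-userspace path.
 * The function is noinstr and __stackleak_erase() is __always_inline, so
 * none of the erasing code is itself instrumented.
 */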
asmlinkage void noinstr stackleak_erase(void)
{
	if (skip_erasing())
		return;

	__stackleak_erase();
}
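/*
 * The stackleak gcc plugin instruments eligible functions to call this
 * from their prologue (on x86 via asm volatile("call stackleak_track_stack")),
 * recording how deep the stack has grown. __no_caller_saved_registers
 * means this function preserves all registers it uses, so callers need
 * not spill theirs around the call.
 */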
void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
{
	unsigned long sp = current_stack_pointer;
	/*
	 * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
	 * STACKLEAK_SEARCH_DEPTH makes the poison search in
	 * stackleak_erase() unreliable. Let's prevent that.
	 */
	BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);
	/* 'lowest_stack' should be aligned on the register width boundary */
	sp = ALIGN(sp, sizeof(unsigned long));
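	/*
	 * Only ever lower the watermark, and never move it below the
	 * erasable low bound just above STACK_END_MAGIC.
	 */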
	if (sp < current->lowest_stack &&
	    sp >= stackleak_task_low_bound(current)) {
		current->lowest_stack = sp;
	}
}
EXPORT_SYMBOL(stackleak_track_stack);