/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_STACKLEAK_H
#define _LINUX_STACKLEAK_H

#include <linux/sched.h>
#include <linux/sched/task_stack.h>

/*
 * Check that the poison value points to the unused hole in the
 * virtual memory map for your platform.
 */
#define STACKLEAK_POISON -0xBEEF
#define STACKLEAK_SEARCH_DEPTH 128
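
/*
 * STACKLEAK_POISON is the value written over the used portion of the task
 * stack before returning to userspace. STACKLEAK_SEARCH_DEPTH is how many
 * bytes of contiguous poison the erase path must see before it treats the
 * rest of the stack as untouched (see stackleak_find_top_of_poison() below).
 */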

#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
#include <asm/stacktrace.h>
#include <linux/linkage.h>

/*
 * The lowest address on tsk's stack which we can plausibly erase.
 */
static __always_inline unsigned long
stackleak_task_low_bound(const struct task_struct *tsk)
{
	/*
	 * The lowest unsigned long on the task stack contains STACK_END_MAGIC,
	 * which we must not corrupt.
	 */
	return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long);
}

/*
 * The address immediately after the highest address on tsk's stack which we
 * can plausibly erase.
 */
static __always_inline unsigned long
stackleak_task_high_bound(const struct task_struct *tsk)
{
	/*
	 * The task's pt_regs lives at the top of the task stack and will be
	 * overwritten by exception entry, so there's no need to erase them.
	 */
	return (unsigned long)task_pt_regs(tsk);
}

/*
 * Find the address immediately above the poisoned region of the stack, where
 * that region falls between 'low' (inclusive) and 'high' (exclusive).
 */
static __always_inline unsigned long
stackleak_find_top_of_poison(const unsigned long low, const unsigned long high)
{
	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
	unsigned int poison_count = 0;
	unsigned long poison_high = high;
	unsigned long sp = high;

	while (sp > low && poison_count < depth) {
		sp -= sizeof(unsigned long);

		if (*(unsigned long *)sp == STACKLEAK_POISON) {
			poison_count++;
		} else {
			poison_count = 0;
			poison_high = sp;
		}
	}

	return poison_high;
}
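
/*
 * Illustrative sketch of how the helpers above fit together; the real erase
 * pass lives in kernel/stackleak.c, and erase_with_poison() below is a
 * hypothetical stand-in for its fill loop:
 *
 *	unsigned long low  = stackleak_task_low_bound(current);
 *	unsigned long high = stackleak_task_high_bound(current);
 *	unsigned long top  = stackleak_find_top_of_poison(low, current->lowest_stack);
 *
 *	erase_with_poison(top, high);	(fills [top, high) with the poison value)
 *
 * current->lowest_stack is then reset to the high bound for the next syscall.
 */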

static inline void stackleak_task_init(struct task_struct *t)
{
	t->lowest_stack = stackleak_task_low_bound(t);
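	/*
	 * With CONFIG_STACKLEAK_METRICS, the low watermark of the previous
	 * syscall is preserved in prev_lowest_stack so that stack usage can
	 * be reported; see the STACKLEAK_METRICS Kconfig help for how it is
	 * exposed.
	 */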
# ifdef CONFIG_STACKLEAK_METRICS
	t->prev_lowest_stack = t->lowest_stack;
# endif
}
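
/*
 * Entry points implemented in kernel/stackleak.c. stackleak_erase() poisons
 * the used portion of the task stack on exit to userspace; the
 * _on_task_stack/_off_task_stack variants let arch entry code state whether
 * the caller is currently running on the task stack, which bounds how high
 * the erase may safely go, while stackleak_erase() detects this at runtime.
 * stackleak_track_stack() is called from functions instrumented by the
 * stackleak GCC plugin and lowers current->lowest_stack towards the current
 * stack pointer.
 */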
asmlinkage void noinstr stackleak_erase(void);
asmlinkage void noinstr stackleak_erase_on_task_stack(void);
asmlinkage void noinstr stackleak_erase_off_task_stack(void);
void __no_caller_saved_registers noinstr stackleak_track_stack(void);

#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
static inline void stackleak_task_init(struct task_struct *t) { }
#endif

#endif