mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-29 17:23:36 +00:00
Add x86 shadow stack support
Convert IBT selftest to asm to fix objtool warning -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmTv1QQACgkQaDWVMHDJ krAUwhAAn6TOwHJK8BSkHeiQhON1nrlP3c5cv0AyZ2NP8RYDrZrSZvhpYBJ6wgKC Cx5CGq5nn9twYsYS3KsktLKDfR3lRdsQ7K9qtyFtYiaeaVKo+7gEKl/K+klwai8/ gninQWHk0zmSCja8Vi77q52WOMkQKapT8+vaON9EVDO8dVEi+CvhAIfPwMafuiwO Rk4X86SzoZu9FP79LcCg9XyGC/XbM2OG9eNUTSCKT40qTTKm5y4gix687NvAlaHR ko5MTsdl0Wfp6Qk0ohT74LnoA2c1g/FluvZIM33ci/2rFpkf9Hw7ip3lUXqn6CPx rKiZ+pVRc0xikVWkraMfIGMJfUd2rhelp8OyoozD7DB7UZw40Q4RW4N5tgq9Fhe9 MQs3p1v9N8xHdRKl365UcOczUxNAmv4u0nV5gY/4FMC6VjldCl2V9fmqYXyzFS4/ Ogg4FSd7c2JyGFKPs+5uXyi+RY2qOX4+nzHOoKD7SY616IYqtgKoz5usxETLwZ6s VtJOmJL0h//z0A7tBliB0zd+SQ5UQQBDC2XouQH2fNX2isJMn0UDmWJGjaHgK6Hh 8jVp6LNqf+CEQS387UxckOyj7fu438hDky1Ggaw4YqowEOhQeqLVO4++x+HITrbp AupXfbJw9h9cMN63Yc0gVxXQ9IMZ+M7UxLtZ3Cd8/PVztNy/clA= =3UUm -----END PGP SIGNATURE----- Merge tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 shadow stack support from Dave Hansen: "This is the long awaited x86 shadow stack support, part of Intel's Control-flow Enforcement Technology (CET). CET consists of two related security features: shadow stacks and indirect branch tracking. This series implements just the shadow stack part of this feature, and just for userspace. The main use case for shadow stack is providing protection against return oriented programming attacks. It works by maintaining a secondary (shadow) stack using a special memory type that has protections against modification. When executing a CALL instruction, the processor pushes the return address to both the normal stack and to the special permission shadow stack. Upon RET, the processor pops the shadow stack copy and compares it to the normal stack copy. 
For more information, refer to the links below for the earlier versions of this patch set" Link: https://lore.kernel.org/lkml/20220130211838.8382-1-rick.p.edgecombe@intel.com/ Link: https://lore.kernel.org/lkml/20230613001108.3040476-1-rick.p.edgecombe@intel.com/ * tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits) x86/shstk: Change order of __user in type x86/ibt: Convert IBT selftest to asm x86/shstk: Don't retry vm_munmap() on -EINTR x86/kbuild: Fix Documentation/ reference x86/shstk: Move arch detail comment out of core mm x86/shstk: Add ARCH_SHSTK_STATUS x86/shstk: Add ARCH_SHSTK_UNLOCK x86: Add PTRACE interface for shadow stack selftests/x86: Add shadow stack test x86/cpufeatures: Enable CET CR4 bit for shadow stack x86/shstk: Wire in shadow stack interface x86: Expose thread features in /proc/$PID/status x86/shstk: Support WRSS for userspace x86/shstk: Introduce map_shadow_stack syscall x86/shstk: Check that signal frame is shadow stack mem x86/shstk: Check that SSP is aligned on sigreturn x86/shstk: Handle signals for shadow stack x86/shstk: Introduce routines modifying shstk x86/shstk: Handle thread shadow stack x86/shstk: Add user-mode shadow stack support ...
This commit is contained in:
commit
df57721f9a
@ -22,6 +22,7 @@ x86-specific Documentation
|
||||
mtrr
|
||||
pat
|
||||
intel-hfi
|
||||
shstk
|
||||
iommu
|
||||
intel_txt
|
||||
amd-memory-encryption
|
||||
|
179
Documentation/arch/x86/shstk.rst
Normal file
179
Documentation/arch/x86/shstk.rst
Normal file
@ -0,0 +1,179 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================================================
|
||||
Control-flow Enforcement Technology (CET) Shadow Stack
|
||||
======================================================
|
||||
|
||||
CET Background
|
||||
==============
|
||||
|
||||
Control-flow Enforcement Technology (CET) covers several related x86 processor
|
||||
features that provide protection against control flow hijacking attacks. CET
|
||||
can protect both applications and the kernel.
|
||||
|
||||
CET introduces shadow stack and indirect branch tracking (IBT). A shadow stack
|
||||
is a secondary stack allocated from memory which cannot be directly modified by
|
||||
applications. When executing a CALL instruction, the processor pushes the
|
||||
return address to both the normal stack and the shadow stack. Upon
|
||||
function return, the processor pops the shadow stack copy and compares it
|
||||
to the normal stack copy. If the two differ, the processor raises a
|
||||
control-protection fault. IBT verifies indirect CALL/JMP targets are intended
|
||||
as marked by the compiler with 'ENDBR' opcodes. Not all CPUs have both Shadow
|
||||
Stack and Indirect Branch Tracking. Today in the 64-bit kernel, only userspace
|
||||
shadow stack and kernel IBT are supported.
|
||||
|
||||
Requirements to use Shadow Stack
|
||||
================================
|
||||
|
||||
To use userspace shadow stack you need HW that supports it, a kernel
|
||||
configured with it and userspace libraries compiled with it.
|
||||
|
||||
The kernel Kconfig option is X86_USER_SHADOW_STACK. When compiled in, shadow
|
||||
stacks can be disabled at runtime with the kernel parameter: nousershstk.
|
||||
|
||||
To build a user shadow stack enabled kernel, Binutils v2.29 or LLVM v6 or later
|
||||
are required.
|
||||
|
||||
At run time, /proc/cpuinfo shows CET features if the processor supports
|
||||
CET. "user_shstk" means that userspace shadow stack is supported on the current
|
||||
kernel and HW.
|
||||
|
||||
Application Enabling
|
||||
====================
|
||||
|
||||
An application's CET capability is marked in its ELF note and can be verified
|
||||
from readelf/llvm-readelf output::
|
||||
|
||||
readelf -n <application> | grep -a SHSTK
|
||||
properties: x86 feature: SHSTK
|
||||
|
||||
The kernel does not process these application markers directly. Applications
|
||||
or loaders must enable CET features using the arch_prctl() interface described below.
|
||||
Typically this would be done in dynamic loader or static runtime objects, as is
|
||||
the case in GLIBC.
|
||||
|
||||
Enabling arch_prctl()'s
|
||||
=======================
|
||||
|
||||
ELF features should be enabled by the loader using the below arch_prctl's. They
|
||||
are only supported in 64 bit user applications. These operate on the features
|
||||
on a per-thread basis. The enablement status is inherited on clone, so if the
|
||||
feature is enabled on the first thread, it will propagate to all the threads
|
||||
in an app.
|
||||
|
||||
arch_prctl(ARCH_SHSTK_ENABLE, unsigned long feature)
|
||||
Enable a single feature specified in 'feature'. Can only operate on
|
||||
one feature at a time.
|
||||
|
||||
arch_prctl(ARCH_SHSTK_DISABLE, unsigned long feature)
|
||||
Disable a single feature specified in 'feature'. Can only operate on
|
||||
one feature at a time.
|
||||
|
||||
arch_prctl(ARCH_SHSTK_LOCK, unsigned long features)
|
||||
Lock in features at their current enabled or disabled status. 'features'
|
||||
is a mask of all features to lock. All bits set are processed, unset bits
|
||||
are ignored. The mask is ORed with the existing value. So any feature bits
|
||||
set here cannot be enabled or disabled afterwards.
|
||||
|
||||
arch_prctl(ARCH_SHSTK_UNLOCK, unsigned long features)
|
||||
Unlock features. 'features' is a mask of all features to unlock. All
|
||||
bits set are processed, unset bits are ignored. Only works via ptrace.
|
||||
|
||||
arch_prctl(ARCH_SHSTK_STATUS, unsigned long addr)
|
||||
Copy the currently enabled features to the address passed in addr. The
|
||||
features are described using the same bits that the other operations take in
|
||||
'features'.
|
||||
|
||||
The return values are as follows. On success, return 0. On error, errno can
|
||||
be::
|
||||
|
||||
-EPERM if any of the passed features are locked.
|
||||
-ENOTSUPP if the feature is not supported by the hardware or
|
||||
kernel.
|
||||
-EINVAL if the arguments are invalid (non-existing feature, etc.)
|
||||
-EFAULT if the information could not be copied back to userspace
|
||||
|
||||
The supported feature bits are::
|
||||
|
||||
ARCH_SHSTK_SHSTK - Shadow stack
|
||||
ARCH_SHSTK_WRSS - WRSS
|
||||
|
||||
Currently shadow stack and WRSS are supported via this interface. WRSS
|
||||
can only be enabled with shadow stack, and is automatically disabled
|
||||
if shadow stack is disabled.
|
||||
|
||||
Proc Status
|
||||
===========
|
||||
To check if an application is actually running with shadow stack, the
|
||||
user can read /proc/$PID/status. It will report "wrss" or "shstk"
|
||||
depending on what is enabled. The lines look like this::
|
||||
|
||||
x86_Thread_features: shstk wrss
|
||||
x86_Thread_features_locked: shstk wrss
|
||||
|
||||
Implementation of the Shadow Stack
|
||||
==================================
|
||||
|
||||
Shadow Stack Size
|
||||
-----------------
|
||||
|
||||
A task's shadow stack is allocated from memory to a fixed size of
|
||||
MIN(RLIMIT_STACK, 4 GB). In other words, the shadow stack is allocated to
|
||||
the maximum size of the normal stack, but capped to 4 GB. In the case
|
||||
of the clone3 syscall, there is a stack size passed in and shadow stack
|
||||
uses this instead of the rlimit.
|
||||
|
||||
Signal
|
||||
------
|
||||
|
||||
The main program and its signal handlers use the same shadow stack. Because
|
||||
the shadow stack stores only return addresses, a large shadow stack covers
|
||||
the condition that both the program stack and the signal alternate stack run
|
||||
out.
|
||||
|
||||
When a signal happens, the old pre-signal state is pushed on the stack. When
|
||||
shadow stack is enabled, the shadow stack specific state is pushed onto the
|
||||
shadow stack. Today this is only the old SSP (shadow stack pointer), pushed
|
||||
in a special format with bit 63 set. On sigreturn this old SSP token is
|
||||
verified and restored by the kernel. The kernel will also push the normal
|
||||
restorer address to the shadow stack to help userspace avoid a shadow stack
|
||||
violation on the sigreturn path that goes through the restorer.
|
||||
|
||||
So the shadow stack signal frame format is as follows::
|
||||
|
||||
|1...old SSP| - Pointer to old pre-signal ssp in sigframe token format
|
||||
(bit 63 set to 1)
|
||||
| ...| - Other state may be added in the future
|
||||
|
||||
|
||||
32 bit ABI signals are not supported in shadow stack processes. Linux prevents
|
||||
32 bit execution while shadow stack is enabled by allocating shadow stacks
|
||||
outside of the 32 bit address space. When execution enters 32 bit mode, either
|
||||
via far call or returning to userspace, a #GP is generated by the hardware
|
||||
which will be delivered to the process as a segfault. When transitioning to
|
||||
userspace the register state will be as if the userspace ip being returned to
|
||||
caused the segfault.
|
||||
|
||||
Fork
|
||||
----
|
||||
|
||||
The shadow stack's vma has VM_SHADOW_STACK flag set; its PTEs are required
|
||||
to be read-only and dirty. When a shadow stack PTE is not RO and dirty, a
|
||||
shadow stack access triggers a page fault with the shadow stack access bit set
|
||||
in the page fault error code.
|
||||
|
||||
When a task forks a child, its shadow stack PTEs are copied and both the
|
||||
parent's and the child's shadow stack PTEs are cleared of the dirty bit.
|
||||
Upon the next shadow stack access, the resulting shadow stack page fault
|
||||
is handled by page copy/re-use.
|
||||
|
||||
When a pthread child is created, the kernel allocates a new shadow stack
|
||||
for the new thread. New shadow stack creation behaves like mmap() with respect
|
||||
to ASLR behavior. Similarly, on thread exit the thread's shadow stack is
|
||||
disabled.
|
||||
|
||||
Exec
|
||||
----
|
||||
|
||||
On exec, shadow stack features are disabled by the kernel, at which point
|
||||
userspace can choose to re-enable, or lock them.
|
@ -566,6 +566,7 @@ encoded manner. The codes are the following:
|
||||
mt arm64 MTE allocation tags are enabled
|
||||
um userfaultfd missing tracking
|
||||
uw userfaultfd wr-protect tracking
|
||||
ss shadow stack page
|
||||
== =======================================
|
||||
|
||||
Note that there is no guarantee that every flag and associated mnemonic will
|
||||
|
@ -46,7 +46,11 @@ PTE Page Table Helpers
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pte_mkclean | Creates a clean PTE |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pte_mkwrite | Creates a writable PTE |
|
||||
| pte_mkwrite | Creates a writable PTE of the type specified by |
|
||||
| | the VMA. |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pte_mkwrite_novma | Creates a writable PTE, of the conventional type |
|
||||
| | of writable. |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pte_wrprotect | Creates a write protected PTE |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
@ -118,7 +122,11 @@ PMD Page Table Helpers
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pmd_mkclean | Creates a clean PMD |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pmd_mkwrite | Creates a writable PMD |
|
||||
| pmd_mkwrite | Creates a writable PMD of the type specified by |
|
||||
| | the VMA. |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pmd_mkwrite_novma | Creates a writable PMD, of the conventional type |
|
||||
| | of writable. |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
| pmd_wrprotect | Creates a write protected PMD |
|
||||
+---------------------------+--------------------------------------------------+
|
||||
|
@ -931,6 +931,14 @@ config HAVE_ARCH_HUGE_VMALLOC
|
||||
config ARCH_WANT_HUGE_PMD_SHARE
|
||||
bool
|
||||
|
||||
# Archs that want to use pmd_mkwrite on kernel memory need it defined even
|
||||
# if there are no userspace memory management features that use it
|
||||
config ARCH_WANT_KERNEL_PMD_MKWRITE
|
||||
bool
|
||||
|
||||
config ARCH_WANT_PMD_MKWRITE
|
||||
def_bool TRANSPARENT_HUGEPAGE || ARCH_WANT_KERNEL_PMD_MKWRITE
|
||||
|
||||
config HAVE_ARCH_SOFT_DIRTY
|
||||
bool
|
||||
|
||||
|
@ -256,7 +256,7 @@ extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;
|
||||
extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; }
|
||||
extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; }
|
||||
extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); return pte; }
|
||||
extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; }
|
||||
extern inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) &= ~_PAGE_FOW; return pte; }
|
||||
extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; }
|
||||
extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; }
|
||||
|
||||
|
@ -21,7 +21,7 @@ static inline pmd_t pte_pmd(pte_t pte)
|
||||
}
|
||||
|
||||
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
|
||||
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
|
||||
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
|
||||
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
|
||||
|
@ -87,7 +87,7 @@
|
||||
|
||||
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
|
||||
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkwrite_novma, |= (_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
|
||||
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
|
||||
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
|
||||
|
@ -202,7 +202,7 @@ static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
|
||||
|
||||
PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
|
||||
PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(mkwrite_novma, &= ~L_PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
|
||||
PMD_BIT_FUNC(mkclean, &= ~L_PMD_SECT_DIRTY);
|
||||
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
|
||||
|
@ -228,7 +228,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
|
||||
return set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return clear_pte_bit(pte, __pgprot(L_PTE_RDONLY));
|
||||
}
|
||||
|
@ -682,7 +682,7 @@ asmlinkage void do_rseq_syscall(struct pt_regs *regs)
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -181,7 +181,7 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
|
||||
return pmd;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
|
||||
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
|
||||
@ -487,7 +487,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
|
||||
#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
|
||||
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
|
||||
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
|
||||
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
|
||||
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
|
||||
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
|
||||
|
@ -1344,7 +1344,7 @@ void __init minsigstksz_setup(void)
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -460,7 +460,7 @@ void compat_setup_restart_syscall(struct pt_regs *regs)
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -41,7 +41,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
|
||||
* read only (code, rodata). Clear the RDONLY bit from
|
||||
* the temporary mappings we use during restore.
|
||||
*/
|
||||
set_pte(dst_ptep, pte_mkwrite(pte));
|
||||
set_pte(dst_ptep, pte_mkwrite_novma(pte));
|
||||
} else if ((debug_pagealloc_enabled() ||
|
||||
is_kfence_address((void *)addr)) && !pte_none(pte)) {
|
||||
/*
|
||||
@ -55,7 +55,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
|
||||
*/
|
||||
BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
|
||||
set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,7 +176,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= _PAGE_WRITE;
|
||||
if (pte_val(pte) & _PAGE_MODIFIED)
|
||||
|
@ -300,7 +300,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
|
||||
}
|
||||
|
||||
/* pte_mkwrite - mark page as writable */
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= _PAGE_WRITE;
|
||||
return pte;
|
||||
|
@ -269,7 +269,7 @@ ia64_phys_addr_valid (unsigned long addr)
|
||||
* access rights:
|
||||
*/
|
||||
#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW))
|
||||
#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW))
|
||||
#define pte_mkwrite_novma(pte) (__pte(pte_val(pte) | _PAGE_AR_RW))
|
||||
#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A))
|
||||
#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
|
||||
#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
|
||||
|
@ -384,7 +384,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= _PAGE_WRITE;
|
||||
if (pte_val(pte) & _PAGE_MODIFIED)
|
||||
@ -493,7 +493,7 @@ static inline int pmd_write(pmd_t pmd)
|
||||
return !!(pmd_val(pmd) & _PAGE_WRITE);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd)
|
||||
static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
{
|
||||
pmd_val(pmd) |= _PAGE_WRITE;
|
||||
if (pmd_val(pmd) & _PAGE_MODIFIED)
|
||||
|
@ -210,7 +210,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= CF_PAGE_WRITABLE;
|
||||
return pte;
|
||||
|
@ -156,7 +156,7 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;
|
||||
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; }
|
||||
static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
|
||||
static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_RONLY; return pte; }
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) &= ~_PAGE_RONLY; return pte; }
|
||||
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
||||
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mknocache(pte_t pte)
|
||||
|
@ -144,7 +144,7 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESS
|
||||
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; }
|
||||
static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; }
|
||||
static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= SUN3_PAGE_WRITEABLE; return pte; }
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte){ pte_val(pte) |= SUN3_PAGE_WRITEABLE; return pte; }
|
||||
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= SUN3_PAGE_MODIFIED; return pte; }
|
||||
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= SUN3_PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE; return pte; }
|
||||
|
@ -266,7 +266,7 @@ static inline pte_t pte_mkread(pte_t pte) \
|
||||
{ pte_val(pte) |= _PAGE_USER; return pte; }
|
||||
static inline pte_t pte_mkexec(pte_t pte) \
|
||||
{ pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte) \
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte) \
|
||||
{ pte_val(pte) |= _PAGE_RW; return pte; }
|
||||
static inline pte_t pte_mkdirty(pte_t pte) \
|
||||
{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
||||
|
@ -319,7 +319,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte.pte_low |= _PAGE_WRITE;
|
||||
if (pte.pte_low & _PAGE_MODIFIED) {
|
||||
@ -374,7 +374,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= _PAGE_WRITE;
|
||||
if (pte_val(pte) & _PAGE_MODIFIED)
|
||||
@ -646,7 +646,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
|
||||
return pmd;
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd)
|
||||
static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
{
|
||||
pmd_val(pmd) |= _PAGE_WRITE;
|
||||
if (pmd_val(pmd) & _PAGE_MODIFIED)
|
||||
|
@ -129,7 +129,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= _PAGE_WRITE;
|
||||
return pte;
|
||||
|
@ -250,7 +250,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= _PAGE_WRITE;
|
||||
return pte;
|
||||
|
@ -322,7 +322,7 @@ static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; retu
|
||||
static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; return pte; }
|
||||
static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
||||
static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
|
||||
static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; }
|
||||
|
||||
/*
|
||||
|
@ -493,7 +493,7 @@ static inline pte_t pte_mkpte(pte_t pte)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) | _PAGE_RW);
|
||||
}
|
||||
|
@ -596,7 +596,7 @@ static inline pte_t pte_mkexec(pte_t pte)
|
||||
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
/*
|
||||
* write implies read, hence set both
|
||||
@ -1088,7 +1088,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
|
||||
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
|
||||
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
|
||||
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
|
||||
#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
|
||||
|
@ -170,8 +170,8 @@ void unmap_kernel_page(unsigned long va);
|
||||
#define pte_clear(mm, addr, ptep) \
|
||||
do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0)
|
||||
|
||||
#ifndef pte_mkwrite
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
#ifndef pte_mkwrite_novma
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) | _PAGE_RW);
|
||||
}
|
||||
|
@ -101,12 +101,12 @@ static inline int pte_write(pte_t pte)
|
||||
|
||||
#define pte_write pte_write
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) & ~_PAGE_RO);
|
||||
}
|
||||
|
||||
#define pte_mkwrite pte_mkwrite
|
||||
#define pte_mkwrite_novma pte_mkwrite_novma
|
||||
|
||||
static inline bool pte_user(pte_t pte)
|
||||
{
|
||||
|
@ -85,7 +85,7 @@
|
||||
#ifndef __ASSEMBLY__
|
||||
/* pte_clear moved to later in this file */
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) | _PAGE_RW);
|
||||
}
|
||||
|
@ -380,7 +380,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
|
||||
|
||||
/* static inline pte_t pte_mkread(pte_t pte) */
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) | _PAGE_WRITE);
|
||||
}
|
||||
@ -677,9 +677,9 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
|
||||
return pte_pmd(pte_mkyoung(pmd_pte(pmd)));
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd)
|
||||
static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
{
|
||||
return pte_pmd(pte_mkwrite(pmd_pte(pmd)));
|
||||
return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)));
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_wrprotect(pmd_t pmd)
|
||||
|
@ -127,6 +127,7 @@ config S390
|
||||
select ARCH_WANTS_NO_INSTR
|
||||
select ARCH_WANT_DEFAULT_BPF_JIT
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
select ARCH_WANT_KERNEL_PMD_MKWRITE
|
||||
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
|
||||
select BUILDTIME_TABLE_SORT
|
||||
select CLONE_BACKWARDS2
|
||||
|
@ -104,7 +104,7 @@ static inline int huge_pte_dirty(pte_t pte)
|
||||
|
||||
static inline pte_t huge_pte_mkwrite(pte_t pte)
|
||||
{
|
||||
return pte_mkwrite(pte);
|
||||
return pte_mkwrite_novma(pte);
|
||||
}
|
||||
|
||||
static inline pte_t huge_pte_mkdirty(pte_t pte)
|
||||
|
@ -1001,7 +1001,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
|
||||
return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE));
|
||||
if (pte_val(pte) & _PAGE_DIRTY)
|
||||
@ -1498,7 +1498,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
|
||||
return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd)
|
||||
static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
{
|
||||
pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
|
||||
if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
|
||||
|
@ -98,7 +98,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||
if (flags & SET_MEMORY_RO)
|
||||
new = pte_wrprotect(new);
|
||||
else if (flags & SET_MEMORY_RW)
|
||||
new = pte_mkwrite(pte_mkdirty(new));
|
||||
new = pte_mkwrite_novma(pte_mkdirty(new));
|
||||
if (flags & SET_MEMORY_NX)
|
||||
new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
|
||||
else if (flags & SET_MEMORY_X)
|
||||
@ -156,7 +156,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
|
||||
if (flags & SET_MEMORY_RO)
|
||||
new = pmd_wrprotect(new);
|
||||
else if (flags & SET_MEMORY_RW)
|
||||
new = pmd_mkwrite(pmd_mkdirty(new));
|
||||
new = pmd_mkwrite_novma(pmd_mkdirty(new));
|
||||
if (flags & SET_MEMORY_NX)
|
||||
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
|
||||
else if (flags & SET_MEMORY_X)
|
||||
|
@ -358,11 +358,11 @@ static inline pte_t pte_##fn(pte_t pte) { pte.pte_##h op; return pte; }
|
||||
* kernel permissions), we attempt to couple them a bit more sanely here.
|
||||
*/
|
||||
PTE_BIT_FUNC(high, wrprotect, &= ~(_PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE));
|
||||
PTE_BIT_FUNC(high, mkwrite, |= _PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE);
|
||||
PTE_BIT_FUNC(high, mkwrite_novma, |= _PAGE_EXT_USER_WRITE | _PAGE_EXT_KERN_WRITE);
|
||||
PTE_BIT_FUNC(high, mkhuge, |= _PAGE_SZHUGE);
|
||||
#else
|
||||
PTE_BIT_FUNC(low, wrprotect, &= ~_PAGE_RW);
|
||||
PTE_BIT_FUNC(low, mkwrite, |= _PAGE_RW);
|
||||
PTE_BIT_FUNC(low, mkwrite_novma, |= _PAGE_RW);
|
||||
PTE_BIT_FUNC(low, mkhuge, |= _PAGE_SZHUGE);
|
||||
#endif
|
||||
|
||||
|
@ -239,7 +239,7 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return __pte(pte_val(pte) & ~SRMMU_REF);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) | SRMMU_WRITE);
|
||||
}
|
||||
|
@ -518,7 +518,7 @@ static inline pte_t pte_mkclean(pte_t pte)
|
||||
return __pte(val);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
unsigned long val = pte_val(pte), mask;
|
||||
|
||||
@ -773,11 +773,11 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
|
||||
return __pmd(pte_val(pte));
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd)
|
||||
static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
{
|
||||
pte_t pte = __pte(pmd_val(pmd));
|
||||
|
||||
pte = pte_mkwrite(pte);
|
||||
pte = pte_mkwrite_novma(pte);
|
||||
|
||||
return __pmd(pte_val(pte));
|
||||
}
|
||||
|
@ -753,7 +753,7 @@ asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp)
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -562,7 +562,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -207,7 +207,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
|
||||
return(pte);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
if (unlikely(pte_get_bits(pte, _PAGE_RW)))
|
||||
return pte;
|
||||
|
@ -1815,6 +1815,11 @@ config CC_HAS_IBT
|
||||
(CC_IS_CLANG && CLANG_VERSION >= 140000)) && \
|
||||
$(as-instr,endbr64)
|
||||
|
||||
config X86_CET
|
||||
def_bool n
|
||||
help
|
||||
CET features configured (Shadow stack or IBT)
|
||||
|
||||
config X86_KERNEL_IBT
|
||||
prompt "Indirect Branch Tracking"
|
||||
def_bool y
|
||||
@ -1822,6 +1827,7 @@ config X86_KERNEL_IBT
|
||||
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
|
||||
depends on !LD_IS_LLD || LLD_VERSION >= 140000
|
||||
select OBJTOOL
|
||||
select X86_CET
|
||||
help
|
||||
Build the kernel with support for Indirect Branch Tracking, a
|
||||
hardware support coarse-grain forward-edge Control Flow Integrity
|
||||
@ -1915,6 +1921,24 @@ config X86_SGX
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config X86_USER_SHADOW_STACK
|
||||
bool "X86 userspace shadow stack"
|
||||
depends on AS_WRUSS
|
||||
depends on X86_64
|
||||
select ARCH_USES_HIGH_VMA_FLAGS
|
||||
select X86_CET
|
||||
help
|
||||
Shadow stack protection is a hardware feature that detects function
|
||||
return address corruption. This helps mitigate ROP attacks.
|
||||
Applications must be enabled to use it, and old userspace does not
|
||||
get protection "for free".
|
||||
|
||||
CPUs supporting shadow stacks were first released in 2020.
|
||||
|
||||
See Documentation/arch/x86/shstk.rst for more information.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config EFI
|
||||
bool "EFI runtime service support"
|
||||
depends on ACPI
|
||||
|
@ -24,3 +24,8 @@ config AS_GFNI
|
||||
def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2)
|
||||
help
|
||||
Supported by binutils >= 2.30 and LLVM integrated assembler
|
||||
|
||||
config AS_WRUSS
|
||||
def_bool $(as-instr,wrussq %rax$(comma)(%rbx))
|
||||
help
|
||||
Supported by binutils >= 2.31 and LLVM integrated assembler
|
||||
|
@ -374,6 +374,7 @@
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common cachestat sys_cachestat
|
||||
452 common fchmodat2 sys_fchmodat2
|
||||
453 64 map_shadow_stack sys_map_shadow_stack
|
||||
|
||||
#
|
||||
# Due to a historical design error, certain syscalls are numbered differently
|
||||
|
@ -307,6 +307,7 @@
|
||||
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
|
||||
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
|
||||
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
|
||||
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
|
||||
|
||||
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
|
||||
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
|
||||
@ -383,6 +384,7 @@
|
||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||
#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
|
||||
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
|
||||
#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */
|
||||
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
|
||||
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
|
||||
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
|
||||
|
@ -105,6 +105,18 @@
|
||||
# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
#define DISABLE_USER_SHSTK 0
|
||||
#else
|
||||
#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
#define DISABLE_IBT 0
|
||||
#else
|
||||
#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Make sure to add features to the correct mask
|
||||
*/
|
||||
@ -120,7 +132,7 @@
|
||||
#define DISABLED_MASK9 (DISABLE_SGX)
|
||||
#define DISABLED_MASK10 0
|
||||
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
|
||||
DISABLE_CALL_DEPTH_TRACKING)
|
||||
DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
|
||||
#define DISABLED_MASK12 (DISABLE_LAM)
|
||||
#define DISABLED_MASK13 0
|
||||
#define DISABLED_MASK14 0
|
||||
@ -128,7 +140,7 @@
|
||||
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
|
||||
DISABLE_ENQCMD)
|
||||
#define DISABLED_MASK17 0
|
||||
#define DISABLED_MASK18 0
|
||||
#define DISABLED_MASK18 (DISABLE_IBT)
|
||||
#define DISABLED_MASK19 0
|
||||
#define DISABLED_MASK20 0
|
||||
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
|
||||
|
@ -82,6 +82,15 @@ static inline void fpregs_unlock(void)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* FPU state gets lazily restored before returning to userspace. So when in the
|
||||
* kernel, the valid FPU state may be kept in the buffer. This function will force
|
||||
* restore all the fpu state to the registers early if needed, and lock them from
|
||||
* being automatically saved/restored. Then FPU state can be modified safely in the
|
||||
* registers, before unlocking with fpregs_unlock().
|
||||
*/
|
||||
void fpregs_lock_and_load(void);
|
||||
|
||||
#ifdef CONFIG_X86_DEBUG_FPU
|
||||
extern void fpregs_assert_state_consistent(void);
|
||||
#else
|
||||
|
@ -7,11 +7,12 @@
|
||||
|
||||
#include <linux/regset.h>
|
||||
|
||||
extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
|
||||
extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active,
|
||||
ssp_active;
|
||||
extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get,
|
||||
xstateregs_get;
|
||||
xstateregs_get, ssp_get;
|
||||
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
|
||||
xstateregs_set;
|
||||
xstateregs_set, ssp_set;
|
||||
|
||||
/*
|
||||
* xstateregs_active == regset_fpregs_active. Please refer to the comment
|
||||
|
@ -11,7 +11,8 @@
|
||||
|
||||
extern void save_fpregs_to_fpstate(struct fpu *fpu);
|
||||
extern void fpu__drop(struct fpu *fpu);
|
||||
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal);
|
||||
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
|
||||
unsigned long shstk_addr);
|
||||
extern void fpu_flush_thread(void);
|
||||
|
||||
/*
|
||||
|
@ -115,8 +115,8 @@ enum xfeature {
|
||||
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
|
||||
XFEATURE_PKRU,
|
||||
XFEATURE_PASID,
|
||||
XFEATURE_RSRVD_COMP_11,
|
||||
XFEATURE_RSRVD_COMP_12,
|
||||
XFEATURE_CET_USER,
|
||||
XFEATURE_CET_KERNEL_UNUSED,
|
||||
XFEATURE_RSRVD_COMP_13,
|
||||
XFEATURE_RSRVD_COMP_14,
|
||||
XFEATURE_LBR,
|
||||
@ -138,6 +138,8 @@ enum xfeature {
|
||||
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
|
||||
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
|
||||
#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
|
||||
#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER)
|
||||
#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL_UNUSED)
|
||||
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
|
||||
#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
|
||||
#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
|
||||
@ -252,6 +254,16 @@ struct pkru_state {
|
||||
u32 pad;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 11 is Control-flow Enforcement user states
|
||||
*/
|
||||
struct cet_user_state {
|
||||
/* user control-flow settings */
|
||||
u64 user_cet;
|
||||
/* user shadow stack pointer */
|
||||
u64 user_ssp;
|
||||
};
|
||||
|
||||
/*
|
||||
* State component 15: Architectural LBR configuration state.
|
||||
* The size of Arch LBR state depends on the number of LBRs (lbr_depth).
|
||||
|
@ -50,7 +50,8 @@
|
||||
#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA
|
||||
|
||||
/* All currently supported supervisor features */
|
||||
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
|
||||
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
|
||||
XFEATURE_MASK_CET_USER)
|
||||
|
||||
/*
|
||||
* A supervisor state component may not always contain valuable information,
|
||||
@ -77,7 +78,8 @@
|
||||
* Unsupported supervisor features. When a supervisor feature in this mask is
|
||||
* supported in the future, move it to the supported supervisor feature mask.
|
||||
*/
|
||||
#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
|
||||
#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \
|
||||
XFEATURE_MASK_CET_KERNEL)
|
||||
|
||||
/* All supervisor states including supported and unsupported states. */
|
||||
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
|
||||
|
@ -614,7 +614,7 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
|
||||
#endif
|
||||
|
||||
/* #CP */
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
#ifdef CONFIG_X86_CET
|
||||
DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
|
||||
#endif
|
||||
|
||||
|
@ -186,6 +186,8 @@ do { \
|
||||
#else
|
||||
#define deactivate_mm(tsk, mm) \
|
||||
do { \
|
||||
if (!tsk->vfork_done) \
|
||||
shstk_free(tsk); \
|
||||
load_gs_index(0); \
|
||||
loadsegment(fs, 0); \
|
||||
} while (0)
|
||||
|
@ -125,9 +125,15 @@ extern pmdval_t early_pmd_flags;
|
||||
* The following only work if pte_present() is true.
|
||||
* Undefined behaviour if not..
|
||||
*/
|
||||
static inline int pte_dirty(pte_t pte)
|
||||
static inline bool pte_dirty(pte_t pte)
|
||||
{
|
||||
return pte_flags(pte) & _PAGE_DIRTY;
|
||||
return pte_flags(pte) & _PAGE_DIRTY_BITS;
|
||||
}
|
||||
|
||||
static inline bool pte_shstk(pte_t pte)
|
||||
{
|
||||
return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
|
||||
(pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
|
||||
}
|
||||
|
||||
static inline int pte_young(pte_t pte)
|
||||
@ -135,9 +141,16 @@ static inline int pte_young(pte_t pte)
|
||||
return pte_flags(pte) & _PAGE_ACCESSED;
|
||||
}
|
||||
|
||||
static inline int pmd_dirty(pmd_t pmd)
|
||||
static inline bool pmd_dirty(pmd_t pmd)
|
||||
{
|
||||
return pmd_flags(pmd) & _PAGE_DIRTY;
|
||||
return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
|
||||
}
|
||||
|
||||
static inline bool pmd_shstk(pmd_t pmd)
|
||||
{
|
||||
return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
|
||||
(pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
|
||||
(_PAGE_DIRTY | _PAGE_PSE);
|
||||
}
|
||||
|
||||
#define pmd_young pmd_young
|
||||
@ -146,9 +159,9 @@ static inline int pmd_young(pmd_t pmd)
|
||||
return pmd_flags(pmd) & _PAGE_ACCESSED;
|
||||
}
|
||||
|
||||
static inline int pud_dirty(pud_t pud)
|
||||
static inline bool pud_dirty(pud_t pud)
|
||||
{
|
||||
return pud_flags(pud) & _PAGE_DIRTY;
|
||||
return pud_flags(pud) & _PAGE_DIRTY_BITS;
|
||||
}
|
||||
|
||||
static inline int pud_young(pud_t pud)
|
||||
@ -158,7 +171,27 @@ static inline int pud_young(pud_t pud)
|
||||
|
||||
static inline int pte_write(pte_t pte)
|
||||
{
|
||||
return pte_flags(pte) & _PAGE_RW;
|
||||
/*
|
||||
* Shadow stack pages are logically writable, but do not have
|
||||
* _PAGE_RW. Check for them separately from _PAGE_RW itself.
|
||||
*/
|
||||
return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
|
||||
}
|
||||
|
||||
#define pmd_write pmd_write
|
||||
static inline int pmd_write(pmd_t pmd)
|
||||
{
|
||||
/*
|
||||
* Shadow stack pages are logically writable, but do not have
|
||||
* _PAGE_RW. Check for them separately from _PAGE_RW itself.
|
||||
*/
|
||||
return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
|
||||
}
|
||||
|
||||
#define pud_write pud_write
|
||||
static inline int pud_write(pud_t pud)
|
||||
{
|
||||
return pud_flags(pud) & _PAGE_RW;
|
||||
}
|
||||
|
||||
static inline int pte_huge(pte_t pte)
|
||||
@ -292,9 +325,63 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
|
||||
return native_make_pte(v & ~clear);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write protection operations can result in Dirty=1,Write=0 PTEs. But in the
|
||||
* case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So
|
||||
* when creating dirty, write-protected memory, a software bit is used:
|
||||
* _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the
|
||||
* Dirty bit to SavedDirty, and vice-versa.
|
||||
*
|
||||
* This shifting is only done if needed. In the case of shifting
|
||||
* Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of
|
||||
* shifting SavedDirty->Dirty, the condition is Write=1.
|
||||
*/
|
||||
static inline pgprotval_t mksaveddirty_shift(pgprotval_t v)
|
||||
{
|
||||
pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1;
|
||||
|
||||
v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY;
|
||||
v &= ~(cond << _PAGE_BIT_DIRTY);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v)
|
||||
{
|
||||
pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1;
|
||||
|
||||
v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY;
|
||||
v &= ~(cond << _PAGE_BIT_SAVED_DIRTY);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mksaveddirty(pte_t pte)
|
||||
{
|
||||
pteval_t v = native_pte_val(pte);
|
||||
|
||||
v = mksaveddirty_shift(v);
|
||||
return native_make_pte(v);
|
||||
}
|
||||
|
||||
static inline pte_t pte_clear_saveddirty(pte_t pte)
|
||||
{
|
||||
pteval_t v = native_pte_val(pte);
|
||||
|
||||
v = clear_saveddirty_shift(v);
|
||||
return native_make_pte(v);
|
||||
}
|
||||
|
||||
static inline pte_t pte_wrprotect(pte_t pte)
|
||||
{
|
||||
return pte_clear_flags(pte, _PAGE_RW);
|
||||
pte = pte_clear_flags(pte, _PAGE_RW);
|
||||
|
||||
/*
|
||||
* Blindly clearing _PAGE_RW might accidentally create
|
||||
* a shadow stack PTE (Write=0,Dirty=1). Move the hardware
|
||||
* dirty value to the software bit, if present.
|
||||
*/
|
||||
return pte_mksaveddirty(pte);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
|
||||
@ -332,7 +419,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte)
|
||||
|
||||
static inline pte_t pte_mkclean(pte_t pte)
|
||||
{
|
||||
return pte_clear_flags(pte, _PAGE_DIRTY);
|
||||
return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkold(pte_t pte)
|
||||
@ -347,7 +434,16 @@ static inline pte_t pte_mkexec(pte_t pte)
|
||||
|
||||
static inline pte_t pte_mkdirty(pte_t pte)
|
||||
{
|
||||
return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
|
||||
pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
|
||||
|
||||
return pte_mksaveddirty(pte);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite_shstk(pte_t pte)
|
||||
{
|
||||
pte = pte_clear_flags(pte, _PAGE_RW);
|
||||
|
||||
return pte_set_flags(pte, _PAGE_DIRTY);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkyoung(pte_t pte)
|
||||
@ -355,11 +451,15 @@ static inline pte_t pte_mkyoung(pte_t pte)
|
||||
return pte_set_flags(pte, _PAGE_ACCESSED);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{
|
||||
return pte_set_flags(pte, _PAGE_RW);
|
||||
}
|
||||
|
||||
struct vm_area_struct;
|
||||
pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
|
||||
#define pte_mkwrite pte_mkwrite
|
||||
|
||||
static inline pte_t pte_mkhuge(pte_t pte)
|
||||
{
|
||||
return pte_set_flags(pte, _PAGE_PSE);
|
||||
@ -404,9 +504,34 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
|
||||
return native_make_pmd(v & ~clear);
|
||||
}
|
||||
|
||||
/* See comments above mksaveddirty_shift() */
|
||||
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
v = mksaveddirty_shift(v);
|
||||
return native_make_pmd(v);
|
||||
}
|
||||
|
||||
/* See comments above mksaveddirty_shift() */
|
||||
static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
v = clear_saveddirty_shift(v);
|
||||
return native_make_pmd(v);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_wrprotect(pmd_t pmd)
|
||||
{
|
||||
return pmd_clear_flags(pmd, _PAGE_RW);
|
||||
pmd = pmd_clear_flags(pmd, _PAGE_RW);
|
||||
|
||||
/*
|
||||
* Blindly clearing _PAGE_RW might accidentally create
|
||||
* a shadow stack PMD (RW=0, Dirty=1). Move the hardware
|
||||
* dirty value to the software bit.
|
||||
*/
|
||||
return pmd_mksaveddirty(pmd);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
|
||||
@ -433,12 +558,21 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
|
||||
|
||||
static inline pmd_t pmd_mkclean(pmd_t pmd)
|
||||
{
|
||||
return pmd_clear_flags(pmd, _PAGE_DIRTY);
|
||||
return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkdirty(pmd_t pmd)
|
||||
{
|
||||
return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
|
||||
pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
|
||||
|
||||
return pmd_mksaveddirty(pmd);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
|
||||
{
|
||||
pmd = pmd_clear_flags(pmd, _PAGE_RW);
|
||||
|
||||
return pmd_set_flags(pmd, _PAGE_DIRTY);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
|
||||
@ -456,11 +590,14 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
|
||||
return pmd_set_flags(pmd, _PAGE_ACCESSED);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd)
|
||||
static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
{
|
||||
return pmd_set_flags(pmd, _PAGE_RW);
|
||||
}
|
||||
|
||||
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
|
||||
#define pmd_mkwrite pmd_mkwrite
|
||||
|
||||
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
@ -475,6 +612,24 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
|
||||
return native_make_pud(v & ~clear);
|
||||
}
|
||||
|
||||
/* See comments above mksaveddirty_shift() */
|
||||
static inline pud_t pud_mksaveddirty(pud_t pud)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
v = mksaveddirty_shift(v);
|
||||
return native_make_pud(v);
|
||||
}
|
||||
|
||||
/* See comments above mksaveddirty_shift() */
|
||||
static inline pud_t pud_clear_saveddirty(pud_t pud)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
v = clear_saveddirty_shift(v);
|
||||
return native_make_pud(v);
|
||||
}
|
||||
|
||||
static inline pud_t pud_mkold(pud_t pud)
|
||||
{
|
||||
return pud_clear_flags(pud, _PAGE_ACCESSED);
|
||||
@ -482,17 +637,26 @@ static inline pud_t pud_mkold(pud_t pud)
|
||||
|
||||
static inline pud_t pud_mkclean(pud_t pud)
|
||||
{
|
||||
return pud_clear_flags(pud, _PAGE_DIRTY);
|
||||
return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
|
||||
}
|
||||
|
||||
static inline pud_t pud_wrprotect(pud_t pud)
|
||||
{
|
||||
return pud_clear_flags(pud, _PAGE_RW);
|
||||
pud = pud_clear_flags(pud, _PAGE_RW);
|
||||
|
||||
/*
|
||||
* Blindly clearing _PAGE_RW might accidentally create
|
||||
* a shadow stack PUD (RW=0, Dirty=1). Move the hardware
|
||||
* dirty value to the software bit.
|
||||
*/
|
||||
return pud_mksaveddirty(pud);
|
||||
}
|
||||
|
||||
static inline pud_t pud_mkdirty(pud_t pud)
|
||||
{
|
||||
return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
|
||||
pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
|
||||
|
||||
return pud_mksaveddirty(pud);
|
||||
}
|
||||
|
||||
static inline pud_t pud_mkdevmap(pud_t pud)
|
||||
@ -512,7 +676,9 @@ static inline pud_t pud_mkyoung(pud_t pud)
|
||||
|
||||
static inline pud_t pud_mkwrite(pud_t pud)
|
||||
{
|
||||
return pud_set_flags(pud, _PAGE_RW);
|
||||
pud = pud_set_flags(pud, _PAGE_RW);
|
||||
|
||||
return pud_clear_saveddirty(pud);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
|
||||
@ -629,6 +795,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
|
||||
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
||||
{
|
||||
pteval_t val = pte_val(pte), oldval = val;
|
||||
pte_t pte_result;
|
||||
|
||||
/*
|
||||
* Chop off the NX bit (if present), and add the NX portion of
|
||||
@ -637,17 +804,54 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
||||
val &= _PAGE_CHG_MASK;
|
||||
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
|
||||
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
|
||||
return __pte(val);
|
||||
|
||||
pte_result = __pte(val);
|
||||
|
||||
/*
|
||||
* To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
|
||||
* 1. Marking Write=0 PTEs Dirty=1
|
||||
* 2. Marking Dirty=1 PTEs Write=0
|
||||
*
|
||||
* The first case cannot happen because the _PAGE_CHG_MASK will filter
|
||||
* out any Dirty bit passed in newprot. Handle the second case by
|
||||
* going through the mksaveddirty exercise. Only do this if the old
|
||||
* value was Write=1 to avoid doing this on Shadow Stack PTEs.
|
||||
*/
|
||||
if (oldval & _PAGE_RW)
|
||||
pte_result = pte_mksaveddirty(pte_result);
|
||||
else
|
||||
pte_result = pte_clear_saveddirty(pte_result);
|
||||
|
||||
return pte_result;
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
|
||||
{
|
||||
pmdval_t val = pmd_val(pmd), oldval = val;
|
||||
pmd_t pmd_result;
|
||||
|
||||
val &= _HPAGE_CHG_MASK;
|
||||
val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
|
||||
val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
|
||||
val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
|
||||
return __pmd(val);
|
||||
|
||||
pmd_result = __pmd(val);
|
||||
|
||||
/*
|
||||
* To avoid creating Write=0,Dirty=1 PMDs, pmd_modify() needs to avoid:
|
||||
* 1. Marking Write=0 PMDs Dirty=1
|
||||
* 2. Marking Dirty=1 PMDs Write=0
|
||||
*
|
||||
* The first case cannot happen because the _HPAGE_CHG_MASK will filter
|
||||
* out any Dirty bit passed in newprot. Handle the second case by
|
||||
* going through the mksaveddirty exercise. Only do this if the old
|
||||
* value was Write=1 to avoid doing this on Shadow Stack PMDs.
|
||||
*/
|
||||
if (oldval & _PAGE_RW)
|
||||
pmd_result = pmd_mksaveddirty(pmd_result);
|
||||
else
|
||||
pmd_result = pmd_clear_saveddirty(pmd_result);
|
||||
|
||||
return pmd_result;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -831,7 +1035,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
||||
* (Currently stuck as a macro because of indirect forward reference
|
||||
* to linux/mm.h:page_to_nid())
|
||||
*/
|
||||
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
||||
#define mk_pte(page, pgprot) \
|
||||
({ \
|
||||
pgprot_t __pgprot = pgprot; \
|
||||
\
|
||||
WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \
|
||||
_PAGE_DIRTY); \
|
||||
pfn_pte(page_to_pfn(page), __pgprot); \
|
||||
})
|
||||
|
||||
static inline int pmd_bad(pmd_t pmd)
|
||||
{
|
||||
@ -1090,7 +1301,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
|
||||
static inline void ptep_set_wrprotect(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
|
||||
/*
|
||||
* Avoid accidentally creating shadow stack PTEs
|
||||
* (Write=0,Dirty=1). Use cmpxchg() to prevent races with
|
||||
* the hardware setting Dirty=1.
|
||||
*/
|
||||
pte_t old_pte, new_pte;
|
||||
|
||||
old_pte = READ_ONCE(*ptep);
|
||||
do {
|
||||
new_pte = pte_wrprotect(old_pte);
|
||||
} while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte));
|
||||
}
|
||||
|
||||
#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
|
||||
@ -1116,12 +1337,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
|
||||
unsigned long address, pmd_t *pmdp);
|
||||
|
||||
|
||||
#define pmd_write pmd_write
|
||||
static inline int pmd_write(pmd_t pmd)
|
||||
{
|
||||
return pmd_flags(pmd) & _PAGE_RW;
|
||||
}
|
||||
|
||||
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
||||
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp)
|
||||
@ -1148,13 +1363,17 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
|
||||
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
|
||||
}
|
||||
/*
|
||||
* Avoid accidentally creating shadow stack PTEs
|
||||
* (Write=0,Dirty=1). Use cmpxchg() to prevent races with
|
||||
* the hardware setting Dirty=1.
|
||||
*/
|
||||
pmd_t old_pmd, new_pmd;
|
||||
|
||||
#define pud_write pud_write
|
||||
static inline int pud_write(pud_t pud)
|
||||
{
|
||||
return pud_flags(pud) & _PAGE_RW;
|
||||
old_pmd = READ_ONCE(*pmdp);
|
||||
do {
|
||||
new_pmd = pmd_wrprotect(old_pmd);
|
||||
} while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd));
|
||||
}
|
||||
|
||||
#ifndef pmdp_establish
|
||||
@ -1412,6 +1631,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write)
|
||||
{
|
||||
unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
|
||||
|
||||
/*
|
||||
* Write=0,Dirty=1 PTEs are shadow stack, which the kernel
|
||||
* shouldn't generally allow access to, but since they
|
||||
* are already Write=0, the below logic covers both cases.
|
||||
*/
|
||||
if (write)
|
||||
need_pte_bits |= _PAGE_RW;
|
||||
|
||||
@ -1453,6 +1677,12 @@ static inline bool arch_has_hw_pte_young(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
#define arch_check_zapped_pte arch_check_zapped_pte
|
||||
void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
|
||||
|
||||
#define arch_check_zapped_pmd arch_check_zapped_pmd
|
||||
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
|
||||
|
||||
#ifdef CONFIG_XEN_PV
|
||||
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
|
||||
static inline bool arch_has_hw_nonleaf_pmd_young(void)
|
||||
|
@ -21,7 +21,8 @@
|
||||
#define _PAGE_BIT_SOFTW2 10 /* " */
|
||||
#define _PAGE_BIT_SOFTW3 11 /* " */
|
||||
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
|
||||
#define _PAGE_BIT_SOFTW4 58 /* available for programmer */
|
||||
#define _PAGE_BIT_SOFTW4 57 /* available for programmer */
|
||||
#define _PAGE_BIT_SOFTW5 58 /* available for programmer */
|
||||
#define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */
|
||||
#define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */
|
||||
#define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */
|
||||
@ -34,6 +35,13 @@
|
||||
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
|
||||
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */
|
||||
#else
|
||||
/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
|
||||
#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */
|
||||
#endif
|
||||
|
||||
/* If _PAGE_BIT_PRESENT is clear, we use these: */
|
||||
/* - if the user mapped it with PROT_NONE; pte_present gives true */
|
||||
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
|
||||
@ -117,6 +125,18 @@
|
||||
#define _PAGE_SOFTW4 (_AT(pteval_t, 0))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The hardware requires shadow stack to be Write=0,Dirty=1. However,
|
||||
* there are valid cases where the kernel might create read-only PTEs that
|
||||
* are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In
|
||||
* this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit,
|
||||
* to avoid creating wrong "shadow stack" PTEs. Such PTEs have
|
||||
* (Write=0,SavedDirty=1,Dirty=0) set.
|
||||
*/
|
||||
#define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY)
|
||||
|
||||
#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY)
|
||||
|
||||
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
|
||||
|
||||
/*
|
||||
@ -125,10 +145,10 @@
|
||||
* instance, and is *not* included in this mask since
|
||||
* pte_modify() does modify it.
|
||||
*/
|
||||
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
|
||||
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\
|
||||
_PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
|
||||
_PAGE_UFFD_WP)
|
||||
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
|
||||
_PAGE_SPECIAL | _PAGE_ACCESSED | \
|
||||
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
|
||||
_PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
|
||||
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
|
||||
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
|
||||
|
||||
@ -189,14 +209,22 @@ enum page_cache_mode {
|
||||
|
||||
#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
|
||||
|
||||
/*
|
||||
* Page tables need to have Write=1 in order for any lower PTEs to be
|
||||
* writable. This includes shadow stack memory (Write=0, Dirty=1)
|
||||
*/
|
||||
#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0)
|
||||
#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC)
|
||||
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
|
||||
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
|
||||
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G)
|
||||
|
||||
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G)
|
||||
#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G)
|
||||
#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
|
||||
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
|
||||
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G)
|
||||
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
|
||||
#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G)
|
||||
#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP)
|
||||
|
@ -28,6 +28,7 @@ struct vm86;
|
||||
#include <asm/unwind_hints.h>
|
||||
#include <asm/vmxfeatures.h>
|
||||
#include <asm/vdso/processor.h>
|
||||
#include <asm/shstk.h>
|
||||
|
||||
#include <linux/personality.h>
|
||||
#include <linux/cache.h>
|
||||
@ -474,6 +475,13 @@ struct thread_struct {
|
||||
*/
|
||||
u32 pkru;
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
unsigned long features;
|
||||
unsigned long features_locked;
|
||||
|
||||
struct thread_shstk shstk;
|
||||
#endif
|
||||
|
||||
/* Floating point and extended processor state */
|
||||
struct fpu fpu;
|
||||
/*
|
||||
|
38
arch/x86/include/asm/shstk.h
Normal file
38
arch/x86/include/asm/shstk.h
Normal file
@ -0,0 +1,38 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_X86_SHSTK_H
|
||||
#define _ASM_X86_SHSTK_H
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/types.h>
|
||||
|
||||
struct task_struct;
|
||||
struct ksignal;
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
struct thread_shstk {
|
||||
u64 base;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
long shstk_prctl(struct task_struct *task, int option, unsigned long arg2);
|
||||
void reset_thread_features(void);
|
||||
unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags,
|
||||
unsigned long stack_size);
|
||||
void shstk_free(struct task_struct *p);
|
||||
int setup_signal_shadow_stack(struct ksignal *ksig);
|
||||
int restore_signal_shadow_stack(void);
|
||||
#else
|
||||
static inline long shstk_prctl(struct task_struct *task, int option,
|
||||
unsigned long arg2) { return -EINVAL; }
|
||||
static inline void reset_thread_features(void) {}
|
||||
static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
|
||||
unsigned long clone_flags,
|
||||
unsigned long stack_size) { return 0; }
|
||||
static inline void shstk_free(struct task_struct *p) {}
|
||||
static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
|
||||
static inline int restore_signal_shadow_stack(void) { return 0; }
|
||||
#endif /* CONFIG_X86_USER_SHADOW_STACK */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_X86_SHSTK_H */
|
@ -202,6 +202,19 @@ static inline void clwb(volatile void *__p)
|
||||
: [pax] "a" (p));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
|
||||
{
|
||||
asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
|
||||
_ASM_EXTABLE(1b, %l[fail])
|
||||
:: [addr] "r" (addr), [val] "r" (val)
|
||||
:: fail);
|
||||
return 0;
|
||||
fail:
|
||||
return -EFAULT;
|
||||
}
|
||||
#endif /* CONFIG_X86_USER_SHADOW_STACK */
|
||||
|
||||
#define nop() asm volatile ("nop")
|
||||
|
||||
static inline void serialize(void)
|
||||
|
@ -306,7 +306,8 @@ static inline bool pte_flags_need_flush(unsigned long oldflags,
|
||||
const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
|
||||
_PAGE_ACCESSED;
|
||||
const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
|
||||
_PAGE_SOFTW3 | _PAGE_SOFTW4;
|
||||
_PAGE_SOFTW3 | _PAGE_SOFTW4 |
|
||||
_PAGE_SAVED_DIRTY;
|
||||
const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
|
||||
_PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
|
||||
_PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
|
||||
|
@ -11,6 +11,7 @@
|
||||
* bit 3 == 1: use of reserved bit detected
|
||||
* bit 4 == 1: fault was an instruction fetch
|
||||
* bit 5 == 1: protection keys block access
|
||||
* bit 6 == 1: shadow stack access fault
|
||||
* bit 15 == 1: SGX MMU page-fault
|
||||
*/
|
||||
enum x86_pf_error_code {
|
||||
@ -20,6 +21,7 @@ enum x86_pf_error_code {
|
||||
X86_PF_RSVD = 1 << 3,
|
||||
X86_PF_INSTR = 1 << 4,
|
||||
X86_PF_PK = 1 << 5,
|
||||
X86_PF_SHSTK = 1 << 6,
|
||||
X86_PF_SGX = 1 << 15,
|
||||
};
|
||||
|
||||
|
@ -18,7 +18,8 @@ void __init trap_init(void);
|
||||
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
|
||||
#endif
|
||||
|
||||
extern bool ibt_selftest(void);
|
||||
extern int ibt_selftest(void);
|
||||
extern int ibt_selftest_noendbr(void);
|
||||
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
/* For handling the F00F bug */
|
||||
@ -47,4 +48,16 @@ void __noreturn handle_stack_overflow(struct pt_regs *regs,
|
||||
struct stack_info *info);
|
||||
#endif
|
||||
|
||||
static inline void cond_local_irq_enable(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static inline void cond_local_irq_disable(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_TRAPS_H */
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _ASM_X86_MMAN_H
|
||||
|
||||
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
|
||||
#define MAP_ABOVE4G 0x80 /* only map above 4GB */
|
||||
|
||||
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
#define arch_calc_vm_prot_bits(prot, key) ( \
|
||||
@ -12,6 +13,9 @@
|
||||
((key) & 0x8 ? VM_PKEY_BIT3 : 0))
|
||||
#endif
|
||||
|
||||
/* Flags for map_shadow_stack(2) */
|
||||
#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
|
||||
|
||||
#include <asm-generic/mman.h>
|
||||
|
||||
#endif /* _ASM_X86_MMAN_H */
|
||||
|
@ -23,9 +23,21 @@
|
||||
#define ARCH_MAP_VDSO_32 0x2002
|
||||
#define ARCH_MAP_VDSO_64 0x2003
|
||||
|
||||
/* Don't use 0x3001-0x3004 because of old glibcs */
|
||||
|
||||
#define ARCH_GET_UNTAG_MASK 0x4001
|
||||
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
|
||||
#define ARCH_GET_MAX_TAG_BITS 0x4003
|
||||
#define ARCH_FORCE_TAGGED_SVA 0x4004
|
||||
|
||||
#define ARCH_SHSTK_ENABLE 0x5001
|
||||
#define ARCH_SHSTK_DISABLE 0x5002
|
||||
#define ARCH_SHSTK_LOCK 0x5003
|
||||
#define ARCH_SHSTK_UNLOCK 0x5004
|
||||
#define ARCH_SHSTK_STATUS 0x5005
|
||||
|
||||
/* ARCH_SHSTK_ features bits */
|
||||
#define ARCH_SHSTK_SHSTK (1ULL << 0)
|
||||
#define ARCH_SHSTK_WRSS (1ULL << 1)
|
||||
|
||||
#endif /* _ASM_X86_PRCTL_H */
|
||||
|
@ -48,6 +48,7 @@ obj-y += process_$(BITS).o signal.o signal_$(BITS).o
|
||||
obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
||||
obj-y += time.o ioport.o dumpstack.o nmi.o
|
||||
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
|
||||
obj-$(CONFIG_X86_KERNEL_IBT) += ibt_selftest.o
|
||||
obj-y += setup.o x86_init.o i8259.o irqinit.o
|
||||
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
|
||||
obj-$(CONFIG_IRQ_WORK) += irq_work.o
|
||||
@ -144,6 +145,10 @@ obj-$(CONFIG_CFI_CLANG) += cfi.o
|
||||
|
||||
obj-$(CONFIG_CALL_THUNKS) += callthunks.o
|
||||
|
||||
obj-$(CONFIG_X86_CET) += cet.o
|
||||
|
||||
obj-$(CONFIG_X86_USER_SHADOW_STACK) += shstk.o
|
||||
|
||||
###
|
||||
# 64 bit specific files
|
||||
ifeq ($(CONFIG_X86_64),y)
|
||||
|
131
arch/x86/kernel/cet.c
Normal file
131
arch/x86/kernel/cet.c
Normal file
@ -0,0 +1,131 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/ptrace.h>
|
||||
#include <asm/bugs.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
enum cp_error_code {
|
||||
CP_EC = (1 << 15) - 1,
|
||||
|
||||
CP_RET = 1,
|
||||
CP_IRET = 2,
|
||||
CP_ENDBR = 3,
|
||||
CP_RSTRORSSP = 4,
|
||||
CP_SETSSBSY = 5,
|
||||
|
||||
CP_ENCL = 1 << 15,
|
||||
};
|
||||
|
||||
static const char cp_err[][10] = {
|
||||
[0] = "unknown",
|
||||
[1] = "near ret",
|
||||
[2] = "far/iret",
|
||||
[3] = "endbranch",
|
||||
[4] = "rstorssp",
|
||||
[5] = "setssbsy",
|
||||
};
|
||||
|
||||
static const char *cp_err_string(unsigned long error_code)
|
||||
{
|
||||
unsigned int cpec = error_code & CP_EC;
|
||||
|
||||
if (cpec >= ARRAY_SIZE(cp_err))
|
||||
cpec = 0;
|
||||
return cp_err[cpec];
|
||||
}
|
||||
|
||||
/*
 * Warn (once) about a #CP that arrived in a mode for which the matching
 * CET feature is not enabled, naming the mode and the decoded error code.
 */
static void do_unexpected_cp(struct pt_regs *regs, unsigned long error_code)
{
	const char *mode = user_mode(regs) ? "user mode" : "kernel mode";
	const char *reason = cp_err_string(error_code);

	WARN_ONCE(1, "Unexpected %s #CP, error_code: %s\n", mode, reason);
}
|
||||
|
||||
static DEFINE_RATELIMIT_STATE(cpf_rate, DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
|
||||
static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
unsigned long ssp;
|
||||
|
||||
/*
|
||||
* An exception was just taken from userspace. Since interrupts are disabled
|
||||
* here, no scheduling should have messed with the registers yet and they
|
||||
* will be whatever is live in userspace. So read the SSP before enabling
|
||||
* interrupts so locking the fpregs to do it later is not required.
|
||||
*/
|
||||
rdmsrl(MSR_IA32_PL3_SSP, ssp);
|
||||
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
tsk = current;
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_nr = X86_TRAP_CP;
|
||||
|
||||
/* Ratelimit to prevent log spamming. */
|
||||
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
||||
__ratelimit(&cpf_rate)) {
|
||||
pr_emerg("%s[%d] control protection ip:%lx sp:%lx ssp:%lx error:%lx(%s)%s",
|
||||
tsk->comm, task_pid_nr(tsk),
|
||||
regs->ip, regs->sp, ssp, error_code,
|
||||
cp_err_string(error_code),
|
||||
error_code & CP_ENCL ? " in enclave" : "");
|
||||
print_vma_addr(KERN_CONT " in ", regs->ip);
|
||||
pr_cont("\n");
|
||||
}
|
||||
|
||||
force_sig_fault(SIGSEGV, SEGV_CPERR, (void __user *)0);
|
||||
cond_local_irq_disable(regs);
|
||||
}
|
||||
|
||||
static __ro_after_init bool ibt_fatal = true;
|
||||
|
||||
static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
{
|
||||
if ((error_code & CP_EC) != CP_ENDBR) {
|
||||
do_unexpected_cp(regs, error_code);
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) {
|
||||
regs->ax = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
|
||||
if (!ibt_fatal) {
|
||||
printk(KERN_DEFAULT CUT_HERE);
|
||||
__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
|
||||
return;
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
 * Parse the "ibt=" kernel command line option.
 *
 * "off" clears X86_FEATURE_IBT; "warn" makes a missing ENDBR non-fatal.
 * Any other value is accepted and ignored. Always returns 1.
 */
static int __init ibt_setup(char *str)
{
	if (strcmp(str, "off") == 0)
		setup_clear_cpu_cap(X86_FEATURE_IBT);
	else if (strcmp(str, "warn") == 0)
		ibt_fatal = false;

	return 1;
}
|
||||
|
||||
__setup("ibt=", ibt_setup);
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
|
||||
{
|
||||
if (user_mode(regs)) {
|
||||
if (cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
|
||||
do_user_cp_fault(regs, error_code);
|
||||
else
|
||||
do_unexpected_cp(regs, error_code);
|
||||
} else {
|
||||
if (cpu_feature_enabled(X86_FEATURE_IBT))
|
||||
do_kernel_cp_fault(regs, error_code);
|
||||
else
|
||||
do_unexpected_cp(regs, error_code);
|
||||
}
|
||||
}
|
@ -587,27 +587,43 @@ __noendbr void ibt_restore(u64 save)
|
||||
|
||||
static __always_inline void setup_cet(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr = CET_ENDBR_EN;
|
||||
bool user_shstk, kernel_ibt;
|
||||
|
||||
if (!HAS_KERNEL_IBT ||
|
||||
!cpu_feature_enabled(X86_FEATURE_IBT))
|
||||
if (!IS_ENABLED(CONFIG_X86_CET))
|
||||
return;
|
||||
|
||||
wrmsrl(MSR_IA32_S_CET, msr);
|
||||
kernel_ibt = HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT);
|
||||
user_shstk = cpu_feature_enabled(X86_FEATURE_SHSTK) &&
|
||||
IS_ENABLED(CONFIG_X86_USER_SHADOW_STACK);
|
||||
|
||||
if (!kernel_ibt && !user_shstk)
|
||||
return;
|
||||
|
||||
if (user_shstk)
|
||||
set_cpu_cap(c, X86_FEATURE_USER_SHSTK);
|
||||
|
||||
if (kernel_ibt)
|
||||
wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN);
|
||||
else
|
||||
wrmsrl(MSR_IA32_S_CET, 0);
|
||||
|
||||
cr4_set_bits(X86_CR4_CET);
|
||||
|
||||
if (!ibt_selftest()) {
|
||||
if (kernel_ibt && ibt_selftest()) {
|
||||
pr_err("IBT selftest: Failed!\n");
|
||||
wrmsrl(MSR_IA32_S_CET, 0);
|
||||
setup_clear_cpu_cap(X86_FEATURE_IBT);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
__noendbr void cet_disable(void)
|
||||
{
|
||||
if (cpu_feature_enabled(X86_FEATURE_IBT))
|
||||
wrmsrl(MSR_IA32_S_CET, 0);
|
||||
if (!(cpu_feature_enabled(X86_FEATURE_IBT) ||
|
||||
cpu_feature_enabled(X86_FEATURE_SHSTK)))
|
||||
return;
|
||||
|
||||
wrmsrl(MSR_IA32_S_CET, 0);
|
||||
wrmsrl(MSR_IA32_U_CET, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1491,6 +1507,9 @@ static void __init cpu_parse_early_param(void)
|
||||
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
||||
|
||||
if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
|
||||
|
||||
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
|
||||
if (arglen <= 0)
|
||||
return;
|
||||
|
@ -81,6 +81,7 @@ static const struct cpuid_dep cpuid_deps[] = {
|
||||
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
|
||||
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
|
||||
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
|
||||
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
|
||||
{}
|
||||
};
|
||||
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <asm/prctl.h>
|
||||
#include <linux/proc_fs.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
@ -175,3 +177,24 @@ const struct seq_operations cpuinfo_op = {
|
||||
.stop = c_stop,
|
||||
.show = show_cpuinfo,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
static void dump_x86_features(struct seq_file *m, unsigned long features)
|
||||
{
|
||||
if (features & ARCH_SHSTK_SHSTK)
|
||||
seq_puts(m, "shstk ");
|
||||
if (features & ARCH_SHSTK_WRSS)
|
||||
seq_puts(m, "wrss ");
|
||||
}
|
||||
|
||||
void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task)
|
||||
{
|
||||
seq_puts(m, "x86_Thread_features:\t");
|
||||
dump_x86_features(m, task->thread.features);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
seq_puts(m, "x86_Thread_features_locked:\t");
|
||||
dump_x86_features(m, task->thread.features_locked);
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
#endif /* CONFIG_X86_USER_SHADOW_STACK */
|
||||
|
@ -552,8 +552,36 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
|
||||
}
|
||||
}
|
||||
|
||||
/* A passed ssp of zero will not cause any update */
|
||||
/*
 * Store @ssp as the user shadow stack pointer in @dst's xsave buffer.
 *
 * Returns 0 on success or when nothing had to be done (@ssp is zero, or
 * user shadow stack support is compiled out), and 1 when the CET user state
 * could not be located in the buffer.
 */
static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
{
#ifdef CONFIG_X86_USER_SHADOW_STACK
	struct cet_user_state *cet;

	if (ssp) {
		cet = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
				     XFEATURE_CET_USER);

		/*
		 * A non-zero ssp means 'dst' is configured with a shadow
		 * stack, and its fpu state was just copied from the parent in
		 * fpu_clone(), so it is up to date. The buffer must therefore
		 * hold a valid, non-init CET state.
		 */
		if (WARN_ON_ONCE(!cet))
			return 1;

		cet->user_ssp = (u64)ssp;
	}
#endif
	return 0;
}
|
||||
|
||||
/* Clone current's FPU state on fork */
|
||||
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
|
||||
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
|
||||
unsigned long ssp)
|
||||
{
|
||||
struct fpu *src_fpu = &current->thread.fpu;
|
||||
struct fpu *dst_fpu = &dst->thread.fpu;
|
||||
@ -613,6 +641,12 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
|
||||
if (use_xsave())
|
||||
dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
|
||||
|
||||
/*
|
||||
* Update shadow stack pointer, in case it changed during clone.
|
||||
*/
|
||||
if (update_fpu_shstk(dst, ssp))
|
||||
return 1;
|
||||
|
||||
trace_x86_fpu_copy_src(src_fpu);
|
||||
trace_x86_fpu_copy_dst(dst_fpu);
|
||||
|
||||
@ -753,6 +787,24 @@ void switch_fpu_return(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(switch_fpu_return);
|
||||
|
||||
void fpregs_lock_and_load(void)
|
||||
{
|
||||
/*
|
||||
* fpregs_lock() only disables preemption (mostly). So modifying state
|
||||
* in an interrupt could screw up some in progress fpregs operation.
|
||||
* Warn about it.
|
||||
*/
|
||||
WARN_ON_ONCE(!irq_fpu_usable());
|
||||
WARN_ON_ONCE(current->flags & PF_KTHREAD);
|
||||
|
||||
fpregs_lock();
|
||||
|
||||
fpregs_assert_state_consistent();
|
||||
|
||||
if (test_thread_flag(TIF_NEED_FPU_LOAD))
|
||||
fpregs_restore_userregs();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_DEBUG_FPU
|
||||
/*
|
||||
* If current FPU state according to its tracking (loaded FPU context on this
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu/signal.h>
|
||||
#include <asm/fpu/regset.h>
|
||||
#include <asm/prctl.h>
|
||||
|
||||
#include "context.h"
|
||||
#include "internal.h"
|
||||
@ -174,6 +175,86 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
int ssp_active(struct task_struct *target, const struct user_regset *regset)
|
||||
{
|
||||
if (target->thread.features & ARCH_SHSTK_SHSTK)
|
||||
return regset->n;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ssp_get(struct task_struct *target, const struct user_regset *regset,
|
||||
struct membuf to)
|
||||
{
|
||||
struct fpu *fpu = &target->thread.fpu;
|
||||
struct cet_user_state *cetregs;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
|
||||
return -ENODEV;
|
||||
|
||||
sync_fpstate(fpu);
|
||||
cetregs = get_xsave_addr(&fpu->fpstate->regs.xsave, XFEATURE_CET_USER);
|
||||
if (WARN_ON(!cetregs)) {
|
||||
/*
|
||||
* This shouldn't ever be NULL because shadow stack was
|
||||
* verified to be enabled above. This means
|
||||
* MSR_IA32_U_CET.CET_SHSTK_EN should be 1 and so
|
||||
* XFEATURE_CET_USER should not be in the init state.
|
||||
*/
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return membuf_write(&to, (unsigned long *)&cetregs->user_ssp,
|
||||
sizeof(cetregs->user_ssp));
|
||||
}
|
||||
|
||||
/*
 * regset ->set() callback: replace @target's user shadow stack pointer with
 * the value supplied in @kbuf / @ubuf.
 *
 * Returns 0 on success; -ENODEV when user shadow stack is unsupported, not
 * enabled for @target, or its xsave state is missing; -EINVAL for a bad
 * position/size or an out-of-range or misaligned pointer; otherwise the
 * error from user_regset_copyin().
 */
int ssp_set(struct task_struct *target, const struct user_regset *regset,
	    unsigned int pos, unsigned int count,
	    const void *kbuf, const void __user *ubuf)
{
	struct fpu *fpu = &target->thread.fpu;
	struct xregs_state *xsave = &fpu->fpstate->regs.xsave;
	struct cet_user_state *cet;
	unsigned long new_ssp;
	int err;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -ENODEV;
	if (!ssp_active(target, regset))
		return -ENODEV;

	/* Exactly one whole SSP value must be written. */
	if (pos != 0)
		return -EINVAL;
	if (count != sizeof(new_ssp))
		return -EINVAL;

	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &new_ssp, 0, -1);
	if (err)
		return err;

	/*
	 * Disallowed CET register values can make some kernel instructions
	 * (IRET, etc) raise exceptions, so simply refuse invalid values.
	 */
	if (new_ssp >= TASK_SIZE_MAX)
		return -EINVAL;
	if (!IS_ALIGNED(new_ssp, 8))
		return -EINVAL;

	fpu_force_restore(fpu);

	cet = get_xsave_addr(xsave, XFEATURE_CET_USER);
	if (WARN_ON(!cet)) {
		/*
		 * NULL is not expected here: shadow stack was verified to be
		 * enabled above, so MSR_IA32_U_CET.CET_SHSTK_EN should be 1
		 * and XFEATURE_CET_USER should not be in the init state.
		 */
		return -ENODEV;
	}

	cet->user_ssp = new_ssp;
	return 0;
}
|
||||
#endif /* CONFIG_X86_USER_SHADOW_STACK */
|
||||
|
||||
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
|
||||
/*
|
||||
|
@ -39,26 +39,26 @@
|
||||
*/
|
||||
static const char *xfeature_names[] =
|
||||
{
|
||||
"x87 floating point registers" ,
|
||||
"SSE registers" ,
|
||||
"AVX registers" ,
|
||||
"MPX bounds registers" ,
|
||||
"MPX CSR" ,
|
||||
"AVX-512 opmask" ,
|
||||
"AVX-512 Hi256" ,
|
||||
"AVX-512 ZMM_Hi256" ,
|
||||
"Processor Trace (unused)" ,
|
||||
"x87 floating point registers",
|
||||
"SSE registers",
|
||||
"AVX registers",
|
||||
"MPX bounds registers",
|
||||
"MPX CSR",
|
||||
"AVX-512 opmask",
|
||||
"AVX-512 Hi256",
|
||||
"AVX-512 ZMM_Hi256",
|
||||
"Processor Trace (unused)",
|
||||
"Protection Keys User registers",
|
||||
"PASID state",
|
||||
"unknown xstate feature" ,
|
||||
"unknown xstate feature" ,
|
||||
"unknown xstate feature" ,
|
||||
"unknown xstate feature" ,
|
||||
"unknown xstate feature" ,
|
||||
"unknown xstate feature" ,
|
||||
"AMX Tile config" ,
|
||||
"AMX Tile data" ,
|
||||
"unknown xstate feature" ,
|
||||
"Control-flow User registers",
|
||||
"Control-flow Kernel registers (unused)",
|
||||
"unknown xstate feature",
|
||||
"unknown xstate feature",
|
||||
"unknown xstate feature",
|
||||
"unknown xstate feature",
|
||||
"AMX Tile config",
|
||||
"AMX Tile data",
|
||||
"unknown xstate feature",
|
||||
};
|
||||
|
||||
static unsigned short xsave_cpuid_features[] __initdata = {
|
||||
@ -73,6 +73,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
|
||||
[XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
|
||||
[XFEATURE_PKRU] = X86_FEATURE_PKU,
|
||||
[XFEATURE_PASID] = X86_FEATURE_ENQCMD,
|
||||
[XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
|
||||
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
|
||||
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
|
||||
};
|
||||
@ -276,6 +277,7 @@ static void __init print_xstate_features(void)
|
||||
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
|
||||
print_xstate_feature(XFEATURE_MASK_PKRU);
|
||||
print_xstate_feature(XFEATURE_MASK_PASID);
|
||||
print_xstate_feature(XFEATURE_MASK_CET_USER);
|
||||
print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
|
||||
print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
|
||||
}
|
||||
@ -344,6 +346,7 @@ static __init void os_xrstor_booting(struct xregs_state *xstate)
|
||||
XFEATURE_MASK_BNDREGS | \
|
||||
XFEATURE_MASK_BNDCSR | \
|
||||
XFEATURE_MASK_PASID | \
|
||||
XFEATURE_MASK_CET_USER | \
|
||||
XFEATURE_MASK_XTILE)
|
||||
|
||||
/*
|
||||
@ -446,14 +449,15 @@ static void __init __xstate_dump_leaves(void)
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define XCHECK_SZ(sz, nr, nr_macro, __struct) do { \
|
||||
if ((nr == nr_macro) && \
|
||||
WARN_ONCE(sz != sizeof(__struct), \
|
||||
"%s: struct is %zu bytes, cpu state %d bytes\n", \
|
||||
__stringify(nr_macro), sizeof(__struct), sz)) { \
|
||||
#define XCHECK_SZ(sz, nr, __struct) ({ \
|
||||
if (WARN_ONCE(sz != sizeof(__struct), \
|
||||
"[%s]: struct is %zu bytes, cpu state %d bytes\n", \
|
||||
xfeature_names[nr], sizeof(__struct), sz)) { \
|
||||
__xstate_dump_leaves(); \
|
||||
} \
|
||||
} while (0)
|
||||
true; \
|
||||
})
|
||||
|
||||
|
||||
/**
|
||||
* check_xtile_data_against_struct - Check tile data state size.
|
||||
@ -527,36 +531,28 @@ static bool __init check_xstate_against_struct(int nr)
|
||||
* Ask the CPU for the size of the state.
|
||||
*/
|
||||
int sz = xfeature_size(nr);
|
||||
|
||||
/*
|
||||
* Match each CPU state with the corresponding software
|
||||
* structure.
|
||||
*/
|
||||
XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_BNDREGS, struct mpx_bndreg_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_BNDCSR, struct mpx_bndcsr_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
|
||||
XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
|
||||
|
||||
/* The tile data size varies between implementations. */
|
||||
if (nr == XFEATURE_XTILE_DATA)
|
||||
check_xtile_data_against_struct(sz);
|
||||
|
||||
/*
|
||||
* Make *SURE* to add any feature numbers in below if
|
||||
* there are "holes" in the xsave state component
|
||||
* numbers.
|
||||
*/
|
||||
if ((nr < XFEATURE_YMM) ||
|
||||
(nr >= XFEATURE_MAX) ||
|
||||
(nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
|
||||
((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
|
||||
switch (nr) {
|
||||
case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct);
|
||||
case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
|
||||
case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
|
||||
case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
|
||||
case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
|
||||
case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
|
||||
case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state);
|
||||
case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
|
||||
case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
|
||||
case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
|
||||
case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
|
||||
default:
|
||||
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
17
arch/x86/kernel/ibt_selftest.S
Normal file
17
arch/x86/kernel/ibt_selftest.S
Normal file
@ -0,0 +1,17 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/objtool.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
SYM_CODE_START(ibt_selftest_noendbr)
|
||||
ANNOTATE_NOENDBR
|
||||
UNWIND_HINT_FUNC
|
||||
/* #CP handler sets %ax to 0 */
|
||||
RET
|
||||
SYM_CODE_END(ibt_selftest_noendbr)
|
||||
|
||||
SYM_FUNC_START(ibt_selftest)
|
||||
lea ibt_selftest_noendbr(%rip), %rax
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
jmp *%rax
|
||||
SYM_FUNC_END(ibt_selftest)
|
@ -107,7 +107,7 @@ static const __initconst struct idt_data def_idts[] = {
|
||||
ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
#ifdef CONFIG_X86_CET
|
||||
INTG(X86_TRAP_CP, asm_exc_control_protection),
|
||||
#endif
|
||||
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include <asm/unwind.h>
|
||||
#include <asm/tdx.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/shstk.h>
|
||||
|
||||
#include "process.h"
|
||||
|
||||
@ -122,6 +123,7 @@ void exit_thread(struct task_struct *tsk)
|
||||
|
||||
free_vm86(t);
|
||||
|
||||
shstk_free(tsk);
|
||||
fpu__drop(fpu);
|
||||
}
|
||||
|
||||
@ -162,6 +164,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
||||
struct inactive_task_frame *frame;
|
||||
struct fork_frame *fork_frame;
|
||||
struct pt_regs *childregs;
|
||||
unsigned long new_ssp;
|
||||
int ret = 0;
|
||||
|
||||
childregs = task_pt_regs(p);
|
||||
@ -199,7 +202,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
||||
frame->flags = X86_EFLAGS_FIXED;
|
||||
#endif
|
||||
|
||||
fpu_clone(p, clone_flags, args->fn);
|
||||
/*
|
||||
* Allocate a new shadow stack for thread if needed. If shadow stack
|
||||
* is disabled, new_ssp will remain 0, and fpu_clone() will know not to
|
||||
* update it.
|
||||
*/
|
||||
new_ssp = shstk_alloc_thread_stack(p, clone_flags, args->stack_size);
|
||||
if (IS_ERR_VALUE(new_ssp))
|
||||
return PTR_ERR((void *)new_ssp);
|
||||
|
||||
fpu_clone(p, clone_flags, args->fn, new_ssp);
|
||||
|
||||
/* Kernel thread ? */
|
||||
if (unlikely(p->flags & PF_KTHREAD)) {
|
||||
@ -245,6 +257,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
||||
if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
|
||||
io_bitmap_share(p);
|
||||
|
||||
/*
|
||||
* If copy_thread() is failing, don't leak the shadow stack possibly
|
||||
* allocated in shstk_alloc_thread_stack() above.
|
||||
*/
|
||||
if (ret)
|
||||
shstk_free(p);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -515,6 +515,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
|
||||
load_gs_index(__USER_DS);
|
||||
}
|
||||
|
||||
reset_thread_features();
|
||||
|
||||
loadsegment(fs, 0);
|
||||
loadsegment(es, _ds);
|
||||
loadsegment(ds, _ds);
|
||||
@ -894,6 +896,12 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
|
||||
else
|
||||
return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
|
||||
#endif
|
||||
case ARCH_SHSTK_ENABLE:
|
||||
case ARCH_SHSTK_DISABLE:
|
||||
case ARCH_SHSTK_LOCK:
|
||||
case ARCH_SHSTK_UNLOCK:
|
||||
case ARCH_SHSTK_STATUS:
|
||||
return shstk_prctl(task, option, arg2);
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
|
@ -58,6 +58,7 @@ enum x86_regset_64 {
|
||||
REGSET64_FP,
|
||||
REGSET64_IOPERM,
|
||||
REGSET64_XSTATE,
|
||||
REGSET64_SSP,
|
||||
};
|
||||
|
||||
#define REGSET_GENERAL \
|
||||
@ -1267,6 +1268,17 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
|
||||
.active = ioperm_active,
|
||||
.regset_get = ioperm_get
|
||||
},
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
[REGSET64_SSP] = {
|
||||
.core_note_type = NT_X86_SHSTK,
|
||||
.n = 1,
|
||||
.size = sizeof(u64),
|
||||
.align = sizeof(u64),
|
||||
.active = ssp_active,
|
||||
.regset_get = ssp_get,
|
||||
.set = ssp_set
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
static const struct user_regset_view user_x86_64_view = {
|
||||
|
550
arch/x86/kernel/shstk.c
Normal file
550
arch/x86/kernel/shstk.c
Normal file
@ -0,0 +1,550 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* shstk.c - Intel shadow stack support
|
||||
*
|
||||
* Copyright (c) 2021, Intel Corporation.
|
||||
* Yu-cheng Yu <yu-cheng.yu@intel.com>
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/user.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/fpu/xstate.h>
|
||||
#include <asm/fpu/types.h>
|
||||
#include <asm/shstk.h>
|
||||
#include <asm/special_insns.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/prctl.h>
|
||||
|
||||
#define SS_FRAME_SIZE 8
|
||||
|
||||
static bool features_enabled(unsigned long features)
|
||||
{
|
||||
return current->thread.features & features;
|
||||
}
|
||||
|
||||
static void features_set(unsigned long features)
|
||||
{
|
||||
current->thread.features |= features;
|
||||
}
|
||||
|
||||
static void features_clr(unsigned long features)
|
||||
{
|
||||
current->thread.features &= ~features;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a restore token on the shadow stack. A token is always 8-byte
|
||||
* and aligned to 8.
|
||||
*/
|
||||
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
/* Token must be aligned */
|
||||
if (!IS_ALIGNED(ssp, 8))
|
||||
return -EINVAL;
|
||||
|
||||
addr = ssp - SS_FRAME_SIZE;
|
||||
|
||||
/*
|
||||
* SSP is aligned, so reserved bits and mode bit are a zero, just mark
|
||||
* the token 64-bit.
|
||||
*/
|
||||
ssp |= BIT(0);
|
||||
|
||||
if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
|
||||
return -EFAULT;
|
||||
|
||||
if (token_addr)
|
||||
*token_addr = addr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* VM_SHADOW_STACK will have a guard page. This helps userspace protect
|
||||
* itself from attacks. The reasoning is as follows:
|
||||
*
|
||||
* The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The
|
||||
* INCSSP instruction can increment the shadow stack pointer. It is the
|
||||
* shadow stack analog of an instruction like:
|
||||
*
|
||||
* addq $0x80, %rsp
|
||||
*
|
||||
* However, there is one important difference between an ADD on %rsp
|
||||
* and INCSSP. In addition to modifying SSP, INCSSP also reads from the
|
||||
* memory of the first and last elements that were "popped". It can be
|
||||
* thought of as acting like this:
|
||||
*
|
||||
* READ_ONCE(ssp); // read+discard top element on stack
|
||||
* ssp += nr_to_pop * 8; // move the shadow stack
|
||||
* READ_ONCE(ssp-8); // read+discard last popped stack element
|
||||
*
|
||||
* The maximum distance INCSSP can move the SSP is 2040 bytes, before
|
||||
* it would read the memory. Therefore a single page gap will be enough
|
||||
* to prevent any operation from shifting the SSP to an adjacent stack,
|
||||
* since it would have to land in the gap at least once, causing a
|
||||
* fault.
|
||||
*/
|
||||
static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
				 unsigned long token_offset, bool set_res_tok)
{
	struct mm_struct *mm = current->mm;
	int map_flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
	unsigned long base, populate;

	/* A caller-chosen address must not replace an existing mapping. */
	if (addr != 0)
		map_flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	base = do_mmap(NULL, addr, size, PROT_READ, map_flags,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	/* Mapping failed, or no token requested: hand back what do_mmap() gave. */
	if (IS_ERR_VALUE(base) || !set_res_tok)
		return base;

	if (!create_rstor_token(base + token_offset, NULL))
		return base;

	/* Token could not be written: undo the mapping and report failure. */
	vm_munmap(base, size);
	return -EINVAL;
}
|
||||
|
||||
static unsigned long adjust_shstk_size(unsigned long size)
|
||||
{
|
||||
if (size)
|
||||
return PAGE_ALIGN(size);
|
||||
|
||||
return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
|
||||
}
|
||||
|
||||
/* Unmap the shadow stack at [@base, @base + @size) and warn on unexpected failure. */
static void unmap_shadow_stack(u64 base, u64 size)
{
	int ret = vm_munmap(base, size);

	/*
	 * -EINTR means mmap_write_lock_killable() failed: the process is
	 * about to die and have its MM cleaned up, so this task should
	 * never return to userspace. Leaking the shadow stack is
	 * acceptable in that case.
	 *
	 * Any other failure means either the system is out of memory or
	 * there is a bug, so warn about it.
	 */
	if (ret != -EINTR)
		WARN_ON_ONCE(ret);
}
|
||||
|
||||
static int shstk_setup(void)
|
||||
{
|
||||
struct thread_shstk *shstk = ¤t->thread.shstk;
|
||||
unsigned long addr, size;
|
||||
|
||||
/* Already enabled */
|
||||
if (features_enabled(ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
/* Also not supported for 32 bit and x32 */
|
||||
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
size = adjust_shstk_size(0);
|
||||
addr = alloc_shstk(0, size, 0, false);
|
||||
if (IS_ERR_VALUE(addr))
|
||||
return PTR_ERR((void *)addr);
|
||||
|
||||
fpregs_lock_and_load();
|
||||
wrmsrl(MSR_IA32_PL3_SSP, addr + size);
|
||||
wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
|
||||
fpregs_unlock();
|
||||
|
||||
shstk->base = addr;
|
||||
shstk->size = size;
|
||||
features_set(ARCH_SHSTK_SHSTK);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void reset_thread_features(void)
|
||||
{
|
||||
memset(¤t->thread.shstk, 0, sizeof(struct thread_shstk));
|
||||
current->thread.features = 0;
|
||||
current->thread.features_locked = 0;
|
||||
}
|
||||
|
||||
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
|
||||
unsigned long stack_size)
|
||||
{
|
||||
struct thread_shstk *shstk = &tsk->thread.shstk;
|
||||
unsigned long addr, size;
|
||||
|
||||
/*
|
||||
* If shadow stack is not enabled on the new thread, skip any
|
||||
* switch to a new shadow stack.
|
||||
*/
|
||||
if (!features_enabled(ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* For CLONE_VM, except vfork, the child needs a separate shadow
|
||||
* stack.
|
||||
*/
|
||||
if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
|
||||
return 0;
|
||||
|
||||
size = adjust_shstk_size(stack_size);
|
||||
addr = alloc_shstk(0, size, 0, false);
|
||||
if (IS_ERR_VALUE(addr))
|
||||
return addr;
|
||||
|
||||
shstk->base = addr;
|
||||
shstk->size = size;
|
||||
|
||||
return addr + size;
|
||||
}
|
||||
|
||||
static unsigned long get_user_shstk_addr(void)
|
||||
{
|
||||
unsigned long long ssp;
|
||||
|
||||
fpregs_lock_and_load();
|
||||
|
||||
rdmsrl(MSR_IA32_PL3_SSP, ssp);
|
||||
|
||||
fpregs_unlock();
|
||||
|
||||
return ssp;
|
||||
}
|
||||
|
||||
#define SHSTK_DATA_BIT BIT(63)
|
||||
|
||||
/*
 * Store @data on the shadow stack at @addr, tagged with SHSTK_DATA_BIT.
 *
 * Returns 0 on success, -EINVAL (with a one-time warning) if @data
 * already has SHSTK_DATA_BIT set, or -EFAULT if the write fails.
 */
static int put_shstk_data(u64 __user *addr, u64 data)
{
	u64 tagged;

	if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
		return -EINVAL;

	/*
	 * Setting the high bit ensures the stored value can't be
	 * processed as a return address.
	 */
	tagged = data | SHSTK_DATA_BIT;

	return write_user_shstk_64(addr, tagged) ? -EFAULT : 0;
}
|
||||
|
||||
/*
 * Read a tagged value from user address @addr and store it, with
 * SHSTK_DATA_BIT stripped, in @data.
 *
 * Returns 0 on success, -EFAULT if the user read fails, or -EINVAL if
 * the value read does not carry SHSTK_DATA_BIT. @data is only written
 * on success.
 */
static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
{
	unsigned long raw;

	if (unlikely(get_user(raw, addr)))
		return -EFAULT;

	/* Without the marker bit this is not data stored by put_shstk_data(). */
	if ((raw & SHSTK_DATA_BIT) == 0)
		return -EINVAL;

	*data = raw & ~SHSTK_DATA_BIT;
	return 0;
}
|
||||
|
||||
static int shstk_push_sigframe(unsigned long *ssp)
|
||||
{
|
||||
unsigned long target_ssp = *ssp;
|
||||
|
||||
/* Token must be aligned */
|
||||
if (!IS_ALIGNED(target_ssp, 8))
|
||||
return -EINVAL;
|
||||
|
||||
*ssp -= SS_FRAME_SIZE;
|
||||
if (put_shstk_data((void __user *)*ssp, target_ssp))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int shstk_pop_sigframe(unsigned long *ssp)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long token_addr;
|
||||
bool need_to_check_vma;
|
||||
int err = 1;
|
||||
|
||||
/*
|
||||
* It is possible for the SSP to be off the end of a shadow stack by 4
|
||||
* or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
|
||||
* before it, it might be this case, so check that the address being
|
||||
* read is actually shadow stack.
|
||||
*/
|
||||
if (!IS_ALIGNED(*ssp, 8))
|
||||
return -EINVAL;
|
||||
|
||||
need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
|
||||
|
||||
if (need_to_check_vma)
|
||||
mmap_read_lock_killable(current->mm);
|
||||
|
||||
err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
|
||||
if (unlikely(err))
|
||||
goto out_err;
|
||||
|
||||
if (need_to_check_vma) {
|
||||
vma = find_vma(current->mm, *ssp);
|
||||
if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
|
||||
err = -EFAULT;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
mmap_read_unlock(current->mm);
|
||||
}
|
||||
|
||||
/* Restore SSP aligned? */
|
||||
if (unlikely(!IS_ALIGNED(token_addr, 8)))
|
||||
return -EINVAL;
|
||||
|
||||
/* SSP in userspace? */
|
||||
if (unlikely(token_addr >= TASK_SIZE_MAX))
|
||||
return -EINVAL;
|
||||
|
||||
*ssp = token_addr;
|
||||
|
||||
return 0;
|
||||
out_err:
|
||||
if (need_to_check_vma)
|
||||
mmap_read_unlock(current->mm);
|
||||
return err;
|
||||
}
|
||||
|
||||
int setup_signal_shadow_stack(struct ksignal *ksig)
|
||||
{
|
||||
void __user *restorer = ksig->ka.sa.sa_restorer;
|
||||
unsigned long ssp;
|
||||
int err;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
|
||||
!features_enabled(ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
if (!restorer)
|
||||
return -EINVAL;
|
||||
|
||||
ssp = get_user_shstk_addr();
|
||||
if (unlikely(!ssp))
|
||||
return -EINVAL;
|
||||
|
||||
err = shstk_push_sigframe(&ssp);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
/* Push restorer address */
|
||||
ssp -= SS_FRAME_SIZE;
|
||||
err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
|
||||
if (unlikely(err))
|
||||
return -EFAULT;
|
||||
|
||||
fpregs_lock_and_load();
|
||||
wrmsrl(MSR_IA32_PL3_SSP, ssp);
|
||||
fpregs_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int restore_signal_shadow_stack(void)
|
||||
{
|
||||
unsigned long ssp;
|
||||
int err;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
|
||||
!features_enabled(ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
ssp = get_user_shstk_addr();
|
||||
if (unlikely(!ssp))
|
||||
return -EINVAL;
|
||||
|
||||
err = shstk_pop_sigframe(&ssp);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
fpregs_lock_and_load();
|
||||
wrmsrl(MSR_IA32_PL3_SSP, ssp);
|
||||
fpregs_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void shstk_free(struct task_struct *tsk)
|
||||
{
|
||||
struct thread_shstk *shstk = &tsk->thread.shstk;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
|
||||
!features_enabled(ARCH_SHSTK_SHSTK))
|
||||
return;
|
||||
|
||||
/*
|
||||
* When fork() with CLONE_VM fails, the child (tsk) already has a
|
||||
* shadow stack allocated, and exit_thread() calls this function to
|
||||
* free it. In this case the parent (current) and the child share
|
||||
* the same mm struct.
|
||||
*/
|
||||
if (!tsk->mm || tsk->mm != current->mm)
|
||||
return;
|
||||
|
||||
unmap_shadow_stack(shstk->base, shstk->size);
|
||||
}
|
||||
|
||||
/*
 * Enable or disable the WRSS feature for the current task.
 *
 * Updates the ARCH_SHSTK_WRSS feature bit and the CET_WRSS_EN bit in
 * MSR_IA32_U_CET.
 *
 * Returns 0 on success or if already in the requested state,
 * -EOPNOTSUPP if the CPU feature is off, or -EPERM if shadow stack is
 * not enabled.
 */
static int wrss_control(bool enable)
{
	u64 cet;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	/*
	 * WRSS may only be enabled while shadow stack is enabled. With
	 * shadow stack off, WRSS is already off too, so there is nothing
	 * to clear when disabling either.
	 */
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return -EPERM;

	/* Nothing to do if already in the requested state. */
	if (features_enabled(ARCH_SHSTK_WRSS) == enable)
		return 0;

	fpregs_lock_and_load();
	rdmsrl(MSR_IA32_U_CET, cet);

	if (enable) {
		features_set(ARCH_SHSTK_WRSS);
		wrmsrl(MSR_IA32_U_CET, cet | CET_WRSS_EN);
	} else {
		features_clr(ARCH_SHSTK_WRSS);
		/* Skip the MSR write when the bit is already clear. */
		if (cet & CET_WRSS_EN)
			wrmsrl(MSR_IA32_U_CET, cet & ~CET_WRSS_EN);
	}

	fpregs_unlock();

	return 0;
}
|
||||
|
||||
static int shstk_disable(void)
|
||||
{
|
||||
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* Already disabled? */
|
||||
if (!features_enabled(ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
fpregs_lock_and_load();
|
||||
/* Disable WRSS too when disabling shadow stack */
|
||||
wrmsrl(MSR_IA32_U_CET, 0);
|
||||
wrmsrl(MSR_IA32_PL3_SSP, 0);
|
||||
fpregs_unlock();
|
||||
|
||||
shstk_free(current);
|
||||
features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * map_shadow_stack() - map a new shadow stack for the caller.
 *
 * @addr:  requested address, or 0 to leave the choice to the kernel;
 *         a non-zero value below 4 GiB is rejected
 * @size:  requested size; rounded up to a page multiple for the mapping
 * @flags: only SHADOW_STACK_SET_TOKEN is accepted; when set, a restore
 *         token is written at offset @size within the mapping
 *
 * Returns the value from alloc_shstk() on success, or -EOPNOTSUPP,
 * -EINVAL, -ENOSPC, -ERANGE or -EOVERFLOW for the failed checks below.
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	unsigned long map_len;
	bool want_token;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	/* Reject any flag other than SHADOW_STACK_SET_TOKEN. */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	want_token = (flags & SHADOW_STACK_SET_TOKEN) != 0;

	/* If there isn't space for a token */
	if (want_token && size < 8)
		return -ENOSPC;

	if (addr != 0 && addr < SZ_4G)
		return -ERANGE;

	/*
	 * If rounding up wrapped around, the restore token would be
	 * written to the wrong location. Not catastrophic, but block it
	 * and return the right error code.
	 */
	map_len = PAGE_ALIGN(size);
	if (map_len < size)
		return -EOVERFLOW;

	return alloc_shstk(addr, map_len, size, want_token);
}
|
||||
|
||||
long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
|
||||
{
|
||||
unsigned long features = arg2;
|
||||
|
||||
if (option == ARCH_SHSTK_STATUS) {
|
||||
return put_user(task->thread.features, (unsigned long __user *)arg2);
|
||||
}
|
||||
|
||||
if (option == ARCH_SHSTK_LOCK) {
|
||||
task->thread.features_locked |= features;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Only allow via ptrace */
|
||||
if (task != current) {
|
||||
if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
|
||||
task->thread.features_locked &= ~features;
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Do not allow to change locked features */
|
||||
if (features & task->thread.features_locked)
|
||||
return -EPERM;
|
||||
|
||||
/* Only support enabling/disabling one feature at a time. */
|
||||
if (hweight_long(features) > 1)
|
||||
return -EINVAL;
|
||||
|
||||
if (option == ARCH_SHSTK_DISABLE) {
|
||||
if (features & ARCH_SHSTK_WRSS)
|
||||
return wrss_control(false);
|
||||
if (features & ARCH_SHSTK_SHSTK)
|
||||
return shstk_disable();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Handle ARCH_SHSTK_ENABLE */
|
||||
if (features & ARCH_SHSTK_SHSTK)
|
||||
return shstk_setup();
|
||||
if (features & ARCH_SHSTK_WRSS)
|
||||
return wrss_control(true);
|
||||
return -EINVAL;
|
||||
}
|
@ -40,6 +40,7 @@
|
||||
#include <asm/syscall.h>
|
||||
#include <asm/sigframe.h>
|
||||
#include <asm/signal.h>
|
||||
#include <asm/shstk.h>
|
||||
|
||||
static inline int is_ia32_compat_frame(struct ksignal *ksig)
|
||||
{
|
||||
|
@ -402,7 +402,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -175,6 +175,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
|
||||
frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
|
||||
uc_flags = frame_uc_flags(regs);
|
||||
|
||||
if (setup_signal_shadow_stack(ksig))
|
||||
return -EFAULT;
|
||||
|
||||
if (!user_access_begin(frame, sizeof(*frame)))
|
||||
return -EFAULT;
|
||||
|
||||
@ -260,6 +263,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
|
||||
if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
|
||||
goto badframe;
|
||||
|
||||
if (restore_signal_shadow_stack())
|
||||
goto badframe;
|
||||
|
||||
if (restore_altstack(&frame->uc.uc_stack))
|
||||
goto badframe;
|
||||
|
||||
@ -403,7 +409,7 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
|
||||
*/
|
||||
static_assert(NSIGILL == 11);
|
||||
static_assert(NSIGFPE == 15);
|
||||
static_assert(NSIGSEGV == 9);
|
||||
static_assert(NSIGSEGV == 10);
|
||||
static_assert(NSIGBUS == 5);
|
||||
static_assert(NSIGTRAP == 6);
|
||||
static_assert(NSIGCHLD == 6);
|
||||
|
@ -193,7 +193,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
||||
|
||||
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
||||
info.length = len;
|
||||
info.low_limit = PAGE_SIZE;
|
||||
if (!in_32bit_syscall() && (flags & MAP_ABOVE4G))
|
||||
info.low_limit = SZ_4G;
|
||||
else
|
||||
info.low_limit = PAGE_SIZE;
|
||||
|
||||
info.high_limit = get_mmap_base(0);
|
||||
|
||||
/*
|
||||
|
@ -77,18 +77,6 @@
|
||||
|
||||
DECLARE_BITMAP(system_vectors, NR_VECTORS);
|
||||
|
||||
static inline void cond_local_irq_enable(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static inline void cond_local_irq_disable(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
__always_inline int is_valid_bugaddr(unsigned long addr)
|
||||
{
|
||||
if (addr < TASK_SIZE_MAX)
|
||||
@ -213,81 +201,6 @@ DEFINE_IDTENTRY(exc_overflow)
|
||||
do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
|
||||
static __ro_after_init bool ibt_fatal = true;
|
||||
|
||||
extern void ibt_selftest_ip(void); /* code label defined in asm below */
|
||||
|
||||
enum cp_error_code {
|
||||
CP_EC = (1 << 15) - 1,
|
||||
|
||||
CP_RET = 1,
|
||||
CP_IRET = 2,
|
||||
CP_ENDBR = 3,
|
||||
CP_RSTRORSSP = 4,
|
||||
CP_SETSSBSY = 5,
|
||||
|
||||
CP_ENCL = 1 << 15,
|
||||
};
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
|
||||
{
|
||||
if (!cpu_feature_enabled(X86_FEATURE_IBT)) {
|
||||
pr_err("Unexpected #CP\n");
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR))
|
||||
return;
|
||||
|
||||
if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) {
|
||||
regs->ax = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
|
||||
if (!ibt_fatal) {
|
||||
printk(KERN_DEFAULT CUT_HERE);
|
||||
__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
|
||||
return;
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* Must be noinline to ensure uniqueness of ibt_selftest_ip. */
|
||||
noinline bool ibt_selftest(void)
|
||||
{
|
||||
unsigned long ret;
|
||||
|
||||
asm (" lea ibt_selftest_ip(%%rip), %%rax\n\t"
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
" jmp *%%rax\n\t"
|
||||
"ibt_selftest_ip:\n\t"
|
||||
UNWIND_HINT_FUNC
|
||||
ANNOTATE_NOENDBR
|
||||
" nop\n\t"
|
||||
|
||||
: "=a" (ret) : : "memory");
|
||||
|
||||
return !ret;
|
||||
}
|
||||
|
||||
static int __init ibt_setup(char *str)
|
||||
{
|
||||
if (!strcmp(str, "off"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_IBT);
|
||||
|
||||
if (!strcmp(str, "warn"))
|
||||
ibt_fatal = false;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("ibt=", ibt_setup);
|
||||
|
||||
#endif /* CONFIG_X86_KERNEL_IBT */
|
||||
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
void handle_invalid_op(struct pt_regs *regs)
|
||||
#else
|
||||
|
@ -1112,8 +1112,22 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
|
||||
(error_code & X86_PF_INSTR), foreign))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Shadow stack accesses (PF_SHSTK=1) are only permitted to
|
||||
* shadow stack VMAs. All other accesses result in an error.
|
||||
*/
|
||||
if (error_code & X86_PF_SHSTK) {
|
||||
if (unlikely(!(vma->vm_flags & VM_SHADOW_STACK)))
|
||||
return 1;
|
||||
if (unlikely(!(vma->vm_flags & VM_WRITE)))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (error_code & X86_PF_WRITE) {
|
||||
/* write, present and write, not present: */
|
||||
if (unlikely(vma->vm_flags & VM_SHADOW_STACK))
|
||||
return 1;
|
||||
if (unlikely(!(vma->vm_flags & VM_WRITE)))
|
||||
return 1;
|
||||
return 0;
|
||||
@ -1305,6 +1319,14 @@ void do_user_addr_fault(struct pt_regs *regs,
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
|
||||
/*
|
||||
* Read-only permissions can not be expressed in shadow stack PTEs.
|
||||
* Treat all shadow stack accesses as WRITE faults. This ensures
|
||||
* that the MM will prepare everything (e.g., break COW) such that
|
||||
* maybe_mkwrite() can create a proper shadow stack PTE.
|
||||
*/
|
||||
if (error_code & X86_PF_SHSTK)
|
||||
flags |= FAULT_FLAG_WRITE;
|
||||
if (error_code & X86_PF_WRITE)
|
||||
flags |= FAULT_FLAG_WRITE;
|
||||
if (error_code & X86_PF_INSTR)
|
||||
|
@ -2074,12 +2074,12 @@ int set_memory_nx(unsigned long addr, int numpages)
|
||||
|
||||
int set_memory_ro(unsigned long addr, int numpages)
|
||||
{
|
||||
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
|
||||
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW | _PAGE_DIRTY), 0);
|
||||
}
|
||||
|
||||
int set_memory_rox(unsigned long addr, int numpages)
|
||||
{
|
||||
pgprot_t clr = __pgprot(_PAGE_RW);
|
||||
pgprot_t clr = __pgprot(_PAGE_RW | _PAGE_DIRTY);
|
||||
|
||||
if (__supported_pte_mask & _PAGE_NX)
|
||||
clr.pgprot |= _PAGE_NX;
|
||||
|
@ -881,3 +881,43 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
|
||||
|
||||
pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
|
||||
{
|
||||
if (vma->vm_flags & VM_SHADOW_STACK)
|
||||
return pte_mkwrite_shstk(pte);
|
||||
|
||||
pte = pte_mkwrite_novma(pte);
|
||||
|
||||
return pte_clear_saveddirty(pte);
|
||||
}
|
||||
|
||||
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
|
||||
{
|
||||
if (vma->vm_flags & VM_SHADOW_STACK)
|
||||
return pmd_mkwrite_shstk(pmd);
|
||||
|
||||
pmd = pmd_mkwrite_novma(pmd);
|
||||
|
||||
return pmd_clear_saveddirty(pmd);
|
||||
}
|
||||
|
||||
void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
|
||||
{
|
||||
/*
|
||||
* Hardware before shadow stack can (rarely) set Dirty=1
|
||||
* on a Write=0 PTE. So the below condition
|
||||
* only indicates a software bug when shadow stack is
|
||||
* supported by the HW. This checking is covered in
|
||||
* pte_shstk().
|
||||
*/
|
||||
VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
|
||||
pte_shstk(pte));
|
||||
}
|
||||
|
||||
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
|
||||
{
|
||||
/* See note in arch_check_zapped_pte() */
|
||||
VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
|
||||
pmd_shstk(pmd));
|
||||
}
|
||||
|
@ -694,7 +694,7 @@ static struct trap_array_entry trap_array[] = {
|
||||
TRAP_ENTRY(exc_coprocessor_error, false ),
|
||||
TRAP_ENTRY(exc_alignment_check, false ),
|
||||
TRAP_ENTRY(exc_simd_coprocessor_error, false ),
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
#ifdef CONFIG_X86_CET
|
||||
TRAP_ENTRY(exc_control_protection, false ),
|
||||
#endif
|
||||
};
|
||||
|
@ -166,7 +166,7 @@ void make_lowmem_page_readwrite(void *vaddr)
|
||||
if (pte == NULL)
|
||||
return; /* vaddr missing */
|
||||
|
||||
ptev = pte_mkwrite(*pte);
|
||||
ptev = pte_mkwrite_novma(*pte);
|
||||
|
||||
if (HYPERVISOR_update_va_mapping(address, ptev, 0))
|
||||
BUG();
|
||||
|
@ -148,7 +148,7 @@ xen_pv_trap asm_exc_page_fault
|
||||
xen_pv_trap asm_exc_spurious_interrupt_bug
|
||||
xen_pv_trap asm_exc_coprocessor_error
|
||||
xen_pv_trap asm_exc_alignment_check
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
#ifdef CONFIG_X86_CET
|
||||
xen_pv_trap asm_exc_control_protection
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
|
@ -262,7 +262,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
|
||||
{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
||||
static inline pte_t pte_mkyoung(pte_t pte)
|
||||
{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
static inline pte_t pte_mkwrite_novma(pte_t pte)
|
||||
{ pte_val(pte) |= _PAGE_WRITABLE; return pte; }
|
||||
|
||||
#define pgprot_noncached(prot) \
|
||||
|
2
fs/aio.c
2
fs/aio.c
@ -558,7 +558,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
|
||||
|
||||
ctx->mmap_base = do_mmap(ctx->aio_ring_file, 0, ctx->mmap_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, 0, &unused, NULL);
|
||||
MAP_SHARED, 0, 0, &unused, NULL);
|
||||
mmap_write_unlock(mm);
|
||||
if (IS_ERR((void *)ctx->mmap_base)) {
|
||||
ctx->mmap_size = 0;
|
||||
|
@ -431,6 +431,11 @@ static inline void task_untag_mask(struct seq_file *m, struct mm_struct *mm)
|
||||
seq_printf(m, "untag_mask:\t%#lx\n", mm_untag_mask(mm));
|
||||
}
|
||||
|
||||
__weak void arch_proc_pid_thread_features(struct seq_file *m,
|
||||
struct task_struct *task)
|
||||
{
|
||||
}
|
||||
|
||||
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *task)
|
||||
{
|
||||
@ -455,6 +460,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
|
||||
task_cpus_allowed(m, task);
|
||||
cpuset_task_status_allowed(m, task);
|
||||
task_context_switch_counts(m, task);
|
||||
arch_proc_pid_thread_features(m, task);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -692,6 +692,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
|
||||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
|
||||
[ilog2(VM_UFFD_MINOR)] = "ui",
|
||||
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
[ilog2(VM_SHADOW_STACK)] = "ss",
|
||||
#endif
|
||||
};
|
||||
size_t i;
|
||||
|
||||
|
@ -22,7 +22,7 @@ static inline unsigned long huge_pte_dirty(pte_t pte)
|
||||
|
||||
static inline pte_t huge_pte_mkwrite(pte_t pte)
|
||||
{
|
||||
return pte_mkwrite(pte);
|
||||
return pte_mkwrite_novma(pte);
|
||||
}
|
||||
|
||||
#ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
|
||||
|
@ -319,11 +319,13 @@ extern unsigned int kobjsize(const void *objp);
|
||||
#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */
|
||||
#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */
|
||||
#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */
|
||||
#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */
|
||||
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0)
|
||||
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1)
|
||||
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2)
|
||||
#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3)
|
||||
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4)
|
||||
#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5)
|
||||
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_PKEYS
|
||||
@ -339,6 +341,21 @@ extern unsigned int kobjsize(const void *objp);
|
||||
#endif
|
||||
#endif /* CONFIG_ARCH_HAS_PKEYS */
|
||||
|
||||
#ifdef CONFIG_X86_USER_SHADOW_STACK
|
||||
/*
|
||||
* VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
|
||||
* support in core mm.
|
||||
*
|
||||
* These VMAs will get a single end guard page. This helps userspace protect
|
||||
* itself from attacks. A single page is enough for current shadow stack archs
|
||||
* (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
|
||||
* for more details on the guard size.
|
||||
*/
|
||||
# define VM_SHADOW_STACK VM_HIGH_ARCH_5
|
||||
#else
|
||||
# define VM_SHADOW_STACK VM_NONE
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_X86)
|
||||
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
|
||||
#elif defined(CONFIG_PPC)
|
||||
@ -370,7 +387,7 @@ extern unsigned int kobjsize(const void *objp);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
|
||||
# define VM_UFFD_MINOR_BIT 37
|
||||
# define VM_UFFD_MINOR_BIT 38
|
||||
# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
|
||||
#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
|
||||
# define VM_UFFD_MINOR VM_NONE
|
||||
@ -397,6 +414,8 @@ extern unsigned int kobjsize(const void *objp);
|
||||
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
|
||||
#endif
|
||||
|
||||
#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
|
||||
|
||||
#ifdef CONFIG_STACK_GROWSUP
|
||||
#define VM_STACK VM_GROWSUP
|
||||
#define VM_STACK_EARLY VM_GROWSDOWN
|
||||
@ -1309,7 +1328,7 @@ static inline unsigned long thp_size(struct page *page)
|
||||
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
|
||||
{
|
||||
if (likely(vma->vm_flags & VM_WRITE))
|
||||
pte = pte_mkwrite(pte);
|
||||
pte = pte_mkwrite(pte, vma);
|
||||
return pte;
|
||||
}
|
||||
|
||||
@ -3265,7 +3284,8 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
struct list_head *uf);
|
||||
extern unsigned long do_mmap(struct file *file, unsigned long addr,
|
||||
unsigned long len, unsigned long prot, unsigned long flags,
|
||||
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
|
||||
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
|
||||
struct list_head *uf);
|
||||
extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
|
||||
unsigned long start, size_t len, struct list_head *uf,
|
||||
bool unlock);
|
||||
@ -3353,15 +3373,26 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
|
||||
return mtree_load(&mm->mm_mt, addr);
|
||||
}
|
||||
|
||||
static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
|
||||
{
|
||||
if (vma->vm_flags & VM_GROWSDOWN)
|
||||
return stack_guard_gap;
|
||||
|
||||
/* See reasoning around the VM_SHADOW_STACK definition */
|
||||
if (vma->vm_flags & VM_SHADOW_STACK)
|
||||
return PAGE_SIZE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
|
||||
{
|
||||
unsigned long gap = stack_guard_start_gap(vma);
|
||||
unsigned long vm_start = vma->vm_start;
|
||||
|
||||
if (vma->vm_flags & VM_GROWSDOWN) {
|
||||
vm_start -= stack_guard_gap;
|
||||
if (vm_start > vma->vm_start)
|
||||
vm_start = 0;
|
||||
}
|
||||
vm_start -= gap;
|
||||
if (vm_start > vma->vm_start)
|
||||
vm_start = 0;
|
||||
return vm_start;
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,9 @@
|
||||
#ifndef MAP_32BIT
|
||||
#define MAP_32BIT 0
|
||||
#endif
|
||||
#ifndef MAP_ABOVE4G
|
||||
#define MAP_ABOVE4G 0
|
||||
#endif
|
||||
#ifndef MAP_HUGE_2MB
|
||||
#define MAP_HUGE_2MB 0
|
||||
#endif
|
||||
@ -50,6 +53,7 @@
|
||||
| MAP_STACK \
|
||||
| MAP_HUGETLB \
|
||||
| MAP_32BIT \
|
||||
| MAP_ABOVE4G \
|
||||
| MAP_HUGE_2MB \
|
||||
| MAP_HUGE_1GB)
|
||||
|
||||
|
@ -371,6 +371,20 @@ static inline bool arch_has_hw_pte_young(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_check_zapped_pte
|
||||
static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
|
||||
pte_t pte)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_check_zapped_pmd
|
||||
static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
|
||||
pmd_t pmd)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
||||
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
||||
unsigned long address,
|
||||
@ -577,6 +591,20 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
|
||||
pud_t *pudp);
|
||||
#endif
|
||||
|
||||
#ifndef pte_mkwrite
|
||||
static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
|
||||
{
|
||||
return pte_mkwrite_novma(pte);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
|
||||
static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
|
||||
{
|
||||
return pmd_mkwrite_novma(pmd);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
|
||||
struct mm_struct;
|
||||
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
|
||||
|
@ -159,6 +159,7 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
|
||||
|
||||
void arch_report_meminfo(struct seq_file *m);
|
||||
void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task);
|
||||
|
||||
#else /* CONFIG_PROC_FS */
|
||||
|
||||
|
@ -939,6 +939,7 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l
|
||||
asmlinkage long sys_cachestat(unsigned int fd,
|
||||
struct cachestat_range __user *cstat_range,
|
||||
struct cachestat __user *cstat, unsigned int flags);
|
||||
asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
|
||||
|
||||
/*
|
||||
* Architecture-specific system calls
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user