mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 15:29:16 +00:00
1930a6e739
This set of changes removes tracehook.h, moves modification of all of the ptrace fields inside of siglock to remove races, adds a missing permission check to ptrace.c The removal of tracehook.h is quite significant as it has been a major source of confusion in recent years. Much of that confusion was around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled making the semantics clearer). For people who don't know tracehook.h is a vestiage of an attempt to implement uprobes like functionality that was never fully merged, and was later superseeded by uprobes when uprobes was merged. For many years now we have been removing what tracehook functionaly a little bit at a time. To the point where now anything left in tracehook.h is some weird strange thing that is difficult to understand. Eric W. Biederman (15): ptrace: Move ptrace_report_syscall into ptrace.h ptrace/arm: Rename tracehook_report_syscall report_syscall ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h ptrace: Remove arch_syscall_{enter,exit}_tracehook ptrace: Remove tracehook_signal_handler task_work: Remove unnecessary include from posix_timers.h task_work: Introduce task_work_pending task_work: Call tracehook_notify_signal from get_signal on all architectures task_work: Decouple TIF_NOTIFY_SIGNAL and task_work signal: Move set_notify_signal and clear_notify_signal into sched/signal.h resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume resume_user_mode: Move to resume_user_mode.h tracehook: Remove tracehook.h ptrace: Move setting/clearing ptrace_message into ptrace_stop ptrace: Return the signal to continue with from ptrace_stop Jann Horn (1): ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE Yang Li (1): ptrace: Remove duplicated include in ptrace.c MAINTAINERS | 1 - arch/Kconfig | 5 +- arch/alpha/kernel/ptrace.c | 5 +- arch/alpha/kernel/signal.c | 4 +- arch/arc/kernel/ptrace.c | 5 +- arch/arc/kernel/signal.c | 4 +- arch/arm/kernel/ptrace.c | 12 +- 
arch/arm/kernel/signal.c | 4 +- arch/arm64/kernel/ptrace.c | 14 +-- arch/arm64/kernel/signal.c | 4 +- arch/csky/kernel/ptrace.c | 5 +- arch/csky/kernel/signal.c | 4 +- arch/h8300/kernel/ptrace.c | 5 +- arch/h8300/kernel/signal.c | 4 +- arch/hexagon/kernel/process.c | 4 +- arch/hexagon/kernel/signal.c | 1 - arch/hexagon/kernel/traps.c | 6 +- arch/ia64/kernel/process.c | 4 +- arch/ia64/kernel/ptrace.c | 6 +- arch/ia64/kernel/signal.c | 1 - arch/m68k/kernel/ptrace.c | 5 +- arch/m68k/kernel/signal.c | 4 +- arch/microblaze/kernel/ptrace.c | 5 +- arch/microblaze/kernel/signal.c | 4 +- arch/mips/kernel/ptrace.c | 5 +- arch/mips/kernel/signal.c | 4 +- arch/nds32/include/asm/syscall.h | 2 +- arch/nds32/kernel/ptrace.c | 5 +- arch/nds32/kernel/signal.c | 4 +- arch/nios2/kernel/ptrace.c | 5 +- arch/nios2/kernel/signal.c | 4 +- arch/openrisc/kernel/ptrace.c | 5 +- arch/openrisc/kernel/signal.c | 4 +- arch/parisc/kernel/ptrace.c | 7 +- arch/parisc/kernel/signal.c | 4 +- arch/powerpc/kernel/ptrace/ptrace.c | 8 +- arch/powerpc/kernel/signal.c | 4 +- arch/riscv/kernel/ptrace.c | 5 +- arch/riscv/kernel/signal.c | 4 +- arch/s390/include/asm/entry-common.h | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/kernel/signal.c | 5 +- arch/sh/kernel/ptrace_32.c | 5 +- arch/sh/kernel/signal_32.c | 4 +- arch/sparc/kernel/ptrace_32.c | 5 +- arch/sparc/kernel/ptrace_64.c | 5 +- arch/sparc/kernel/signal32.c | 1 - arch/sparc/kernel/signal_32.c | 4 +- arch/sparc/kernel/signal_64.c | 4 +- arch/um/kernel/process.c | 4 +- arch/um/kernel/ptrace.c | 5 +- arch/x86/kernel/ptrace.c | 1 - arch/x86/kernel/signal.c | 5 +- arch/x86/mm/tlb.c | 1 + arch/xtensa/kernel/ptrace.c | 5 +- arch/xtensa/kernel/signal.c | 4 +- block/blk-cgroup.c | 2 +- fs/coredump.c | 1 - fs/exec.c | 1 - fs/io-wq.c | 6 +- fs/io_uring.c | 11 +- fs/proc/array.c | 1 - fs/proc/base.c | 1 - include/asm-generic/syscall.h | 2 +- include/linux/entry-common.h | 47 +------- include/linux/entry-kvm.h | 2 +- include/linux/posix-timers.h | 1 - 
include/linux/ptrace.h | 81 ++++++++++++- include/linux/resume_user_mode.h | 64 ++++++++++ include/linux/sched/signal.h | 17 +++ include/linux/task_work.h | 5 + include/linux/tracehook.h | 226 ----------------------------------- include/uapi/linux/ptrace.h | 2 +- kernel/entry/common.c | 19 +-- kernel/entry/kvm.c | 9 +- kernel/exit.c | 3 +- kernel/livepatch/transition.c | 1 - kernel/ptrace.c | 47 +++++--- kernel/seccomp.c | 1 - kernel/signal.c | 62 +++++----- kernel/task_work.c | 4 +- kernel/time/posix-cpu-timers.c | 1 + mm/memcontrol.c | 2 +- security/apparmor/domain.c | 1 - security/selinux/hooks.c | 1 - 85 files changed, 372 insertions(+), 495 deletions(-) Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmJCQkoACgkQC/v6Eiaj j0DCWQ/5AZVFU+hX32obUNCLackHTwgcCtSOs3JNBmNA/zL/htPiYYG0ghkvtlDR Dw5J5DnxC6P7PVAdAqrpvx2uX2FebHYU0bRlyLx8LYUEP5dhyNicxX9jA882Z+vw Ud0Ue9EojwGWS76dC9YoKUj3slThMATbhA2r4GVEoof8fSNJaBxQIqath44t0FwU DinWa+tIOvZANGBZr6CUUINNIgqBIZCH/R4h6ArBhMlJpuQ5Ufk2kAaiWFwZCkX4 0LuuAwbKsCKkF8eap5I2KrIg/7zZVgxAg9O3cHOzzm8OPbKzRnNnQClcDe8perqp S6e/f3MgpE+eavd1EiLxevZ660cJChnmikXVVh8ZYYoefaMKGqBaBSsB38bNcLjY 3+f2dB+TNBFRnZs1aCujK3tWBT9QyjZDKtCBfzxDNWBpXGLhHH6j6lA5Lj+Cef5K /HNHFb+FuqedlFZh5m1Y+piFQ70hTgCa2u8b+FSOubI2hW9Zd+WzINV0ANaZ2LvZ 4YGtcyDNk1q1+c87lxP9xMRl/xi6rNg+B9T2MCo4IUnHgpSVP6VEB3osgUmrrrN0 eQlUI154G/AaDlqXLgmn1xhRmlPGfmenkxpok1AuzxvNJsfLKnpEwQSc13g3oiZr disZQxNY0kBO2Nv3G323Z6PLinhbiIIFez6cJzK5v0YJ2WtO3pY= =uEro -----END PGP SIGNATURE----- Merge tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace Pull ptrace cleanups from Eric Biederman: "This set of changes removes tracehook.h, moves modification of all of the ptrace fields inside of siglock to remove races, adds a missing permission check to ptrace.c The removal of tracehook.h is quite significant as it has been a major source of confusion in recent years. 
Much of that confusion was around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled making the semantics clearer). For people who don't know tracehook.h is a vestiage of an attempt to implement uprobes like functionality that was never fully merged, and was later superseeded by uprobes when uprobes was merged. For many years now we have been removing what tracehook functionaly a little bit at a time. To the point where anything left in tracehook.h was some weird strange thing that was difficult to understand" * tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: ptrace: Remove duplicated include in ptrace.c ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE ptrace: Return the signal to continue with from ptrace_stop ptrace: Move setting/clearing ptrace_message into ptrace_stop tracehook: Remove tracehook.h resume_user_mode: Move to resume_user_mode.h resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume signal: Move set_notify_signal and clear_notify_signal into sched/signal.h task_work: Decouple TIF_NOTIFY_SIGNAL and task_work task_work: Call tracehook_notify_signal from get_signal on all architectures task_work: Introduce task_work_pending task_work: Remove unnecessary include from posix_timers.h ptrace: Remove tracehook_signal_handler ptrace: Remove arch_syscall_{enter,exit}_tracehook ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h ptrace/arm: Rename tracehook_report_syscall report_syscall ptrace: Move ptrace_report_syscall into ptrace.h
470 lines
16 KiB
C
470 lines
16 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_ENTRYCOMMON_H
|
|
#define __LINUX_ENTRYCOMMON_H
|
|
|
|
#include <linux/static_call_types.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/seccomp.h>
|
|
#include <linux/sched.h>
|
|
|
|
#include <asm/entry-common.h>
|
|
|
|
/*
 * Define dummy _TIF work flags if not defined by the architecture or for
 * disabled functionality.
 *
 * The dummies evaluate to zero, so OR-ing them into EXIT_TO_USER_MODE_WORK
 * has no effect.
 */
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING		(0)
#endif

#ifndef _TIF_UPROBE
# define _TIF_UPROBE			(0)
#endif
|
|
|
|
/*
 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
 *
 * Architectures which do not provide their own bits get an empty (zero)
 * contribution to SYSCALL_WORK_ENTER.
 */
#ifndef ARCH_SYSCALL_WORK_ENTER
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif
|
|
|
|
/*
 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
 *
 * Architectures which do not provide their own bits get an empty (zero)
 * contribution to SYSCALL_WORK_EXIT.
 */
#ifndef ARCH_SYSCALL_WORK_EXIT
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif
|
|
|
|
/*
 * Mask combining the generic SYSCALL_WORK_* syscall entry bits with the
 * architecture specific ARCH_SYSCALL_WORK_ENTER bits.
 */
#define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_EMU |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 ARCH_SYSCALL_WORK_ENTER)
|
|
/*
 * Mask combining the generic SYSCALL_WORK_* syscall exit bits with the
 * architecture specific ARCH_SYSCALL_WORK_EXIT bits.
 */
#define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP |	\
				 ARCH_SYSCALL_WORK_EXIT)
|
|
|
|
/*
 * TIF flags handled in exit_to_user_mode_loop()
 *
 * Architectures which do not provide their own bits get an empty (zero)
 * contribution to EXIT_TO_USER_MODE_WORK.
 */
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
# define ARCH_EXIT_TO_USER_MODE_WORK		(0)
#endif
|
|
|
|
/*
 * Mask combining the generic _TIF_* exit-to-user work bits with the
 * architecture specific ARCH_EXIT_TO_USER_MODE_WORK bits.
 */
#define EXIT_TO_USER_MODE_WORK						\
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
	 ARCH_EXIT_TO_USER_MODE_WORK)
|
|
|
|
/**
|
|
* arch_check_user_regs - Architecture specific sanity check for user mode regs
|
|
* @regs: Pointer to currents pt_regs
|
|
*
|
|
* Defaults to an empty implementation. Can be replaced by architecture
|
|
* specific code.
|
|
*
|
|
* Invoked from syscall_enter_from_user_mode() in the non-instrumentable
|
|
* section. Use __always_inline so the compiler cannot push it out of line
|
|
* and make it instrumentable.
|
|
*/
|
|
static __always_inline void arch_check_user_regs(struct pt_regs *regs);
|
|
|
|
#ifndef arch_check_user_regs
|
|
static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
|
|
#endif
|
|
|
|
/**
 * enter_from_user_mode - Establish state when coming from user mode
 * @regs:	Pointer to current's pt_regs
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct and interrupts are still
 * disabled. The subsequent functions can be instrumented.
 *
 * This is invoked when there is architecture specific functionality to be
 * done between establishing state and enabling interrupts. The caller must
 * enable interrupts before invoking syscall_enter_from_user_mode_work().
 */
void enter_from_user_mode(struct pt_regs *regs);
|
|
|
|
/**
 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This handles lockdep, RCU (context tracking) and tracing state, i.e.
 * the functionality provided by enter_from_user_mode().
 *
 * This is invoked when there is extra architecture specific functionality
 * to be done between establishing state and handling user mode entry work.
 */
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
|
|
|
|
/**
 * syscall_enter_from_user_mode_work - Check and handle work before invoking
 *				       a syscall
 * @regs:	Pointer to current's pt_regs
 * @syscall:	The syscall number
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
 * architecture specific work.
 *
 * Returns: The original or a modified syscall number
 *
 * If the returned syscall number is -1 then the syscall should be
 * skipped. In this case the caller may invoke syscall_set_error() or
 * syscall_set_return_value() first.  If neither of those are called and -1
 * is returned, then the syscall will fail with ENOSYS.
 *
 * It handles the following work items:
 *
 *  1) syscall_work flag dependent invocations of
 *     ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
 *  2) Invocation of audit_syscall_entry()
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
|
|
|
|
/**
 * syscall_enter_from_user_mode - Establish state and check and handle work
 *				  before invoking a syscall
 * @regs:	Pointer to current's pt_regs
 * @syscall:	The syscall number
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This is a combination of syscall_enter_from_user_mode_prepare() and
 * syscall_enter_from_user_mode_work().
 *
 * Returns: The original or a modified syscall number. See
 * syscall_enter_from_user_mode_work() for further explanation.
 */
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
|
|
|
|
/**
 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Defaults to local_irq_enable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);

#ifndef local_irq_enable_exit_to_user
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
{
	/* The default implementation does not look at @ti_work. */
	local_irq_enable();
}
#endif
|
|
|
|
/**
 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
 *
 * Defaults to local_irq_disable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_disable_exit_to_user(void);

#ifndef local_irq_disable_exit_to_user
static inline void local_irq_disable_exit_to_user(void)
{
	local_irq_disable();
}
#endif
|
|
|
|
/**
|
|
* arch_exit_to_user_mode_work - Architecture specific TIF work for exit
|
|
* to user mode.
|
|
* @regs: Pointer to currents pt_regs
|
|
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
|
*
|
|
* Invoked from exit_to_user_mode_loop() with interrupt enabled
|
|
*
|
|
* Defaults to NOOP. Can be supplied by architecture specific code.
|
|
*/
|
|
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
|
|
unsigned long ti_work);
|
|
|
|
#ifndef arch_exit_to_user_mode_work
|
|
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
|
|
unsigned long ti_work)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* arch_exit_to_user_mode_prepare - Architecture specific preparation for
|
|
* exit to user mode.
|
|
* @regs: Pointer to currents pt_regs
|
|
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
|
*
|
|
* Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last
|
|
* function before return. Defaults to NOOP.
|
|
*/
|
|
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
|
unsigned long ti_work);
|
|
|
|
#ifndef arch_exit_to_user_mode_prepare
|
|
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
|
unsigned long ti_work)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* arch_exit_to_user_mode - Architecture specific final work before
|
|
* exit to user mode.
|
|
*
|
|
* Invoked from exit_to_user_mode() with interrupt disabled as the last
|
|
* function before return. Defaults to NOOP.
|
|
*
|
|
* This needs to be __always_inline because it is non-instrumentable code
|
|
* invoked after context tracking switched to user mode.
|
|
*
|
|
* An architecture implementation must not do anything complex, no locking
|
|
* etc. The main purpose is for speculation mitigations.
|
|
*/
|
|
static __always_inline void arch_exit_to_user_mode(void);
|
|
|
|
#ifndef arch_exit_to_user_mode
|
|
static __always_inline void arch_exit_to_user_mode(void) { }
|
|
#endif
|
|
|
|
/**
 * arch_do_signal_or_restart -  Architecture specific signal delivery function
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from exit_to_user_mode_loop().
 */
void arch_do_signal_or_restart(struct pt_regs *regs);
|
|
|
|
/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.: arch_exit_to_user_mode()
 * 4) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code when syscall_exit_to_user_mode()
 * is not suitable as the last step before returning to userspace. Must be
 * invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke syscall_exit_to_user_mode_work() before this.
 */
void exit_to_user_mode(void);
|
|
|
|
/**
 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
 * @regs:	Pointer to current's pt_regs
 *
 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
 * exit_to_user_mode() to perform the final transition to user mode.
 *
 * Calling convention is the same as for syscall_exit_to_user_mode() and it
 * returns with all work handled and interrupts disabled. The caller must
 * invoke exit_to_user_mode() before actually switching to user mode to
 * make the final state transitions. Interrupts must stay disabled between
 * return from this function and the invocation of exit_to_user_mode().
 */
void syscall_exit_to_user_mode_work(struct pt_regs *regs);
|
|
|
|
/**
 * syscall_exit_to_user_mode - Handle work before returning to user mode
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked with interrupts enabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific syscall and ret
 * from fork code.
 *
 * The call order is:
 *  1) One-time syscall exit work:
 *	- rseq syscall exit
 *	- audit
 *	- syscall tracing
 *	- ptrace (single stepping)
 *
 *  2) Preparatory work
 *	- Exit to user mode loop (common TIF handling). Invokes
 *	  arch_exit_to_user_mode_work() for architecture specific TIF work
 *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
 *	- Address limit and lockdep checks
 *
 *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
 *     functionality in exit_to_user_mode().
 *
 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
 * exit_to_user_mode(). This function is preferred unless there is a
 * compelling architectural reason to use the separate functions.
 */
void syscall_exit_to_user_mode(struct pt_regs *regs);
|
|
|
|
/**
 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific entry code with interrupts disabled.
 * Can only be called when the interrupt entry came from user mode. The
 * calling code must be non-instrumentable.  When the function returns all
 * state is correct and the subsequent functions can be instrumented.
 *
 * The function establishes state (lockdep, RCU (context tracking), tracing).
 */
void irqentry_enter_from_user_mode(struct pt_regs *regs);
|
|
|
|
/**
 * irqentry_exit_to_user_mode - Interrupt exit work
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked with interrupts disabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific interrupt
 * handling code.
 *
 * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
 * Interrupt exit is not invoking #1 which is the syscall specific one time
 * work.
 */
void irqentry_exit_to_user_mode(struct pt_regs *regs);
|
|
|
|
#ifndef irqentry_state
/**
 * struct irqentry_state - Opaque object for exception state storage
 * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
 *            exit path has to invoke rcu_irq_exit().
 * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
 *           lockdep state is restored correctly on exit from nmi.
 *
 * This opaque object is filled in by the irqentry_*_enter() functions and
 * must be passed back into the corresponding irqentry_*_exit() functions
 * when the exception is complete.
 *
 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
 * and all members private.  Descriptions of the members are provided to aid in
 * the maintenance of the irqentry_*() functions.
 *
 * Both members live in one anonymous union and therefore share storage;
 * only the member matching the entry function that filled in the object
 * is meaningful.
 */
typedef struct irqentry_state {
	union {
		bool	exit_rcu;
		bool	lockdep;
	};
} irqentry_state_t;
#endif
|
|
|
|
/**
|
|
* irqentry_enter - Handle state tracking on ordinary interrupt entries
|
|
* @regs: Pointer to pt_regs of interrupted context
|
|
*
|
|
* Invokes:
|
|
* - lockdep irqflag state tracking as low level ASM entry disabled
|
|
* interrupts.
|
|
*
|
|
* - Context tracking if the exception hit user mode.
|
|
*
|
|
* - The hardirq tracer to keep the state consistent as low level ASM
|
|
* entry disabled interrupts.
|
|
*
|
|
* As a precondition, this requires that the entry came from user mode,
|
|
* idle, or a kernel context in which RCU is watching.
|
|
*
|
|
* For kernel mode entries RCU handling is done conditional. If RCU is
|
|
* watching then the only RCU requirement is to check whether the tick has
|
|
* to be restarted. If RCU is not watching then rcu_irq_enter() has to be
|
|
* invoked on entry and rcu_irq_exit() on exit.
|
|
*
|
|
* Avoiding the rcu_irq_enter/exit() calls is an optimization but also
|
|
* solves the problem of kernel mode pagefaults which can schedule, which
|
|
* is not possible after invoking rcu_irq_enter() without undoing it.
|
|
*
|
|
* For user mode entries irqentry_enter_from_user_mode() is invoked to
|
|
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
|
|
* would not be possible.
|
|
*
|
|
* Returns: An opaque object that must be passed to idtentry_exit()
|
|
*/
|
|
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
|
|
|
|
/**
 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
 *
 * Conditional reschedule with additional sanity checks.
 *
 * irqentry_exit_cond_resched() is a macro which resolves to:
 *  - a static call declared against raw_irqentry_exit_cond_resched() when
 *    CONFIG_PREEMPT_DYNAMIC and CONFIG_HAVE_PREEMPT_DYNAMIC_CALL are set,
 *  - dynamic_irqentry_exit_cond_resched() when CONFIG_PREEMPT_DYNAMIC and
 *    CONFIG_HAVE_PREEMPT_DYNAMIC_KEY are set,
 *  - raw_irqentry_exit_cond_resched() when CONFIG_PREEMPT_DYNAMIC is not set.
 */
void raw_irqentry_exit_cond_resched(void);
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
#define irqentry_exit_cond_resched_dynamic_disabled	NULL
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void);
#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
#endif
#else /* CONFIG_PREEMPT_DYNAMIC */
#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
|
|
|
/**
|
|
* irqentry_exit - Handle return from exception that used irqentry_enter()
|
|
* @regs: Pointer to pt_regs (exception entry regs)
|
|
* @state: Return value from matching call to irqentry_enter()
|
|
*
|
|
* Depending on the return target (kernel/user) this runs the necessary
|
|
* preemption and work checks if possible and required and returns to
|
|
* the caller with interrupts disabled and no further work pending.
|
|
*
|
|
* This is the last action before returning to the low level ASM code which
|
|
* just needs to return to the appropriate context.
|
|
*
|
|
* Counterpart to irqentry_enter().
|
|
*/
|
|
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
|
|
|
|
/**
|
|
* irqentry_nmi_enter - Handle NMI entry
|
|
* @regs: Pointer to currents pt_regs
|
|
*
|
|
* Similar to irqentry_enter() but taking care of the NMI constraints.
|
|
*/
|
|
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
|
|
|
|
/**
|
|
* irqentry_nmi_exit - Handle return from NMI handling
|
|
* @regs: Pointer to pt_regs (NMI entry regs)
|
|
* @irq_state: Return value from matching call to irqentry_nmi_enter()
|
|
*
|
|
* Last action before returning to the low level assembly code.
|
|
*
|
|
* Counterpart to irqentry_nmi_enter().
|
|
*/
|
|
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
|
|
|
|
#endif
|