mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 11:57:46 +00:00
1930a6e739
This set of changes removes tracehook.h, moves modification of all of the ptrace fields inside of siglock to remove races, and adds a missing permission check to ptrace.c. The removal of tracehook.h is quite significant as it has been a major source of confusion in recent years. Much of that confusion was around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled, making the semantics clearer). For people who don't know, tracehook.h is a vestige of an attempt to implement uprobes-like functionality that was never fully merged, and was later superseded by uprobes when uprobes was merged. For many years now we have been removing tracehook functionality a little bit at a time, to the point where now anything left in tracehook.h is some weird strange thing that is difficult to understand. Eric W. Biederman (15): ptrace: Move ptrace_report_syscall into ptrace.h ptrace/arm: Rename tracehook_report_syscall report_syscall ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h ptrace: Remove arch_syscall_{enter,exit}_tracehook ptrace: Remove tracehook_signal_handler task_work: Remove unnecessary include from posix_timers.h task_work: Introduce task_work_pending task_work: Call tracehook_notify_signal from get_signal on all architectures task_work: Decouple TIF_NOTIFY_SIGNAL and task_work signal: Move set_notify_signal and clear_notify_signal into sched/signal.h resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume resume_user_mode: Move to resume_user_mode.h tracehook: Remove tracehook.h ptrace: Move setting/clearing ptrace_message into ptrace_stop ptrace: Return the signal to continue with from ptrace_stop Jann Horn (1): ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE Yang Li (1): ptrace: Remove duplicated include in ptrace.c MAINTAINERS | 1 - arch/Kconfig | 5 +- arch/alpha/kernel/ptrace.c | 5 +- arch/alpha/kernel/signal.c | 4 +- arch/arc/kernel/ptrace.c | 5 +- arch/arc/kernel/signal.c | 4 +- arch/arm/kernel/ptrace.c | 12 +- 
arch/arm/kernel/signal.c | 4 +- arch/arm64/kernel/ptrace.c | 14 +-- arch/arm64/kernel/signal.c | 4 +- arch/csky/kernel/ptrace.c | 5 +- arch/csky/kernel/signal.c | 4 +- arch/h8300/kernel/ptrace.c | 5 +- arch/h8300/kernel/signal.c | 4 +- arch/hexagon/kernel/process.c | 4 +- arch/hexagon/kernel/signal.c | 1 - arch/hexagon/kernel/traps.c | 6 +- arch/ia64/kernel/process.c | 4 +- arch/ia64/kernel/ptrace.c | 6 +- arch/ia64/kernel/signal.c | 1 - arch/m68k/kernel/ptrace.c | 5 +- arch/m68k/kernel/signal.c | 4 +- arch/microblaze/kernel/ptrace.c | 5 +- arch/microblaze/kernel/signal.c | 4 +- arch/mips/kernel/ptrace.c | 5 +- arch/mips/kernel/signal.c | 4 +- arch/nds32/include/asm/syscall.h | 2 +- arch/nds32/kernel/ptrace.c | 5 +- arch/nds32/kernel/signal.c | 4 +- arch/nios2/kernel/ptrace.c | 5 +- arch/nios2/kernel/signal.c | 4 +- arch/openrisc/kernel/ptrace.c | 5 +- arch/openrisc/kernel/signal.c | 4 +- arch/parisc/kernel/ptrace.c | 7 +- arch/parisc/kernel/signal.c | 4 +- arch/powerpc/kernel/ptrace/ptrace.c | 8 +- arch/powerpc/kernel/signal.c | 4 +- arch/riscv/kernel/ptrace.c | 5 +- arch/riscv/kernel/signal.c | 4 +- arch/s390/include/asm/entry-common.h | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/kernel/signal.c | 5 +- arch/sh/kernel/ptrace_32.c | 5 +- arch/sh/kernel/signal_32.c | 4 +- arch/sparc/kernel/ptrace_32.c | 5 +- arch/sparc/kernel/ptrace_64.c | 5 +- arch/sparc/kernel/signal32.c | 1 - arch/sparc/kernel/signal_32.c | 4 +- arch/sparc/kernel/signal_64.c | 4 +- arch/um/kernel/process.c | 4 +- arch/um/kernel/ptrace.c | 5 +- arch/x86/kernel/ptrace.c | 1 - arch/x86/kernel/signal.c | 5 +- arch/x86/mm/tlb.c | 1 + arch/xtensa/kernel/ptrace.c | 5 +- arch/xtensa/kernel/signal.c | 4 +- block/blk-cgroup.c | 2 +- fs/coredump.c | 1 - fs/exec.c | 1 - fs/io-wq.c | 6 +- fs/io_uring.c | 11 +- fs/proc/array.c | 1 - fs/proc/base.c | 1 - include/asm-generic/syscall.h | 2 +- include/linux/entry-common.h | 47 +------- include/linux/entry-kvm.h | 2 +- include/linux/posix-timers.h | 1 - 
include/linux/ptrace.h | 81 ++++++++++++- include/linux/resume_user_mode.h | 64 ++++++++++ include/linux/sched/signal.h | 17 +++ include/linux/task_work.h | 5 + include/linux/tracehook.h | 226 ----------------------------------- include/uapi/linux/ptrace.h | 2 +- kernel/entry/common.c | 19 +-- kernel/entry/kvm.c | 9 +- kernel/exit.c | 3 +- kernel/livepatch/transition.c | 1 - kernel/ptrace.c | 47 +++++--- kernel/seccomp.c | 1 - kernel/signal.c | 62 +++++----- kernel/task_work.c | 4 +- kernel/time/posix-cpu-timers.c | 1 + mm/memcontrol.c | 2 +- security/apparmor/domain.c | 1 - security/selinux/hooks.c | 1 - 85 files changed, 372 insertions(+), 495 deletions(-) Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEgjlraLDcwBA2B+6cC/v6Eiajj0AFAmJCQkoACgkQC/v6Eiaj j0DCWQ/5AZVFU+hX32obUNCLackHTwgcCtSOs3JNBmNA/zL/htPiYYG0ghkvtlDR Dw5J5DnxC6P7PVAdAqrpvx2uX2FebHYU0bRlyLx8LYUEP5dhyNicxX9jA882Z+vw Ud0Ue9EojwGWS76dC9YoKUj3slThMATbhA2r4GVEoof8fSNJaBxQIqath44t0FwU DinWa+tIOvZANGBZr6CUUINNIgqBIZCH/R4h6ArBhMlJpuQ5Ufk2kAaiWFwZCkX4 0LuuAwbKsCKkF8eap5I2KrIg/7zZVgxAg9O3cHOzzm8OPbKzRnNnQClcDe8perqp S6e/f3MgpE+eavd1EiLxevZ660cJChnmikXVVh8ZYYoefaMKGqBaBSsB38bNcLjY 3+f2dB+TNBFRnZs1aCujK3tWBT9QyjZDKtCBfzxDNWBpXGLhHH6j6lA5Lj+Cef5K /HNHFb+FuqedlFZh5m1Y+piFQ70hTgCa2u8b+FSOubI2hW9Zd+WzINV0ANaZ2LvZ 4YGtcyDNk1q1+c87lxP9xMRl/xi6rNg+B9T2MCo4IUnHgpSVP6VEB3osgUmrrrN0 eQlUI154G/AaDlqXLgmn1xhRmlPGfmenkxpok1AuzxvNJsfLKnpEwQSc13g3oiZr disZQxNY0kBO2Nv3G323Z6PLinhbiIIFez6cJzK5v0YJ2WtO3pY= =uEro -----END PGP SIGNATURE----- Merge tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace Pull ptrace cleanups from Eric Biederman: "This set of changes removes tracehook.h, moves modification of all of the ptrace fields inside of siglock to remove races, adds a missing permission check to ptrace.c The removal of tracehook.h is quite significant as it has been a major source of confusion in recent years. 
Much of that confusion was around task_work and TIF_NOTIFY_SIGNAL (which I have now decoupled, making the semantics clearer). For people who don't know, tracehook.h is a vestige of an attempt to implement uprobes-like functionality that was never fully merged, and was later superseded by uprobes when uprobes was merged. For many years now we have been removing tracehook functionality a little bit at a time, to the point where anything left in tracehook.h was some weird strange thing that was difficult to understand" * tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: ptrace: Remove duplicated include in ptrace.c ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE ptrace: Return the signal to continue with from ptrace_stop ptrace: Move setting/clearing ptrace_message into ptrace_stop tracehook: Remove tracehook.h resume_user_mode: Move to resume_user_mode.h resume_user_mode: Remove #ifdef TIF_NOTIFY_RESUME in set_notify_resume signal: Move set_notify_signal and clear_notify_signal into sched/signal.h task_work: Decouple TIF_NOTIFY_SIGNAL and task_work task_work: Call tracehook_notify_signal from get_signal on all architectures task_work: Introduce task_work_pending task_work: Remove unnecessary include from posix_timers.h ptrace: Remove tracehook_signal_handler ptrace: Remove arch_syscall_{enter,exit}_tracehook ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h ptrace/arm: Rename tracehook_report_syscall report_syscall ptrace: Move ptrace_report_syscall into ptrace.h
492 lines
12 KiB
C
492 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/context_tracking.h>
|
|
#include <linux/entry-common.h>
|
|
#include <linux/resume_user_mode.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/jump_label.h>
|
|
#include <linux/livepatch.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/tick.h>
|
|
|
|
#include "common.h"
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#include <trace/events/syscalls.h>
|
|
|
|
/* See comment for enter_from_user_mode() in entry-common.h */
|
|
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
|
|
{
|
|
arch_check_user_regs(regs);
|
|
lockdep_hardirqs_off(CALLER_ADDR0);
|
|
|
|
CT_WARN_ON(ct_state() != CONTEXT_USER);
|
|
user_exit_irqoff();
|
|
|
|
instrumentation_begin();
|
|
trace_hardirqs_off_finish();
|
|
instrumentation_end();
|
|
}
|
|
|
|
/*
 * Non-inline, noinstr entry point that simply runs the shared
 * __enter_from_user_mode() sequence on @regs.
 */
void noinstr enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}
|
|
|
|
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
|
|
{
|
|
if (unlikely(audit_context())) {
|
|
unsigned long args[6];
|
|
|
|
syscall_get_arguments(current, regs, args);
|
|
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
|
|
}
|
|
}
|
|
|
|
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
|
|
unsigned long work)
|
|
{
|
|
long ret = 0;
|
|
|
|
/*
|
|
* Handle Syscall User Dispatch. This must comes first, since
|
|
* the ABI here can be something that doesn't make sense for
|
|
* other syscall_work features.
|
|
*/
|
|
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
|
if (syscall_user_dispatch(regs))
|
|
return -1L;
|
|
}
|
|
|
|
/* Handle ptrace */
|
|
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
|
|
ret = ptrace_report_syscall_entry(regs);
|
|
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
|
|
return -1L;
|
|
}
|
|
|
|
/* Do seccomp after ptrace, to catch any tracer changes. */
|
|
if (work & SYSCALL_WORK_SECCOMP) {
|
|
ret = __secure_computing(NULL);
|
|
if (ret == -1L)
|
|
return ret;
|
|
}
|
|
|
|
/* Either of the above might have changed the syscall number */
|
|
syscall = syscall_get_nr(current, regs);
|
|
|
|
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
|
|
trace_sys_enter(regs, syscall);
|
|
|
|
syscall_enter_audit(regs, syscall);
|
|
|
|
return ret ? : syscall;
|
|
}
|
|
|
|
static __always_inline long
|
|
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
|
|
{
|
|
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
|
|
|
|
if (work & SYSCALL_WORK_ENTER)
|
|
syscall = syscall_trace_enter(regs, syscall, work);
|
|
|
|
return syscall;
|
|
}
|
|
|
|
/*
 * Out-of-line wrapper around __syscall_enter_from_user_work(); returns
 * its result unchanged.
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}
|
|
|
|
/*
 * Combined syscall entry: run the entry-from-user-mode sequence, enable
 * interrupts, then process pending syscall entry work.
 *
 * Returns the value produced by __syscall_enter_from_user_work().
 */
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long nr;

	__enter_from_user_mode(regs);

	/* Everything from here to instrumentation_end() may be instrumented. */
	instrumentation_begin();
	local_irq_enable();
	nr = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return nr;
}
|
|
|
|
/*
 * Run the entry-from-user-mode sequence and enable interrupts, without
 * processing any syscall entry work.
 */
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}
|
|
|
|
/* See comment for exit_to_user_mode() in entry-common.h */
|
|
static __always_inline void __exit_to_user_mode(void)
|
|
{
|
|
instrumentation_begin();
|
|
trace_hardirqs_on_prepare();
|
|
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
|
|
instrumentation_end();
|
|
|
|
user_enter_irqoff();
|
|
arch_exit_to_user_mode();
|
|
lockdep_hardirqs_on(CALLER_ADDR0);
|
|
}
|
|
|
|
/*
 * Non-inline, noinstr entry point that simply runs the shared
 * __exit_to_user_mode() sequence.
 */
void noinstr exit_to_user_mode(void)
{
	__exit_to_user_mode();
}
|
|
|
|
/* Workaround to allow gradual conversion of architecture code */
|
|
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
|
|
|
|
#ifdef CONFIG_RT_DELAYED_SIGNALS
/*
 * Deliver a signal that was stashed in current->forced_info.
 *
 * A non-zero si_signo marks a pending entry. It is handed to
 * force_sig_info() and then cleared so that it is raised only once.
 */
static inline void raise_delayed_signal(void)
{
	if (unlikely(current->forced_info.si_signo)) {
		/* Pass the address of the stashed siginfo. */
		force_sig_info(&current->forced_info);
		current->forced_info.si_signo = 0;
	}
}
#else
/* Without CONFIG_RT_DELAYED_SIGNALS there is nothing to deliver. */
static inline void raise_delayed_signal(void) { }
#endif
|
|
|
|
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
|
|
unsigned long ti_work)
|
|
{
|
|
/*
|
|
* Before returning to user space ensure that all pending work
|
|
* items have been completed.
|
|
*/
|
|
while (ti_work & EXIT_TO_USER_MODE_WORK) {
|
|
|
|
local_irq_enable_exit_to_user(ti_work);
|
|
|
|
if (ti_work & _TIF_NEED_RESCHED)
|
|
schedule();
|
|
|
|
raise_delayed_signal();
|
|
|
|
if (ti_work & _TIF_UPROBE)
|
|
uprobe_notify_resume(regs);
|
|
|
|
if (ti_work & _TIF_PATCH_PENDING)
|
|
klp_update_patch_state(current);
|
|
|
|
if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
|
|
arch_do_signal_or_restart(regs);
|
|
|
|
if (ti_work & _TIF_NOTIFY_RESUME)
|
|
resume_user_mode_work(regs);
|
|
|
|
/* Architecture specific TIF work */
|
|
arch_exit_to_user_mode_work(regs, ti_work);
|
|
|
|
/*
|
|
* Disable interrupts and reevaluate the work flags as they
|
|
* might have changed while interrupts and preemption was
|
|
* enabled above.
|
|
*/
|
|
local_irq_disable_exit_to_user();
|
|
|
|
/* Check if any of the above work has queued a deferred wakeup */
|
|
tick_nohz_user_enter_prepare();
|
|
|
|
ti_work = read_thread_flags();
|
|
}
|
|
|
|
/* Return the latest work state for arch_exit_to_user_mode() */
|
|
return ti_work;
|
|
}
|
|
|
|
static void exit_to_user_mode_prepare(struct pt_regs *regs)
|
|
{
|
|
unsigned long ti_work = read_thread_flags();
|
|
|
|
lockdep_assert_irqs_disabled();
|
|
|
|
/* Flush pending rcuog wakeup before the last need_resched() check */
|
|
tick_nohz_user_enter_prepare();
|
|
|
|
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
|
|
ti_work = exit_to_user_mode_loop(regs, ti_work);
|
|
|
|
arch_exit_to_user_mode_prepare(regs, ti_work);
|
|
|
|
/* Ensure that the address limit is intact and no locks are held */
|
|
addr_limit_user_check();
|
|
kmap_assert_nomap();
|
|
lockdep_assert_irqs_disabled();
|
|
lockdep_sys_exit();
|
|
}
|
|
|
|
/*
|
|
* If SYSCALL_EMU is set, then the only reason to report is when
|
|
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
|
|
* instruction has been already reported in syscall_enter_from_user_mode().
|
|
*/
|
|
static inline bool report_single_step(unsigned long work)
|
|
{
|
|
if (work & SYSCALL_WORK_SYSCALL_EMU)
|
|
return false;
|
|
|
|
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
|
|
}
|
|
|
|
static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
|
|
{
|
|
bool step;
|
|
|
|
/*
|
|
* If the syscall was rolled back due to syscall user dispatching,
|
|
* then the tracers below are not invoked for the same reason as
|
|
* the entry side was not invoked in syscall_trace_enter(): The ABI
|
|
* of these syscalls is unknown.
|
|
*/
|
|
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
|
if (unlikely(current->syscall_dispatch.on_dispatch)) {
|
|
current->syscall_dispatch.on_dispatch = false;
|
|
return;
|
|
}
|
|
}
|
|
|
|
audit_syscall_exit(regs);
|
|
|
|
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
|
|
trace_sys_exit(regs, syscall_get_return_value(current, regs));
|
|
|
|
step = report_single_step(work);
|
|
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
|
|
ptrace_report_syscall_exit(regs, step);
|
|
}
|
|
|
|
/*
|
|
* Syscall specific exit to user mode preparation. Runs with interrupts
|
|
* enabled.
|
|
*/
|
|
static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
|
|
{
|
|
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
|
|
unsigned long nr = syscall_get_nr(current, regs);
|
|
|
|
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
|
|
|
|
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
|
|
if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
|
|
local_irq_enable();
|
|
}
|
|
|
|
rseq_syscall(regs);
|
|
|
|
/*
|
|
* Do one-time syscall specific work. If these work items are
|
|
* enabled, we want to run them exactly once per syscall exit with
|
|
* interrupts enabled.
|
|
*/
|
|
if (unlikely(work & SYSCALL_WORK_EXIT))
|
|
syscall_exit_work(regs, work);
|
|
}
|
|
|
|
/*
 * Syscall exit work in three steps: the syscall-specific preparation,
 * then interrupts off, then the common exit-to-user preparation.
 */
static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	syscall_exit_to_user_mode_prepare(regs);
	local_irq_disable_exit_to_user();
	exit_to_user_mode_prepare(regs);
}
|
|
|
|
/*
 * Out-of-line wrapper around __syscall_exit_to_user_mode_work(). Unlike
 * syscall_exit_to_user_mode(), it does not call __exit_to_user_mode().
 */
void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
	__syscall_exit_to_user_mode_work(regs);
}
|
|
|
|
/*
 * Complete syscall exit: run the syscall exit work inside an
 * instrumentable section, then the final __exit_to_user_mode() sequence
 * outside of it.
 */
__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	__syscall_exit_to_user_mode_work(regs);
	instrumentation_end();

	__exit_to_user_mode();
}
|
|
|
|
/*
 * Entry-from-user-mode for the irqentry path; runs the shared
 * __enter_from_user_mode() sequence on @regs.
 */
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}
|
|
|
|
/*
 * Exit-to-user-mode for the irqentry path: run the common preparation
 * inside an instrumentable section, then the final
 * __exit_to_user_mode() sequence outside of it.
 */
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();

	__exit_to_user_mode();
}
|
|
|
|
/*
 * Entry-side lockdep, RCU and tracing handling for irqentry callers.
 *
 * Returns an irqentry_state_t whose exit_rcu member is true only when
 * rcu_irq_enter() was invoked here (the idle task case below).
 */
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t state = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return state;
	}

	/*
	 * When this entry hit the idle task, rcu_irq_enter() is invoked
	 * regardless of whether RCU is watching.
	 *
	 * Interrupts can nest: the first interrupt may run softirq
	 * processing on return, and that enables interrupts.
	 *
	 * A scheduler tick in the idle task can mark a quiescent state
	 * and terminate a grace period, if and only if the timer
	 * interrupt is not nested into another interrupt.
	 *
	 * Testing rcu_is_watching() here would keep the nesting
	 * interrupt from invoking rcu_irq_enter(). Should that nested
	 * interrupt be the tick, rcu_flavor_sched_clock_irq() would
	 * wrongly assume it is the first interrupt and eventually claim
	 * a quiescent state and end grace periods prematurely.
	 *
	 * So rcu_irq_enter() is invoked unconditionally, which keeps the
	 * RCU state consistent.
	 *
	 * TINY_RCU has no EQS support, so the compiler can drop this
	 * part when it is enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching, the same careful sequence
		 * vs. lockdep and tracing is required as in
		 * irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		state.exit_rcu = true;
		return state;
	}

	/*
	 * RCU is watching: all RCU wants is to check whether the tick
	 * needs to be restarted in NOHZ mode. rcu_irq_enter_check_tick()
	 * already warns when RCU is not watching, so a second warning
	 * here would be pointless.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return state;
}
|
|
|
|
void raw_irqentry_exit_cond_resched(void)
|
|
{
|
|
if (!preempt_count()) {
|
|
/* Sanity check RCU and thread stack */
|
|
rcu_irq_exit_check_preempt();
|
|
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
|
|
WARN_ON_ONCE(!on_thread_stack());
|
|
if (need_resched())
|
|
preempt_schedule_irq();
|
|
}
|
|
}
|
|
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
/* Static call variant: initially targets raw_irqentry_exit_cond_resched(). */
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
/* Static key variant: the key is defined in the "true" state. */
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);

/*
 * Static-key gated front end for raw_irqentry_exit_cond_resched(): the
 * resched check only runs while the key is enabled.
 */
void dynamic_irqentry_exit_cond_resched(void)
{
	/*
	 * Keys declared with DEFINE_STATIC_KEY_TRUE are tested with
	 * static_branch_unlikely(); the jump label API has no
	 * static_key_unlikely().
	 */
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif
|
|
|
|
/*
 * Exit-side counterpart of irqentry_enter().
 *
 * @regs:  register frame of the interrupted context
 * @state: value returned by the matching irqentry_enter(); exit_rcu
 *         tells whether rcu_irq_exit() has to be called
 *
 * Must be called with interrupts disabled (asserted via lockdep).
 */
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Returning to user mode takes the dedicated exit path */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
		return;
	}

	if (regs_irqs_disabled(regs)) {
		/*
		 * The IRQ flags state is already correct. RCU only needs
		 * to be told if it was not watching on entry.
		 */
		if (state.exit_rcu)
			rcu_irq_exit();
		return;
	}

	/*
	 * If RCU was not watching on entry, this has to be done carefully
	 * and needs the same ordering of lockdep/tracing and RCU as the
	 * return to user mode path.
	 */
	if (state.exit_rcu) {
		instrumentation_begin();
		/* Tell the tracer that IRET will enable interrupts */
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
		instrumentation_end();
		rcu_irq_exit();
		lockdep_hardirqs_on(CALLER_ADDR0);
		return;
	}

	instrumentation_begin();
	if (IS_ENABLED(CONFIG_PREEMPTION))
		irqentry_exit_cond_resched();

	/* Covers both tracing and lockdep */
	trace_hardirqs_on();
	instrumentation_end();
}
|
|
|
|
/*
 * NMI entry handling.
 *
 * Records in the returned state whether lockdep considered hardirqs
 * enabled on entry; irqentry_nmi_exit() uses that to decide whether to
 * switch the lockdep hardirq state back on. The call order below is
 * kept exactly as is.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	/* Sample the lockdep state before anything below changes it. */
	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	rcu_nmi_enter();

	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}
|
|
|
|
/*
 * NMI exit handling; undoes irqentry_nmi_enter() in reverse order.
 *
 * @regs:      register frame of the interrupted context (not used here)
 * @irq_state: value returned by the matching irqentry_nmi_enter()
 *
 * The lockdep/tracing "hardirqs on" steps only run when
 * @irq_state.lockdep says hardirqs were enabled on entry.
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	}
	instrumentation_end();

	rcu_nmi_exit();
	lockdep_hardirq_exit();
	/* Second half of the "hardirqs on" transition, after RCU is done. */
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}
|