mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 07:23:14 +00:00
179a9cf792
The typical steps with context tracking are: 1) Task runs in userspace 2) Task enters the kernel (syscall/exception/IRQ) 3) Task switches from context tracking state CONTEXT_USER to CONTEXT_KERNEL (user_exit()) 4) Task does stuff in kernel 5) Task switches from context tracking state CONTEXT_KERNEL to CONTEXT_USER (user_enter()) 6) Task exits the kernel. If an exception fires between 5) and 6), the pt_regs and the context tracking disagree on the context of the faulted/trapped instruction. CONTEXT_KERNEL must be set before the exception handler, that's unconditional for those handlers that want to be able to call into schedule(), but CONTEXT_USER must be restored when the exception exits whereas pt_regs says that we are resuming to kernel space. This can't be fixed by storing the context tracking state in a per-cpu or per-task variable since another exception may fire onto the current one and overwrite the saved state. Also the task can schedule. So it has to be stored in a per task stack. This is how exception_enter()/exception_exit() paper over the problem: 5) Task switches from context tracking state CONTEXT_KERNEL to CONTEXT_USER (user_enter()) 5.1) Exception fires 5.2) prev_state = exception_enter() // save CONTEXT_USER to prev_state // and set CONTEXT_KERNEL 5.3) Exception handler 5.4) exception_exit(prev_state) // restore CONTEXT_USER 5.5) Exception resumes 6) Task exits the kernel. The condition to live without exception_enter()/exception_exit() is to forbid exceptions and IRQs between 2) and 3) and between 5) and 6), or if any is allowed to trigger, it won't call into context tracking, e.g. NMIs, and it won't schedule. These requirements are met by architectures supporting CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK and those can therefore afford not to implement this hack. 
Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20201117151637.259084-3-frederic@kernel.org
182 lines
4.7 KiB
C
182 lines
4.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_CONTEXT_TRACKING_H
|
|
#define _LINUX_CONTEXT_TRACKING_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/vtime.h>
|
|
#include <linux/context_tracking_state.h>
|
|
#include <linux/instrumentation.h>
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
|
|
#ifdef CONFIG_CONTEXT_TRACKING
|
|
void context_tracking_cpu_set(int cpu);

/*
 * Low-level state transitions.
 * Both must be called with interrupts disabled.
 */
void __context_tracking_enter(enum ctx_state state);
void __context_tracking_exit(enum ctx_state state);

/* State transitions used by the user_enter()/user_exit() wrappers below. */
void context_tracking_enter(enum ctx_state state);
void context_tracking_exit(enum ctx_state state);

void context_tracking_user_enter(void);
void context_tracking_user_exit(void);
|
|
|
|
static inline void user_enter(void)
|
|
{
|
|
if (context_tracking_enabled())
|
|
context_tracking_enter(CONTEXT_USER);
|
|
|
|
}
|
|
static inline void user_exit(void)
|
|
{
|
|
if (context_tracking_enabled())
|
|
context_tracking_exit(CONTEXT_USER);
|
|
}
|
|
|
|
/* Called with interrupts disabled. */
|
|
static __always_inline void user_enter_irqoff(void)
|
|
{
|
|
if (context_tracking_enabled())
|
|
__context_tracking_enter(CONTEXT_USER);
|
|
|
|
}
|
|
static __always_inline void user_exit_irqoff(void)
|
|
{
|
|
if (context_tracking_enabled())
|
|
__context_tracking_exit(CONTEXT_USER);
|
|
}
|
|
|
|
static inline enum ctx_state exception_enter(void)
|
|
{
|
|
enum ctx_state prev_ctx;
|
|
|
|
if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) ||
|
|
!context_tracking_enabled())
|
|
return 0;
|
|
|
|
prev_ctx = this_cpu_read(context_tracking.state);
|
|
if (prev_ctx != CONTEXT_KERNEL)
|
|
context_tracking_exit(prev_ctx);
|
|
|
|
return prev_ctx;
|
|
}
|
|
|
|
static inline void exception_exit(enum ctx_state prev_ctx)
|
|
{
|
|
if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) &&
|
|
context_tracking_enabled()) {
|
|
if (prev_ctx != CONTEXT_KERNEL)
|
|
context_tracking_enter(prev_ctx);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* ct_state() - return the current context tracking state if known
|
|
*
|
|
* Returns the current cpu's context tracking state if context tracking
|
|
* is enabled. If context tracking is disabled, returns
|
|
* CONTEXT_DISABLED. This should be used primarily for debugging.
|
|
*/
|
|
static __always_inline enum ctx_state ct_state(void)
|
|
{
|
|
return context_tracking_enabled() ?
|
|
this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
|
|
}
|
|
#else
|
|
static inline void user_enter(void) { }
|
|
static inline void user_exit(void) { }
|
|
static inline void user_enter_irqoff(void) { }
|
|
static inline void user_exit_irqoff(void) { }
|
|
static inline enum ctx_state exception_enter(void) { return 0; }
|
|
static inline void exception_exit(enum ctx_state prev_ctx) { }
|
|
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
|
|
#endif /* !CONFIG_CONTEXT_TRACKING */
|
|
|
|
/*
 * WARN_ON() variant for context tracking checks: @cond is only evaluated,
 * and can only trigger the warning, when context_tracking_enabled() is true.
 */
#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
|
|
|
|
/*
 * context_tracking_init() is only provided out of line when
 * CONFIG_CONTEXT_TRACKING_FORCE is set; otherwise it is an empty stub.
 */
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
void context_tracking_init(void);
#else
static inline void context_tracking_init(void)
{
}
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
|
|
|
|
|
|
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
|
/* must be called with irqs disabled */
|
|
static __always_inline void guest_enter_irqoff(void)
|
|
{
|
|
instrumentation_begin();
|
|
if (vtime_accounting_enabled_this_cpu())
|
|
vtime_guest_enter(current);
|
|
else
|
|
current->flags |= PF_VCPU;
|
|
instrumentation_end();
|
|
|
|
if (context_tracking_enabled())
|
|
__context_tracking_enter(CONTEXT_GUEST);
|
|
|
|
/* KVM does not hold any references to rcu protected data when it
|
|
* switches CPU into a guest mode. In fact switching to a guest mode
|
|
* is very similar to exiting to userspace from rcu point of view. In
|
|
* addition CPU may stay in a guest mode for quite a long time (up to
|
|
* one time slice). Lets treat guest mode as quiescent state, just like
|
|
* we do with user-mode execution.
|
|
*/
|
|
if (!context_tracking_enabled_this_cpu()) {
|
|
instrumentation_begin();
|
|
rcu_virt_note_context_switch(smp_processor_id());
|
|
instrumentation_end();
|
|
}
|
|
}
|
|
|
|
static __always_inline void guest_exit_irqoff(void)
|
|
{
|
|
if (context_tracking_enabled())
|
|
__context_tracking_exit(CONTEXT_GUEST);
|
|
|
|
instrumentation_begin();
|
|
if (vtime_accounting_enabled_this_cpu())
|
|
vtime_guest_exit(current);
|
|
else
|
|
current->flags &= ~PF_VCPU;
|
|
instrumentation_end();
|
|
}
|
|
|
|
#else
|
|
static __always_inline void guest_enter_irqoff(void)
|
|
{
|
|
/*
|
|
* This is running in ioctl context so its safe
|
|
* to assume that it's the stime pending cputime
|
|
* to flush.
|
|
*/
|
|
instrumentation_begin();
|
|
vtime_account_kernel(current);
|
|
current->flags |= PF_VCPU;
|
|
rcu_virt_note_context_switch(smp_processor_id());
|
|
instrumentation_end();
|
|
}
|
|
|
|
static __always_inline void guest_exit_irqoff(void)
|
|
{
|
|
instrumentation_begin();
|
|
/* Flush the guest cputime we spent on the guest */
|
|
vtime_account_kernel(current);
|
|
current->flags &= ~PF_VCPU;
|
|
instrumentation_end();
|
|
}
|
|
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
|
|
|
|
/*
 * Wrapper around guest_exit_irqoff() for callers that may have interrupts
 * enabled: the local irq state is saved and restored around the call.
 */
static inline void guest_exit(void)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	guest_exit_irqoff();
	local_irq_restore(irqflags);
}
|
|
|
|
#endif
|