mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 15:29:16 +00:00
i387: re-introduce FPU state preloading at context switch time
After all the FPU state cleanups and finally finding the problem that caused all our FPU save/restore problems, this re-introduces the preloading of FPU state that was removed in commit b3b0870ef3ff ("i387: do not preload FPU state at task switch time"). However, instead of simply reverting the removal, this reimplements preloading with several fixes, most notably: - properly abstracted as a true FPU state switch, rather than as open-coded save and restore with various hacks. In particular, implementing it as a proper FPU state switch allows us to optimize the CR0.TS flag accesses: there is no reason to set the TS bit only to then almost immediately clear it again. CR0 accesses are quite slow and expensive, so don't flip the bit back and forth for no good reason. - Make sure that the same model works for both x86-32 and x86-64, so that there are no gratuitous differences between the two due to the way they save and restore segment state differently due to architectural differences that really don't matter to the FPU state. - Avoid exposing the "preload" state to the context switch routines, and in particular allow the concept of lazy state restore: if nothing else has used the FPU in the meantime, and the process is still on the same CPU, we can avoid restoring state from memory entirely and just re-expose the state that is still in the FPU unit. That optimized lazy restore isn't actually implemented here, but the infrastructure is set up for it. Of course, older CPUs that use 'fnsave' to save the state cannot take advantage of this, since the state saving also trashes the state. In other words, there is now an actual _design_ to the FPU state saving, rather than just random historical baggage. Hopefully it's easier to follow as a result. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f94edacf99
commit
34ddc81a23
@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
|
||||
extern void fpu_init(void);
|
||||
extern void mxcsr_feature_mask_init(void);
|
||||
extern int init_fpu(struct task_struct *child);
|
||||
extern void __math_state_restore(struct task_struct *);
|
||||
extern void math_state_restore(void);
|
||||
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
|
||||
|
||||
@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* These must be called with preempt disabled
|
||||
* These must be called with preempt disabled. Returns
|
||||
* 'true' if the FPU state is still intact.
|
||||
*/
|
||||
static inline void fpu_save_init(struct fpu *fpu)
|
||||
static inline int fpu_save_init(struct fpu *fpu)
|
||||
{
|
||||
if (use_xsave()) {
|
||||
fpu_xsave(fpu);
|
||||
@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
|
||||
* xsave header may indicate the init state of the FP.
|
||||
*/
|
||||
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
|
||||
return;
|
||||
return 1;
|
||||
} else if (use_fxsr()) {
|
||||
fpu_fxsave(fpu);
|
||||
} else {
|
||||
asm volatile("fnsave %[fx]; fwait"
|
||||
: [fx] "=m" (fpu->state->fsave));
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
|
||||
/*
|
||||
* If exceptions are pending, we need to clear them so
|
||||
* that we don't randomly get exceptions later.
|
||||
*
|
||||
* FIXME! Is this perhaps only true for the old-style
|
||||
* irq13 case? Maybe we could leave the x87 state
|
||||
* intact otherwise?
|
||||
*/
|
||||
if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
|
||||
asm volatile("fnclex");
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void __save_init_fpu(struct task_struct *tsk)
|
||||
static inline int __save_init_fpu(struct task_struct *tsk)
|
||||
{
|
||||
fpu_save_init(&tsk->thread.fpu);
|
||||
return fpu_save_init(&tsk->thread.fpu);
|
||||
}
|
||||
|
||||
static inline int fpu_fxrstor_checking(struct fpu *fpu)
|
||||
@ -300,21 +313,80 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
|
||||
__thread_set_has_fpu(tsk);
|
||||
}
|
||||
|
||||
/*
|
||||
* FPU state switching for scheduling.
|
||||
*
|
||||
* This is a two-stage process:
|
||||
*
|
||||
* - switch_fpu_prepare() saves the old state and
|
||||
* sets the new state of the CR0.TS bit. This is
|
||||
* done within the context of the old process.
|
||||
*
|
||||
* - switch_fpu_finish() restores the new state as
|
||||
* necessary.
|
||||
*/
|
||||
typedef struct { int preload; } fpu_switch_t;
|
||||
|
||||
/*
|
||||
* FIXME! We could do a totally lazy restore, but we need to
|
||||
* add a per-cpu "this was the task that last touched the FPU
|
||||
* on this CPU" variable, and the task needs to have a "I last
|
||||
* touched the FPU on this CPU" and check them.
|
||||
*
|
||||
* We don't do that yet, so "fpu_lazy_restore()" always returns
|
||||
* false, but some day..
|
||||
*/
|
||||
#define fpu_lazy_restore(tsk) (0)
|
||||
#define fpu_lazy_state_intact(tsk) do { } while (0)
|
||||
|
||||
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
|
||||
{
|
||||
fpu_switch_t fpu;
|
||||
|
||||
fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
|
||||
if (__thread_has_fpu(old)) {
|
||||
if (__save_init_fpu(old))
|
||||
fpu_lazy_state_intact(old);
|
||||
__thread_clear_has_fpu(old);
|
||||
old->fpu_counter++;
|
||||
|
||||
/* Don't change CR0.TS if we just switch! */
|
||||
if (fpu.preload) {
|
||||
__thread_set_has_fpu(new);
|
||||
prefetch(new->thread.fpu.state);
|
||||
} else
|
||||
stts();
|
||||
} else {
|
||||
old->fpu_counter = 0;
|
||||
if (fpu.preload) {
|
||||
if (fpu_lazy_restore(new))
|
||||
fpu.preload = 0;
|
||||
else
|
||||
prefetch(new->thread.fpu.state);
|
||||
__thread_fpu_begin(new);
|
||||
}
|
||||
}
|
||||
return fpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* By the time this gets called, we've already cleared CR0.TS and
|
||||
* given the process the FPU if we are going to preload the FPU
|
||||
* state - all we need to do is to conditionally restore the register
|
||||
* state itself.
|
||||
*/
|
||||
static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
|
||||
{
|
||||
if (fpu.preload)
|
||||
__math_state_restore(new);
|
||||
}
|
||||
|
||||
/*
|
||||
* Signal frame handlers...
|
||||
*/
|
||||
extern int save_i387_xstate(void __user *buf);
|
||||
extern int restore_i387_xstate(void __user *buf);
|
||||
|
||||
static inline void __unlazy_fpu(struct task_struct *tsk)
|
||||
{
|
||||
if (__thread_has_fpu(tsk)) {
|
||||
__save_init_fpu(tsk);
|
||||
__thread_fpu_end(tsk);
|
||||
} else
|
||||
tsk->fpu_counter = 0;
|
||||
}
|
||||
|
||||
static inline void __clear_fpu(struct task_struct *tsk)
|
||||
{
|
||||
if (__thread_has_fpu(tsk)) {
|
||||
@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk)
|
||||
static inline void unlazy_fpu(struct task_struct *tsk)
|
||||
{
|
||||
preempt_disable();
|
||||
__unlazy_fpu(tsk);
|
||||
if (__thread_has_fpu(tsk)) {
|
||||
__save_init_fpu(tsk);
|
||||
__thread_fpu_end(tsk);
|
||||
} else
|
||||
tsk->fpu_counter = 0;
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
@ -299,10 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
*next = &next_p->thread;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
fpu_switch_t fpu;
|
||||
|
||||
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
|
||||
|
||||
__unlazy_fpu(prev_p);
|
||||
fpu = switch_fpu_prepare(prev_p, next_p);
|
||||
|
||||
/*
|
||||
* Reload esp0.
|
||||
@ -357,6 +358,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
if (prev->gs | next->gs)
|
||||
lazy_load_gs(next->gs);
|
||||
|
||||
switch_fpu_finish(next_p, fpu);
|
||||
|
||||
percpu_write(current_task, next_p);
|
||||
|
||||
return prev_p;
|
||||
|
@ -386,8 +386,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
||||
unsigned fsindex, gsindex;
|
||||
fpu_switch_t fpu;
|
||||
|
||||
__unlazy_fpu(prev_p);
|
||||
fpu = switch_fpu_prepare(prev_p, next_p);
|
||||
|
||||
/*
|
||||
* Reload esp0, LDT and the page table pointer:
|
||||
@ -457,6 +458,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
|
||||
prev->gsindex = gsindex;
|
||||
|
||||
switch_fpu_finish(next_p, fpu);
|
||||
|
||||
/*
|
||||
* Switch the PDA and FPU contexts.
|
||||
*/
|
||||
|
@ -571,40 +571,16 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* 'math_state_restore()' saves the current math information in the
|
||||
* old math state array, and gets the new ones from the current task
|
||||
*
|
||||
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
|
||||
* Don't touch unless you *really* know how it works.
|
||||
*
|
||||
* Must be called with kernel preemption disabled (eg with local
|
||||
* interrupts disabled, as in the case of do_device_not_available).
|
||||
* This gets called with the process already owning the
|
||||
* FPU state, and with CR0.TS cleared. It just needs to
|
||||
* restore the FPU register state.
|
||||
*/
|
||||
void math_state_restore(void)
|
||||
void __math_state_restore(struct task_struct *tsk)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
/* We need a safe address that is cheap to find and that is already
|
||||
in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
|
||||
in L1. We've just brought in "tsk->thread.has_fpu", so use that */
|
||||
#define safe_address (tsk->thread.has_fpu)
|
||||
|
||||
if (!tsk_used_math(tsk)) {
|
||||
local_irq_enable();
|
||||
/*
|
||||
* does a slab alloc which can sleep
|
||||
*/
|
||||
if (init_fpu(tsk)) {
|
||||
/*
|
||||
* ran out of memory!
|
||||
*/
|
||||
do_group_exit(SIGKILL);
|
||||
return;
|
||||
}
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
__thread_fpu_begin(tsk);
|
||||
|
||||
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
|
||||
is pending. Clear the x87 state here by setting it to fixed
|
||||
values. safe_address is a random variable that should be in L1 */
|
||||
@ -623,6 +599,39 @@ void math_state_restore(void)
|
||||
force_sig(SIGSEGV, tsk);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 'math_state_restore()' saves the current math information in the
|
||||
* old math state array, and gets the new ones from the current task
|
||||
*
|
||||
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
|
||||
* Don't touch unless you *really* know how it works.
|
||||
*
|
||||
* Must be called with kernel preemption disabled (eg with local
|
||||
* interrupts disabled, as in the case of do_device_not_available).
|
||||
*/
|
||||
void math_state_restore(void)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (!tsk_used_math(tsk)) {
|
||||
local_irq_enable();
|
||||
/*
|
||||
* does a slab alloc which can sleep
|
||||
*/
|
||||
if (init_fpu(tsk)) {
|
||||
/*
|
||||
* ran out of memory!
|
||||
*/
|
||||
do_group_exit(SIGKILL);
|
||||
return;
|
||||
}
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
__thread_fpu_begin(tsk);
|
||||
__math_state_restore(tsk);
|
||||
|
||||
tsk->fpu_counter++;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user