Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (synced 2024-12-29 17:22:07 +00:00)
lazy tlb: fix hotplug exit race with MMU_LAZY_TLB_SHOOTDOWN
CPU unplug first calls __cpu_disable(), and that's where powerpc calls cleanup_cpu_mmu_context(), which clears this CPU from mm_cpumask() of all mms in the system.

However, this CPU may still be using a lazy tlb mm, and its mm_cpumask bit will be cleared from it. The CPU does not switch away from the lazy tlb mm until arch_cpu_idle_dead() calls idle_task_exit(). If that user mm exits in this window, it will not be subject to the lazy tlb mm shootdown and may be freed while still in use as a lazy mm by the CPU being unplugged.

cleanup_cpu_mmu_context() could be moved later, but it looks better to move the lazy tlb mm switching earlier. The problem with doing the lazy mm switching in idle_task_exit() is explained in commit bf2c59fce4 ("sched/core: Fix illegal RCU from offline CPUs"), which added a wart to switch away from the mm but leave it set in active_mm to be cleaned up later.

So instead, switch away from the lazy tlb mm at sched_cpu_wait_empty(), which is the last hotplug state before teardown (CPUHP_AP_SCHED_WAIT_EMPTY). This CPU will never switch to a user thread from this point, so it has no chance to pick up a new lazy tlb mm. This removes the lazy tlb mm handling wart in CPU unplug.

With this, idle_task_exit() is not needed anymore and can be cleaned up. This leaves the prototype alone, to be cleaned up after this change.

herton: took the suggestions from https://lore.kernel.org/all/87jzvyprsw.ffs@tglx/ and made adjustments to the initial patch proposed by Nicholas.

Link: https://lkml.kernel.org/r/20230524060455.147699-1-npiggin@gmail.com
Link: https://lore.kernel.org/all/20230525205253.E2FAEC433EF@smtp.kernel.org/
Link: https://lkml.kernel.org/r/20241104142318.3295663-1-herton@redhat.com
Fixes: 2655421ae6 ("lazy tlb: shoot lazies, non-refcounting lazy tlb mm reference handling scheme")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 315c342b8b
commit 9664c5b908
--- a/include/linux/sched/hotplug.h
+++ b/include/linux/sched/hotplug.h
@@ -18,10 +18,6 @@ extern int sched_cpu_dying(unsigned int cpu);
 # define sched_cpu_dying	NULL
 #endif
 
-#ifdef CONFIG_HOTPLUG_CPU
 extern void idle_task_exit(void);
-#else
-static inline void idle_task_exit(void) {}
-#endif
 
 #endif /* _LINUX_SCHED_HOTPLUG_H */
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -905,12 +905,13 @@ static int finish_cpu(unsigned int cpu)
 	struct task_struct *idle = idle_thread_get(cpu);
 	struct mm_struct *mm = idle->active_mm;
 
 	/*
-	 * idle_task_exit() will have switched to &init_mm, now
-	 * clean up any remaining active_mm state.
+	 * sched_force_init_mm() ensured the use of &init_mm,
+	 * drop that refcount now that the CPU has stopped.
 	 */
-	if (mm != &init_mm)
-		idle->active_mm = &init_mm;
+	WARN_ON(mm != &init_mm);
+	idle->active_mm = NULL;
 	mmdrop_lazy_tlb(mm);
+
 	return 0;
 }
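The refcount hand-off between the two sides of the unplug operation is easy to miss in the diff. A minimal sketch of the sequence, assuming finish_cpu() remains the CPUHP_BRINGUP_CPU teardown callback run on the control CPU (that wiring is not part of this patch):

	/* Outgoing CPU, hotplug thread (CPUHP_AP_SCHED_WAIT_EMPTY teardown): */
	sched_cpu_wait_empty(cpu);
		/* sched_force_init_mm() takes a lazy reference on init_mm via
		 * mmgrab_lazy_tlb(&init_mm), switches away from the old lazy
		 * mm, and releases it with mmdrop_lazy_tlb(mm). */

	/* Control CPU, once the dead CPU has fully stopped: */
	finish_cpu(cpu);
		/* WARN_ON(mm != &init_mm) now holds by construction, and
		 * mmdrop_lazy_tlb(mm) drops the init_mm reference taken above. */

This pairing is why finish_cpu() can set idle->active_mm to NULL outright instead of patching up leftover lazy mm state.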
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7930,19 +7930,26 @@ void sched_setnuma(struct task_struct *p, int nid)
 
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Ensure that the idle task is using init_mm right before its CPU goes
- * offline.
+ * Invoked on the outgoing CPU in context of the CPU hotplug thread
+ * after ensuring that there are no user space tasks left on the CPU.
+ *
+ * If there is a lazy mm in use on the hotplug thread, drop it and
+ * switch to init_mm.
+ *
+ * The reference count on init_mm is dropped in finish_cpu().
  */
-void idle_task_exit(void)
+static void sched_force_init_mm(void)
 {
 	struct mm_struct *mm = current->active_mm;
 
-	BUG_ON(cpu_online(smp_processor_id()));
-	BUG_ON(current != this_rq()->idle);
-
 	if (mm != &init_mm) {
-		switch_mm(mm, &init_mm, current);
+		mmgrab_lazy_tlb(&init_mm);
+		local_irq_disable();
+		current->active_mm = &init_mm;
+		switch_mm_irqs_off(mm, &init_mm, current);
+		local_irq_enable();
 		finish_arch_post_lock_switch();
+		mmdrop_lazy_tlb(mm);
 	}
 
 	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
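Read as a whole rather than as interleaved +/- lines, the new helper assembled from the hunk above is as follows (the inline comments are added here for orientation and are not in the patch):

	static void sched_force_init_mm(void)
	{
		struct mm_struct *mm = current->active_mm;

		if (mm != &init_mm) {
			/* Hold init_mm before publishing it as active_mm. */
			mmgrab_lazy_tlb(&init_mm);
			local_irq_disable();
			current->active_mm = &init_mm;
			switch_mm_irqs_off(mm, &init_mm, current);
			local_irq_enable();
			finish_arch_post_lock_switch();
			/* The old lazy tlb mm is no longer used by this CPU. */
			mmdrop_lazy_tlb(mm);
		}

		/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
	}

active_mm is updated and the mm switched with interrupts disabled so the two cannot be observed out of sync. The old BUG_ON() checks are dropped because the function now runs from the hotplug thread while the CPU is still online, not from the exiting idle task.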
@@ -8344,6 +8351,7 @@ int sched_cpu_starting(unsigned int cpu)
 int sched_cpu_wait_empty(unsigned int cpu)
 {
 	balance_hotplug_wait();
+	sched_force_init_mm();
 	return 0;
 }
 