mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
Matt reported the following deadlock:

	CPU0					CPU1
	schedule(.prev=migrate/0)		<fault>
	  pick_next_task()			  ...
	    idle_balance()			    migrate_swap()
	      active_balance()			      stop_two_cpus()
							spin_lock(stopper0->lock)
							spin_lock(stopper1->lock)
							ttwu(migrate/0)
							  smp_cond_load_acquire() -- waits for schedule()
	        stop_one_cpu(1)
	          spin_lock(stopper1->lock) -- waits for stopper lock

Fix this deadlock by taking the wakeups out from under stopper->lock. This allows the active_balance() to queue the stop work and finish the context switch, which in turn allows the wakeup from migrate_swap() to observe the context and complete the wakeup.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reported-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180420095005.GH4064@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
f4ef6a438c
commit
0b26351b91
@ -21,6 +21,7 @@
|
|||||||
#include <linux/smpboot.h>
|
#include <linux/smpboot.h>
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
#include <linux/nmi.h>
|
#include <linux/nmi.h>
|
||||||
|
#include <linux/sched/wake_q.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Structure to determine completion condition and record errors. May
|
* Structure to determine completion condition and record errors. May
|
||||||
@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
|
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
|
||||||
struct cpu_stop_work *work)
|
struct cpu_stop_work *work,
|
||||||
|
struct wake_q_head *wakeq)
|
||||||
{
|
{
|
||||||
list_add_tail(&work->list, &stopper->works);
|
list_add_tail(&work->list, &stopper->works);
|
||||||
wake_up_process(stopper->thread);
|
wake_q_add(wakeq, stopper->thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* queue @work to @stopper. if offline, @work is completed immediately */
|
/* queue @work to @stopper. if offline, @work is completed immediately */
|
||||||
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
|
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
|
||||||
{
|
{
|
||||||
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
|
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
|
||||||
|
DEFINE_WAKE_Q(wakeq);
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
bool enabled;
|
bool enabled;
|
||||||
|
|
||||||
spin_lock_irqsave(&stopper->lock, flags);
|
spin_lock_irqsave(&stopper->lock, flags);
|
||||||
enabled = stopper->enabled;
|
enabled = stopper->enabled;
|
||||||
if (enabled)
|
if (enabled)
|
||||||
__cpu_stop_queue_work(stopper, work);
|
__cpu_stop_queue_work(stopper, work, &wakeq);
|
||||||
else if (work->done)
|
else if (work->done)
|
||||||
cpu_stop_signal_done(work->done);
|
cpu_stop_signal_done(work->done);
|
||||||
spin_unlock_irqrestore(&stopper->lock, flags);
|
spin_unlock_irqrestore(&stopper->lock, flags);
|
||||||
|
|
||||||
|
wake_up_q(&wakeq);
|
||||||
|
|
||||||
return enabled;
|
return enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
|
|||||||
{
|
{
|
||||||
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
|
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
|
||||||
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
|
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
|
||||||
|
DEFINE_WAKE_Q(wakeq);
|
||||||
int err;
|
int err;
|
||||||
retry:
|
retry:
|
||||||
spin_lock_irq(&stopper1->lock);
|
spin_lock_irq(&stopper1->lock);
|
||||||
@ -252,8 +258,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
|
|||||||
goto unlock;
|
goto unlock;
|
||||||
|
|
||||||
err = 0;
|
err = 0;
|
||||||
__cpu_stop_queue_work(stopper1, work1);
|
__cpu_stop_queue_work(stopper1, work1, &wakeq);
|
||||||
__cpu_stop_queue_work(stopper2, work2);
|
__cpu_stop_queue_work(stopper2, work2, &wakeq);
|
||||||
unlock:
|
unlock:
|
||||||
spin_unlock(&stopper2->lock);
|
spin_unlock(&stopper2->lock);
|
||||||
spin_unlock_irq(&stopper1->lock);
|
spin_unlock_irq(&stopper1->lock);
|
||||||
@ -263,6 +269,9 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
|
|||||||
cpu_relax();
|
cpu_relax();
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wake_up_q(&wakeq);
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user