linux-stable/kernel/sched/completion.c
Oleg Nesterov bc9560155f sched/completion: Serialize completion_done() with complete()
Commit de30ec47302c "Remove unnecessary ->wait.lock serialization when
reading completion state" was not correct, without lock/unlock the code
like stop_machine_from_inactive_cpu()

	while (!completion_done())
		cpu_relax();

can return before complete() finishes its spin_unlock() which writes to
this memory. And spin_unlock_wait().

While at it, change try_wait_for_completion() to use READ_ONCE().

Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reported-by: Davidlohr Bueso <dave@stgolabs.net>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Added a comment with the barrier. ]
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Mc Guire <der.herr@hofr.at>
Cc: raghavendra.kt@linux.vnet.ibm.com
Cc: waiman.long@hp.com
Fixes: de30ec47302c ("sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state")
Link: http://lkml.kernel.org/r/20150212195913.GA30430@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-02-18 14:27:40 +01:00

318 lines
9.6 KiB
C

/*
* Generic wait-for-completion handler;
*
* It differs from semaphores in that their default case is the opposite,
* wait_for_completion default blocks whereas semaphore default non-block. The
* interface also makes it easy to 'complete' multiple waiting threads,
* something which isn't entirely natural for semaphores.
*
* But more importantly, the primitive documents the usage. Semaphores would
* typically be used for exclusion which gives rise to priority inversion.
* Waiting for completion is a typically sync point, but not an exclusion point.
*/
#include <linux/sched.h>
#include <linux/completion.h>
/**
* complete: - signals a single thread waiting on this completion
* @x: holds the state of this particular completion
*
* This will wake up a single thread waiting on this completion. Threads will be
* awakened in the same order in which they were queued.
*
* See also complete_all(), wait_for_completion() and related routines.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void complete(struct completion *x)
{
unsigned long flags;
spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
__wake_up_locked(&x->wait, TASK_NORMAL, 1);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
/**
* complete_all: - signals all threads waiting on this completion
* @x: holds the state of this particular completion
*
* This will wake up all threads waiting on this particular completion event.
*
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
void complete_all(struct completion *x)
{
unsigned long flags;
spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
__wake_up_locked(&x->wait, TASK_NORMAL, 0);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
static inline long __sched
do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
__add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
if (!x->done)
return timeout;
}
x->done--;
return timeout ?: 1;
}
static inline long __sched
__wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
might_sleep();
spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
spin_unlock_irq(&x->wait.lock);
return timeout;
}
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
return __wait_for_common(x, schedule_timeout, timeout, state);
}
static long __sched
wait_for_common_io(struct completion *x, long timeout, int state)
{
return __wait_for_common(x, io_schedule_timeout, timeout, state);
}
/**
* wait_for_completion: - waits for completion of a task
* @x: holds the state of this particular completion
*
* This waits to be signaled for completion of a specific task. It is NOT
* interruptible and there is no timeout.
*
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
* and interrupt capability. Also see complete().
*/
void __sched wait_for_completion(struct completion *x)
{
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);
/**
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible.
*
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
* till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_timeout);
/**
* wait_for_completion_io: - waits for completion of a task
* @x: holds the state of this particular completion
*
* This waits to be signaled for completion of a specific task. It is NOT
* interruptible and there is no timeout. The caller is accounted as waiting
* for IO (which traditionally means blkio only).
*/
void __sched wait_for_completion_io(struct completion *x)
{
wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_io);
/**
* wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible. The caller is accounted as waiting for IO (which traditionally
* means blkio only).
*
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
* till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
{
return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_io_timeout);
/**
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
* @x: holds the state of this particular completion
*
* This waits for completion of a specific task to be signaled. It is
* interruptible.
*
* Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_interruptible(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_interruptible);
/**
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
*
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
* or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
/**
* wait_for_completion_killable: - waits for completion of a task (killable)
* @x: holds the state of this particular completion
*
* This waits to be signaled for completion of a specific task. It can be
* interrupted by a kill signal.
*
* Return: -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_killable(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
if (t == -ERESTARTSYS)
return t;
return 0;
}
EXPORT_SYMBOL(wait_for_completion_killable);
/**
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
* @x: holds the state of this particular completion
* @timeout: timeout value in jiffies
*
* This waits for either a completion of a specific task to be
* signaled or for a specified timeout to expire. It can be
* interrupted by a kill signal. The timeout is in jiffies.
*
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
* or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_killable_timeout(struct completion *x,
unsigned long timeout)
{
return wait_for_common(x, timeout, TASK_KILLABLE);
}
EXPORT_SYMBOL(wait_for_completion_killable_timeout);
/**
* try_wait_for_completion - try to decrement a completion without blocking
* @x: completion structure
*
* Return: 0 if a decrement cannot be done without blocking
* 1 if a decrement succeeded.
*
* If a completion is being used as a counting completion,
* attempt to decrement the counter without blocking. This
* enables us to avoid waiting if the resource the completion
* is protecting is not available.
*/
bool try_wait_for_completion(struct completion *x)
{
unsigned long flags;
int ret = 1;
/*
* Since x->done will need to be locked only
* in the non-blocking case, we check x->done
* first without taking the lock so we can
* return early in the blocking case.
*/
if (!READ_ONCE(x->done))
return 0;
spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
/**
* completion_done - Test to see if a completion has any waiters
* @x: completion structure
*
* Return: 0 if there are waiters (wait_for_completion() in progress)
* 1 if there are no waiters.
*
*/
bool completion_done(struct completion *x)
{
if (!READ_ONCE(x->done))
return false;
/*
* If ->done, we need to wait for complete() to release ->wait.lock
* otherwise we can end up freeing the completion before complete()
* is done referencing it.
*
* The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
* the loads of ->done and ->wait.lock such that we cannot observe
* the lock before complete() acquires it while observing the ->done
* after it's acquired the lock.
*/
smp_rmb();
spin_unlock_wait(&x->wait.lock);
return true;
}
EXPORT_SYMBOL(completion_done);