mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 14:50:19 +00:00
85fbd722ad
Freezable kthreads and workqueues are fundamentally problematic in that they effectively introduce a big kernel lock widely used in the kernel and have already been the culprit of several deadlock scenarios. This is the latest occurrence. During resume, libata rescans all the ports and revalidates all pre-existing devices. If it determines that a device has gone missing, the device is removed from the system which involves invalidating block device and flushing bdi while holding driver core layer locks. Unfortunately, this can race with the rest of device resume. Because freezable kthreads and workqueues are thawed after device resume is complete and block device removal depends on freezable workqueues and kthreads (e.g. bdi_wq, jbd2) to make progress, this can lead to deadlock - block device removal can't proceed because kthreads are frozen and kthreads can't be thawed because device resume is blocked behind block device removal. 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue") made this particular deadlock scenario more visible but the underlying problem has always been there - the original forker task and jbd2 are freezable too. In fact, this is highly likely just one of many possible deadlock scenarios given that freezer behaves as a big kernel lock and we don't have any debug mechanism around it. I believe the right thing to do is getting rid of freezable kthreads and workqueues. This is something fundamentally broken. For now, implement a funny workaround in libata - just avoid doing block device hot[un]plug while the system is frozen. Kernel engineering at its finest. :( v2: Add EXPORT_SYMBOL_GPL(pm_freezing) for cases where libata is built as a module. v3: Comment updated and polling interval changed to 10ms as suggested by Rafael. v4: Add #ifdef CONFIG_FREEZER around the hack as pm_freezing is not defined when FREEZER is not configured thus breaking build. Reported by kbuild test robot. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Tomaž Šolc <tomaz.solc@tablix.org> Reviewed-by: "Rafael J. Wysocki" <rjw@rjwysocki.net> Link: https://bugzilla.kernel.org/show_bug.cgi?id=62801 Link: http://lkml.kernel.org/r/20131213174932.GA27070@htj.dyndns.org Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Len Brown <len.brown@intel.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: stable@vger.kernel.org Cc: kbuild test robot <fengguang.wu@intel.com>
183 lines
4.6 KiB
C
183 lines
4.6 KiB
C
/*
|
|
* kernel/freezer.c - Function to freeze a process
|
|
*
|
|
* Originally from kernel/power/process.c
|
|
*/
|
|
|
|
#include <linux/interrupt.h>
|
|
#include <linux/suspend.h>
|
|
#include <linux/export.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/freezer.h>
|
|
#include <linux/kthread.h>
|
|
|
|
/* total number of freezing conditions in effect */
|
|
atomic_t system_freezing_cnt = ATOMIC_INIT(0);
|
|
EXPORT_SYMBOL(system_freezing_cnt);
|
|
|
|
/* indicate whether PM freezing is in effect, protected by pm_mutex */
|
|
bool pm_freezing;
|
|
bool pm_nosig_freezing;
|
|
|
|
/*
|
|
* Temporary export for the deadlock workaround in ata_scsi_hotplug().
|
|
* Remove once the hack becomes unnecessary.
|
|
*/
|
|
EXPORT_SYMBOL_GPL(pm_freezing);
|
|
|
|
/* protects freezing and frozen transitions */
|
|
static DEFINE_SPINLOCK(freezer_lock);
|
|
|
|
/**
|
|
* freezing_slow_path - slow path for testing whether a task needs to be frozen
|
|
* @p: task to be tested
|
|
*
|
|
* This function is called by freezing() if system_freezing_cnt isn't zero
|
|
* and tests whether @p needs to enter and stay in frozen state. Can be
|
|
* called under any context. The freezers are responsible for ensuring the
|
|
* target tasks see the updated state.
|
|
*/
|
|
bool freezing_slow_path(struct task_struct *p)
|
|
{
|
|
if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
|
|
return false;
|
|
|
|
if (pm_nosig_freezing || cgroup_freezing(p))
|
|
return true;
|
|
|
|
if (pm_freezing && !(p->flags & PF_KTHREAD))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL(freezing_slow_path);
|
|
|
|
/* Refrigerator is place where frozen processes are stored :-). */
|
|
bool __refrigerator(bool check_kthr_stop)
|
|
{
|
|
/* Hmm, should we be allowed to suspend when there are realtime
|
|
processes around? */
|
|
bool was_frozen = false;
|
|
long save = current->state;
|
|
|
|
pr_debug("%s entered refrigerator\n", current->comm);
|
|
|
|
for (;;) {
|
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
|
|
|
spin_lock_irq(&freezer_lock);
|
|
current->flags |= PF_FROZEN;
|
|
if (!freezing(current) ||
|
|
(check_kthr_stop && kthread_should_stop()))
|
|
current->flags &= ~PF_FROZEN;
|
|
spin_unlock_irq(&freezer_lock);
|
|
|
|
if (!(current->flags & PF_FROZEN))
|
|
break;
|
|
was_frozen = true;
|
|
schedule();
|
|
}
|
|
|
|
pr_debug("%s left refrigerator\n", current->comm);
|
|
|
|
/*
|
|
* Restore saved task state before returning. The mb'd version
|
|
* needs to be used; otherwise, it might silently break
|
|
* synchronization which depends on ordered task state change.
|
|
*/
|
|
set_current_state(save);
|
|
|
|
return was_frozen;
|
|
}
|
|
EXPORT_SYMBOL(__refrigerator);
|
|
|
|
static void fake_signal_wake_up(struct task_struct *p)
|
|
{
|
|
unsigned long flags;
|
|
|
|
if (lock_task_sighand(p, &flags)) {
|
|
signal_wake_up(p, 0);
|
|
unlock_task_sighand(p, &flags);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* freeze_task - send a freeze request to given task
|
|
* @p: task to send the request to
|
|
*
|
|
* If @p is freezing, the freeze request is sent either by sending a fake
|
|
* signal (if it's not a kernel thread) or waking it up (if it's a kernel
|
|
* thread).
|
|
*
|
|
* RETURNS:
|
|
* %false, if @p is not freezing or already frozen; %true, otherwise
|
|
*/
|
|
bool freeze_task(struct task_struct *p)
|
|
{
|
|
unsigned long flags;
|
|
|
|
/*
|
|
* This check can race with freezer_do_not_count, but worst case that
|
|
* will result in an extra wakeup being sent to the task. It does not
|
|
* race with freezer_count(), the barriers in freezer_count() and
|
|
* freezer_should_skip() ensure that either freezer_count() sees
|
|
* freezing == true in try_to_freeze() and freezes, or
|
|
* freezer_should_skip() sees !PF_FREEZE_SKIP and freezes the task
|
|
* normally.
|
|
*/
|
|
if (freezer_should_skip(p))
|
|
return false;
|
|
|
|
spin_lock_irqsave(&freezer_lock, flags);
|
|
if (!freezing(p) || frozen(p)) {
|
|
spin_unlock_irqrestore(&freezer_lock, flags);
|
|
return false;
|
|
}
|
|
|
|
if (!(p->flags & PF_KTHREAD))
|
|
fake_signal_wake_up(p);
|
|
else
|
|
wake_up_state(p, TASK_INTERRUPTIBLE);
|
|
|
|
spin_unlock_irqrestore(&freezer_lock, flags);
|
|
return true;
|
|
}
|
|
|
|
void __thaw_task(struct task_struct *p)
|
|
{
|
|
unsigned long flags;
|
|
|
|
/*
|
|
* Clear freezing and kick @p if FROZEN. Clearing is guaranteed to
|
|
* be visible to @p as waking up implies wmb. Waking up inside
|
|
* freezer_lock also prevents wakeups from leaking outside
|
|
* refrigerator.
|
|
*/
|
|
spin_lock_irqsave(&freezer_lock, flags);
|
|
if (frozen(p))
|
|
wake_up_process(p);
|
|
spin_unlock_irqrestore(&freezer_lock, flags);
|
|
}
|
|
|
|
/**
|
|
* set_freezable - make %current freezable
|
|
*
|
|
* Mark %current freezable and enter refrigerator if necessary.
|
|
*/
|
|
bool set_freezable(void)
|
|
{
|
|
might_sleep();
|
|
|
|
/*
|
|
* Modify flags while holding freezer_lock. This ensures the
|
|
* freezer notices that we aren't frozen yet or the freezing
|
|
* condition is visible to try_to_freeze() below.
|
|
*/
|
|
spin_lock_irq(&freezer_lock);
|
|
current->flags &= ~PF_NOFREEZE;
|
|
spin_unlock_irq(&freezer_lock);
|
|
|
|
return try_to_freeze();
|
|
}
|
|
EXPORT_SYMBOL(set_freezable);
|