mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 12:13:43 +00:00
mm/z3fold.c: fix race between migration and destruction
In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq). However, we have no guarantee that migration isn't happening in the background at that time. Migration directly calls queue_work_on(pool->compact_wq), if destruction wins that race we are using a destroyed workqueue. Link: http://lkml.kernel.org/r/20190809213828.202833-1-henryburns@google.com Signed-off-by: Henry Burns <henryburns@google.com> Cc: Vitaly Wool <vitalywool@gmail.com> Cc: Shakeel Butt <shakeelb@google.com> Cc: Jonathan Adams <jwadams@google.com> Cc: Henry Burns <henrywolfeburns@gmail.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
361469211f
commit
d776aaa989
89
mm/z3fold.c
89
mm/z3fold.c
@ -41,6 +41,7 @@
|
|||||||
#include <linux/workqueue.h>
|
#include <linux/workqueue.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/spinlock.h>
|
#include <linux/spinlock.h>
|
||||||
|
#include <linux/wait.h>
|
||||||
#include <linux/zpool.h>
|
#include <linux/zpool.h>
|
||||||
#include <linux/magic.h>
|
#include <linux/magic.h>
|
||||||
|
|
||||||
@ -145,6 +146,8 @@ struct z3fold_header {
|
|||||||
* @release_wq: workqueue for safe page release
|
* @release_wq: workqueue for safe page release
|
||||||
* @work: work_struct for safe page release
|
* @work: work_struct for safe page release
|
||||||
* @inode: inode for z3fold pseudo filesystem
|
* @inode: inode for z3fold pseudo filesystem
|
||||||
|
* @destroying: bool to stop migration once we start destruction
|
||||||
|
* @isolated: int to count the number of pages currently in isolation
|
||||||
*
|
*
|
||||||
* This structure is allocated at pool creation time and maintains metadata
|
* This structure is allocated at pool creation time and maintains metadata
|
||||||
* pertaining to a particular z3fold pool.
|
* pertaining to a particular z3fold pool.
|
||||||
@ -163,8 +166,11 @@ struct z3fold_pool {
|
|||||||
const struct zpool_ops *zpool_ops;
|
const struct zpool_ops *zpool_ops;
|
||||||
struct workqueue_struct *compact_wq;
|
struct workqueue_struct *compact_wq;
|
||||||
struct workqueue_struct *release_wq;
|
struct workqueue_struct *release_wq;
|
||||||
|
struct wait_queue_head isolate_wait;
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
|
bool destroying;
|
||||||
|
int isolated;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
|
|||||||
goto out_c;
|
goto out_c;
|
||||||
spin_lock_init(&pool->lock);
|
spin_lock_init(&pool->lock);
|
||||||
spin_lock_init(&pool->stale_lock);
|
spin_lock_init(&pool->stale_lock);
|
||||||
|
init_waitqueue_head(&pool->isolate_wait);
|
||||||
pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
|
pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
|
||||||
if (!pool->unbuddied)
|
if (!pool->unbuddied)
|
||||||
goto out_pool;
|
goto out_pool;
|
||||||
@ -808,6 +815,15 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool pool_isolated_are_drained(struct z3fold_pool *pool)
|
||||||
|
{
|
||||||
|
bool ret;
|
||||||
|
|
||||||
|
spin_lock(&pool->lock);
|
||||||
|
ret = pool->isolated == 0;
|
||||||
|
spin_unlock(&pool->lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* z3fold_destroy_pool() - destroys an existing z3fold pool
|
* z3fold_destroy_pool() - destroys an existing z3fold pool
|
||||||
* @pool: the z3fold pool to be destroyed
|
* @pool: the z3fold pool to be destroyed
|
||||||
@ -817,6 +833,22 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
|
|||||||
static void z3fold_destroy_pool(struct z3fold_pool *pool)
|
static void z3fold_destroy_pool(struct z3fold_pool *pool)
|
||||||
{
|
{
|
||||||
kmem_cache_destroy(pool->c_handle);
|
kmem_cache_destroy(pool->c_handle);
|
||||||
|
/*
|
||||||
|
* We set pool-> destroying under lock to ensure that
|
||||||
|
* z3fold_page_isolate() sees any changes to destroying. This way we
|
||||||
|
* avoid the need for any memory barriers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
spin_lock(&pool->lock);
|
||||||
|
pool->destroying = true;
|
||||||
|
spin_unlock(&pool->lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to ensure that no pages are being migrated while we destroy
|
||||||
|
* these workqueues, as migration can queue work on either of the
|
||||||
|
* workqueues.
|
||||||
|
*/
|
||||||
|
wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to destroy pool->compact_wq before pool->release_wq,
|
* We need to destroy pool->compact_wq before pool->release_wq,
|
||||||
@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
|
|||||||
return atomic64_read(&pool->pages_nr);
|
return atomic64_read(&pool->pages_nr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* z3fold_dec_isolated() expects to be called while pool->lock is held.
|
||||||
|
*/
|
||||||
|
static void z3fold_dec_isolated(struct z3fold_pool *pool)
|
||||||
|
{
|
||||||
|
assert_spin_locked(&pool->lock);
|
||||||
|
VM_BUG_ON(pool->isolated <= 0);
|
||||||
|
pool->isolated--;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we have no more isolated pages, we have to see if
|
||||||
|
* z3fold_destroy_pool() is waiting for a signal.
|
||||||
|
*/
|
||||||
|
if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
|
||||||
|
wake_up_all(&pool->isolate_wait);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void z3fold_inc_isolated(struct z3fold_pool *pool)
|
||||||
|
{
|
||||||
|
pool->isolated++;
|
||||||
|
}
|
||||||
|
|
||||||
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
|
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
|
||||||
{
|
{
|
||||||
struct z3fold_header *zhdr;
|
struct z3fold_header *zhdr;
|
||||||
@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
|
|||||||
spin_lock(&pool->lock);
|
spin_lock(&pool->lock);
|
||||||
if (!list_empty(&page->lru))
|
if (!list_empty(&page->lru))
|
||||||
list_del(&page->lru);
|
list_del(&page->lru);
|
||||||
|
/*
|
||||||
|
* We need to check for destruction while holding pool->lock, as
|
||||||
|
* otherwise destruction could see 0 isolated pages, and
|
||||||
|
* proceed.
|
||||||
|
*/
|
||||||
|
if (unlikely(pool->destroying)) {
|
||||||
|
spin_unlock(&pool->lock);
|
||||||
|
/*
|
||||||
|
* If this page isn't stale, somebody else holds a
|
||||||
|
* reference to it. Let't drop our refcount so that they
|
||||||
|
* can call the release logic.
|
||||||
|
*/
|
||||||
|
if (unlikely(kref_put(&zhdr->refcount,
|
||||||
|
release_z3fold_page_locked))) {
|
||||||
|
/*
|
||||||
|
* If we get here we have kref problems, so we
|
||||||
|
* should freak out.
|
||||||
|
*/
|
||||||
|
WARN(1, "Z3fold is experiencing kref problems\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
z3fold_page_unlock(zhdr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
z3fold_inc_isolated(pool);
|
||||||
spin_unlock(&pool->lock);
|
spin_unlock(&pool->lock);
|
||||||
z3fold_page_unlock(zhdr);
|
z3fold_page_unlock(zhdr);
|
||||||
return true;
|
return true;
|
||||||
@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
|
|||||||
|
|
||||||
queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
|
queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
|
||||||
|
|
||||||
|
spin_lock(&pool->lock);
|
||||||
|
z3fold_dec_isolated(pool);
|
||||||
|
spin_unlock(&pool->lock);
|
||||||
|
|
||||||
page_mapcount_reset(page);
|
page_mapcount_reset(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
return 0;
|
return 0;
|
||||||
@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page)
|
|||||||
INIT_LIST_HEAD(&page->lru);
|
INIT_LIST_HEAD(&page->lru);
|
||||||
if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
|
if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
|
||||||
atomic64_dec(&pool->pages_nr);
|
atomic64_dec(&pool->pages_nr);
|
||||||
|
spin_lock(&pool->lock);
|
||||||
|
z3fold_dec_isolated(pool);
|
||||||
|
spin_unlock(&pool->lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
spin_lock(&pool->lock);
|
spin_lock(&pool->lock);
|
||||||
list_add(&page->lru, &pool->lru);
|
list_add(&page->lru, &pool->lru);
|
||||||
|
z3fold_dec_isolated(pool);
|
||||||
spin_unlock(&pool->lock);
|
spin_unlock(&pool->lock);
|
||||||
z3fold_page_unlock(zhdr);
|
z3fold_page_unlock(zhdr);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user