mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-18 06:15:12 +00:00
117aad1e9e
Isolated balloon pages can wrongly end up in LRU lists when migrate_pages() finishes its round without draining all the isolated page list. The same issue can happen when reclaim_clean_pages_from_list() tries to reclaim pages from an isolated page list, before migration, in the CMA path. Such balloon page leak opens a race window against LRU lists shrinkers that leads us to the following kernel panic: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [<ffffffff810c2625>] shrink_page_list+0x24e/0x897 PGD 3cda2067 PUD 3d713067 PMD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 340 Comm: kswapd0 Not tainted 3.12.0-rc1-22626-g4367597 #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 RIP: shrink_page_list+0x24e/0x897 RSP: 0000:ffff88003da499b8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88003e82bd60 RCX: 00000000000657d5 RDX: 0000000000000000 RSI: 000000000000031f RDI: ffff88003e82bd40 RBP: ffff88003da49ab0 R08: 0000000000000001 R09: 0000000081121a45 R10: ffffffff81121a45 R11: ffff88003c4a9a28 R12: ffff88003e82bd40 R13: ffff88003da0e800 R14: 0000000000000001 R15: ffff88003da49d58 FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000067d9000 CR3: 000000003ace5000 CR4: 00000000000407b0 Call Trace: shrink_inactive_list+0x240/0x3de shrink_lruvec+0x3e0/0x566 __shrink_zone+0x94/0x178 shrink_zone+0x3a/0x82 balance_pgdat+0x32a/0x4c2 kswapd+0x2f0/0x372 kthread+0xa2/0xaa ret_from_fork+0x7c/0xb0 Code: 80 7d 8f 01 48 83 95 68 ff ff ff 00 4c 89 e7 e8 5a 7b 00 00 48 85 c0 49 89 c5 75 08 80 7d 8f 00 74 3e eb 31 48 8b 80 18 01 00 00 <48> 8b 74 0d 48 8b 78 30 be 02 00 00 00 ff d2 eb RIP [<ffffffff810c2625>] shrink_page_list+0x24e/0x897 RSP <ffff88003da499b8> CR2: 0000000000000028 ---[ end trace 703d2451af6ffbfd ]--- Kernel panic - not syncing: Fatal exception This patch fixes the issue, by assuring the proper tests are made at putback_movable_pages() & 
reclaim_clean_pages_from_list() to avoid isolated balloon pages being wrongly reinserted in LRU lists. [akpm@linux-foundation.org: clarify awkward comment text] Signed-off-by: Rafael Aquini <aquini@redhat.com> Reported-by: Luiz Capitulino <lcapitulino@redhat.com> Tested-by: Luiz Capitulino <lcapitulino@redhat.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik van Riel <riel@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
298 lines
9.8 KiB
C
298 lines
9.8 KiB
C
/*
|
|
* include/linux/balloon_compaction.h
|
|
*
|
|
* Common interface definitions for making balloon pages movable by compaction.
|
|
*
|
|
* Despite being perfectly possible to perform ballooned pages migration, they
|
|
* make a special corner case to compaction scans because balloon pages are not
|
|
* enlisted at any LRU list like the other pages we do compact / migrate.
|
|
*
|
|
* As the page isolation scanning step a compaction thread does is a lockless
|
|
* procedure (from a page standpoint), it might bring some racy situations while
|
|
* performing balloon page compaction. In order to sort out these racy scenarios
|
|
* and safely perform balloon's page compaction and migration we must, always,
|
|
* ensure following these three simple rules:
|
|
*
|
|
* i. when updating a balloon's page ->mapping element, strictly do it under
|
|
* the following lock order, independently of the far superior
|
|
* locking scheme (lru_lock, balloon_lock):
|
|
* +-page_lock(page);
|
|
* +--spin_lock_irq(&b_dev_info->pages_lock);
|
|
* ... page->mapping updates here ...
|
|
*
|
|
* ii. before isolating or dequeueing a balloon page from the balloon device
|
|
* pages list, the page reference counter must be raised by one and the
|
|
* extra refcount must be dropped when the page is enqueued back into
|
|
* the balloon device page list, thus a balloon page keeps its reference
|
|
* counter raised only while it is under our special handling;
|
|
*
|
|
* iii. after the lockless scan step has selected a potential balloon page for
|
|
* isolation, re-test the page->mapping flags and the page ref counter
|
|
* under the proper page lock, to ensure isolating a valid balloon page
|
|
* (not yet isolated, nor under release procedure)
|
|
*
|
|
* The functions provided by this interface are placed to help on coping with
|
|
* the aforementioned balloon page corner case, as well as to ensure the simple
|
|
* set of exposed rules are satisfied while we are dealing with balloon pages
|
|
* compaction / migration.
|
|
*
|
|
* Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
|
|
*/
|
|
#ifndef _LINUX_BALLOON_COMPACTION_H
|
|
#define _LINUX_BALLOON_COMPACTION_H
|
|
#include <linux/pagemap.h>
|
|
#include <linux/page-flags.h>
|
|
#include <linux/migrate.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/err.h>
|
|
|
|
/*
|
|
* Balloon device information descriptor.
|
|
* This struct is used to allow the common balloon compaction interface
|
|
* procedures to find the proper balloon device holding memory pages they'll
|
|
* have to cope for page compaction / migration, as well as it serves the
|
|
* balloon driver as a page book-keeper for its registered balloon devices.
|
|
*/
|
|
struct balloon_dev_info {
|
|
void *balloon_device; /* balloon device descriptor */
|
|
struct address_space *mapping; /* balloon special page->mapping */
|
|
unsigned long isolated_pages; /* # of isolated pages for migration */
|
|
spinlock_t pages_lock; /* Protection to pages list */
|
|
struct list_head pages; /* Pages enqueued & handled to Host */
|
|
};
|
|
|
|
/*
 * Out-of-line helpers; their definitions live outside this header.
 * Function declarations have external linkage by default, so no explicit
 * 'extern' is needed.
 */
struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor);
|
|
|
|
/*
 * balloon_devinfo_free - release a balloon device information descriptor
 * @b_dev_info: descriptor to be freed
 *
 * Hands @b_dev_info to kfree().
 */
static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
{
	kfree(b_dev_info);
}
|
|
|
|
/*
 * balloon_page_free - give a balloon page back to the page free lists
 * @page: ballooned page to be set free
 *
 * Must be used to properly free an isolated/dequeued balloon page, either at
 * the end of a successful page migration or in the balloon driver's page
 * release procedure.
 */
static inline void balloon_page_free(struct page *page)
{
	/*
	 * A balloon page always carries one extra reference, taken before it
	 * is isolated or dequeued, which helps sort out fortuitous collisions
	 * between a thread trying to isolate and another thread trying to
	 * release the very same balloon page.
	 *
	 * Drop that extra reference first, then hand the page back to Buddy.
	 */
	put_page(page);
	__free_page(page);
}
|
|
|
|
#ifdef CONFIG_BALLOON_COMPACTION
|
|
/*
 * Balloon compaction entry points, declared only when
 * CONFIG_BALLOON_COMPACTION is set; their definitions live outside this
 * header. Function declarations have external linkage by default, so no
 * explicit 'extern' is needed.
 */
bool balloon_page_isolate(struct page *page);
void balloon_page_putback(struct page *page);
int balloon_page_migrate(struct page *newpage, struct page *page,
			 enum migrate_mode mode);
struct address_space *
balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
		      const struct address_space_operations *a_ops);
|
|
|
|
/*
 * balloon_mapping_free - release a balloon special mapping
 * @balloon_mapping: address_space to be freed
 *
 * Hands @balloon_mapping to kfree().
 */
static inline void balloon_mapping_free(struct address_space *balloon_mapping)
{
	kfree(balloon_mapping);
}
|
|
|
|
/*
|
|
* page_flags_cleared - helper to perform balloon @page ->flags tests.
|
|
*
|
|
* As balloon pages are obtained from buddy and we do not play with page->flags
|
|
* at driver level (exception made when we get the page lock for compaction),
|
|
* we can safely identify a ballooned page by checking if the
|
|
* PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also
|
|
* helps us skip ballooned pages that are locked for compaction or release, thus
|
|
* mitigating their racy check at balloon_page_movable()
|
|
*/
|
|
static inline bool page_flags_cleared(struct page *page)
|
|
{
|
|
return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
|
|
}
|
|
|
|
/*
|
|
* __is_movable_balloon_page - helper to perform @page mapping->flags tests
|
|
*/
|
|
static inline bool __is_movable_balloon_page(struct page *page)
|
|
{
|
|
struct address_space *mapping = page->mapping;
|
|
return mapping_balloon(mapping);
|
|
}
|
|
|
|
/*
 * balloon_page_movable - test page->mapping->flags to identify balloon pages
 *			  that can be moved by compaction/migration.
 * @page: candidate page
 *
 * Used by core compaction's page isolation scheme, where most pages seen are
 * not balloon pages. To avoid undesired side effects such as racing against
 * __free_pages(), the page cannot be held locked while page->mapping->flags
 * is tested here.
 *
 * A balloon page that is being released under us may yield a false positive,
 * so page->mapping->flags must be re-tested later, under the proper page
 * lock, by the functions that deal with the balloon page case.
 */
static inline bool balloon_page_movable(struct page *page)
{
	/*
	 * Rule out pages that use ->mapping in a different way before
	 * mapping->flags gets dereferenced and tested.
	 */
	if (!page_flags_cleared(page))
		return false;

	if (page_mapped(page))
		return false;

	if (page_count(page) != 1)
		return false;

	return __is_movable_balloon_page(page);
}
|
|
|
|
/*
 * isolated_balloon_page - identify an isolated balloon page on private
 *			   compaction/migration page lists.
 * @page: candidate page
 *
 * Once a compaction thread has isolated a balloon page for migration it
 * raises the page refcount, so that concurrent compaction threads cannot
 * re-isolate the same page. Because of that raised count,
 * putback_movable_pages() and other routines that need to spot isolated
 * balloon pages on private pagelists cannot rely on balloon_page_movable().
 */
static inline bool isolated_balloon_page(struct page *page)
{
	if (!page_flags_cleared(page))
		return false;

	if (page_mapped(page))
		return false;

	/* An already isolated balloon page has, by default, a raised refcount */
	if (page_count(page) < 2)
		return false;

	return __is_movable_balloon_page(page);
}
|
|
|
|
/*
|
|
* balloon_page_insert - insert a page into the balloon's page list and make
|
|
* the page->mapping assignment accordingly.
|
|
* @page : page to be assigned as a 'balloon page'
|
|
* @mapping : allocated special 'balloon_mapping'
|
|
* @head : balloon's device page list head
|
|
*
|
|
* Caller must ensure the page is locked and the spin_lock protecting balloon
|
|
* pages list is held before inserting a page into the balloon device.
|
|
*/
|
|
static inline void balloon_page_insert(struct page *page,
|
|
struct address_space *mapping,
|
|
struct list_head *head)
|
|
{
|
|
page->mapping = mapping;
|
|
list_add(&page->lru, head);
|
|
}
|
|
|
|
/*
|
|
* balloon_page_delete - delete a page from balloon's page list and clear
|
|
* the page->mapping assignement accordingly.
|
|
* @page : page to be released from balloon's page list
|
|
*
|
|
* Caller must ensure the page is locked and the spin_lock protecting balloon
|
|
* pages list is held before deleting a page from the balloon device.
|
|
*/
|
|
static inline void balloon_page_delete(struct page *page)
|
|
{
|
|
page->mapping = NULL;
|
|
list_del(&page->lru);
|
|
}
|
|
|
|
/*
|
|
* balloon_page_device - get the b_dev_info descriptor for the balloon device
|
|
* that enqueues the given page.
|
|
*/
|
|
static inline struct balloon_dev_info *balloon_page_device(struct page *page)
|
|
{
|
|
struct address_space *mapping = page->mapping;
|
|
if (likely(mapping))
|
|
return mapping->private_data;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static inline gfp_t balloon_mapping_gfp_mask(void)
|
|
{
|
|
return GFP_HIGHUSER_MOVABLE;
|
|
}
|
|
|
|
/*
 * balloon_compaction_check - report whether balloon compaction is built in
 *
 * CONFIG_BALLOON_COMPACTION variant: always returns true.
 */
static inline bool balloon_compaction_check(void)
{
	return true;
}
|
|
|
|
#else /* !CONFIG_BALLOON_COMPACTION */
|
|
|
|
/*
 * balloon_mapping_alloc - !CONFIG_BALLOON_COMPACTION stub
 * @b_dev_info: balloon device information descriptor (unused)
 * @a_ops:      address space operations (unused)
 *
 * The signature matches the CONFIG_BALLOON_COMPACTION prototype so that
 * callers get the same type checking whichever way the option is set.
 *
 * Returns ERR_PTR(-EOPNOTSUPP) unconditionally.
 */
static inline struct address_space *
balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
		      const struct address_space_operations *a_ops)
{
	return ERR_PTR(-EOPNOTSUPP);
}
|
|
|
|
/*
 * balloon_mapping_free - !CONFIG_BALLOON_COMPACTION stub
 * @balloon_mapping: ignored
 *
 * Does nothing.
 */
static inline void balloon_mapping_free(struct address_space *balloon_mapping)
{
}
|
|
|
|
static inline void balloon_page_insert(struct page *page,
|
|
struct address_space *mapping,
|
|
struct list_head *head)
|
|
{
|
|
list_add(&page->lru, head);
|
|
}
|
|
|
|
static inline void balloon_page_delete(struct page *page)
|
|
{
|
|
list_del(&page->lru);
|
|
}
|
|
|
|
/*
 * balloon_page_movable - !CONFIG_BALLOON_COMPACTION stub
 * @page: ignored
 *
 * Always returns false.
 */
static inline bool balloon_page_movable(struct page *page)
{
	return false;
}
|
|
|
|
/*
 * isolated_balloon_page - !CONFIG_BALLOON_COMPACTION stub
 * @page: ignored
 *
 * Always returns false.
 */
static inline bool isolated_balloon_page(struct page *page)
{
	return false;
}
|
|
|
|
/*
 * balloon_page_isolate - !CONFIG_BALLOON_COMPACTION stub
 * @page: ignored
 *
 * Always returns false.
 */
static inline bool balloon_page_isolate(struct page *page)
{
	return false;
}
|
|
|
|
/*
 * balloon_page_putback - !CONFIG_BALLOON_COMPACTION stub
 * @page: ignored
 *
 * Does nothing.
 */
static inline void balloon_page_putback(struct page *page)
{
}
|
|
|
|
static inline int balloon_page_migrate(struct page *newpage,
|
|
struct page *page, enum migrate_mode mode)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline gfp_t balloon_mapping_gfp_mask(void)
|
|
{
|
|
return GFP_HIGHUSER;
|
|
}
|
|
|
|
/*
 * balloon_compaction_check - report whether balloon compaction is built in
 *
 * !CONFIG_BALLOON_COMPACTION variant: always returns false.
 */
static inline bool balloon_compaction_check(void)
{
	return false;
}
|
|
#endif /* CONFIG_BALLOON_COMPACTION */
|
|
#endif /* _LINUX_BALLOON_COMPACTION_H */
|