Hugh Dickins e9995ef978 ksm: rmap_walk to remove_migation_ptes
A side-effect of making ksm pages swappable is that they have to be placed
on the LRUs: which then exposes them to isolate_lru_page() and hence to
page migration.

Add rmap_walk() for remove_migration_ptes() to use: rmap_walk_anon() and
rmap_walk_file() in rmap.c, but rmap_walk_ksm() in ksm.c.  Perhaps some
consolidation with existing code is possible, but don't attempt that yet
(try_to_unmap needs to handle nonlinears, but migration pte removal does
not).

rmap_walk() is sadly less general than it appears: rmap_walk_anon(), like
remove_anon_migration_ptes() which it replaces, avoids calling
page_lock_anon_vma(), because that includes a page_mapped() test which
fails when all migration ptes are in place.  That was valid when NUMA page
migration was introduced (holding mmap_sem provided the missing guarantee
that anon_vma's slab had not already been destroyed), but I believe not
valid in the memory hotremove case added since.

For now do the same as before, and consider the best way to fix that
unlikely race later on.  When fixed, we can probably use rmap_walk() on
hwpoisoned ksm pages too: for now, they remain among hwpoison's various
exceptions (its PageKsm test comes before the page is locked, but its
page_lock_anon_vma fails safely if an anon gets upgraded).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-12-15 08:53:20 -08:00

146 lines
4.0 KiB
C

#ifndef __LINUX_KSM_H
#define __LINUX_KSM_H
/*
* Memory merging support.
*
* This code enables dynamic sharing of identical pages found in different
* memory areas, even if they are not shared by fork().
*/
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/sched.h>
struct stable_node;
struct mem_cgroup;
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
return __ksm_enter(mm);
return 0;
}
static inline void ksm_exit(struct mm_struct *mm)
{
if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
__ksm_exit(mm);
}
/*
* A KSM page is one of those write-protected "shared pages" or "merged pages"
* which KSM maps into multiple mms, wherever identical anonymous page content
* is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
* anon_vma, but to that page's node of the stable tree.
*/
static inline int PageKsm(struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
}
static inline struct stable_node *page_stable_node(struct page *page)
{
return PageKsm(page) ? page_rmapping(page) : NULL;
}
static inline void set_page_stable_node(struct page *page,
struct stable_node *stable_node)
{
page->mapping = (void *)stable_node +
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
}
/*
* When do_swap_page() first faults in from swap what used to be a KSM page,
* no problem, it will be assigned to this vma's anon_vma; but thereafter,
* it might be faulted into a different anon_vma (or perhaps to a different
* offset in the same anon_vma). do_swap_page() cannot do all the locking
* needed to reconstitute a cross-anon_vma KSM page: for now it has to make
* a copy, and leave remerging the pages to a later pass of ksmd.
*
* We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
* but what if the vma was unmerged while the page was swapped out?
*/
struct page *ksm_does_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address);
static inline struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
struct anon_vma *anon_vma = page_anon_vma(page);
if (!anon_vma ||
(anon_vma == vma->anon_vma &&
page->index == linear_page_index(vma, address)))
return page;
return ksm_does_need_to_copy(page, vma, address);
}
int page_referenced_ksm(struct page *page,
struct mem_cgroup *memcg, unsigned long *vm_flags);
int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
struct vm_area_struct *, unsigned long, void *), void *arg);
void ksm_migrate_page(struct page *newpage, struct page *oldpage);
#else /* !CONFIG_KSM */
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags)
{
return 0;
}
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
return 0;
}
static inline void ksm_exit(struct mm_struct *mm)
{
}
static inline int PageKsm(struct page *page)
{
return 0;
}
static inline struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
return page;
}
static inline int page_referenced_ksm(struct page *page,
struct mem_cgroup *memcg, unsigned long *vm_flags)
{
return 0;
}
static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
{
return 0;
}
static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
struct vm_area_struct *, unsigned long, void *), void *arg)
{
return 0;
}
static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
{
}
#endif /* !CONFIG_KSM */
#endif /* __LINUX_KSM_H */