mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area.

This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.)

In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled.

Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later.

There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
b38c6845b6
commit
4c21e2f244
@ -229,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd)
|
||||
pte = pmd_page(*pmd);
|
||||
pmd_clear(pmd);
|
||||
dec_page_state(nr_page_table_pages);
|
||||
pte_lock_deinit(pte);
|
||||
pte_free(pte);
|
||||
pmd_free(pmd);
|
||||
free:
|
||||
|
@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd)
|
||||
if (pgd_list)
|
||||
pgd_list->private = (unsigned long) &page->index;
|
||||
pgd_list = page;
|
||||
page->private = (unsigned long) &pgd_list;
|
||||
set_page_private(page, (unsigned long)&pgd_list);
|
||||
}
|
||||
|
||||
static inline void pgd_list_del(pgd_t *pgd)
|
||||
{
|
||||
struct page *next, **pprev, *page = virt_to_page(pgd);
|
||||
next = (struct page *) page->index;
|
||||
pprev = (struct page **) page->private;
|
||||
pprev = (struct page **)page_private(page);
|
||||
*pprev = next;
|
||||
if (next)
|
||||
next->private = (unsigned long) pprev;
|
||||
|
@ -188,19 +188,19 @@ static inline void pgd_list_add(pgd_t *pgd)
|
||||
struct page *page = virt_to_page(pgd);
|
||||
page->index = (unsigned long)pgd_list;
|
||||
if (pgd_list)
|
||||
pgd_list->private = (unsigned long)&page->index;
|
||||
set_page_private(pgd_list, (unsigned long)&page->index);
|
||||
pgd_list = page;
|
||||
page->private = (unsigned long)&pgd_list;
|
||||
set_page_private(page, (unsigned long)&pgd_list);
|
||||
}
|
||||
|
||||
static inline void pgd_list_del(pgd_t *pgd)
|
||||
{
|
||||
struct page *next, **pprev, *page = virt_to_page(pgd);
|
||||
next = (struct page *)page->index;
|
||||
pprev = (struct page **)page->private;
|
||||
pprev = (struct page **)page_private(page);
|
||||
*pprev = next;
|
||||
if (next)
|
||||
next->private = (unsigned long)pprev;
|
||||
set_page_private(next, (unsigned long)pprev);
|
||||
}
|
||||
|
||||
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
|
||||
|
@ -144,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm)
|
||||
|
||||
if(!proc_mm || !ptrace_faultinfo){
|
||||
free_page(mmu->id.stack);
|
||||
pte_lock_deinit(virt_to_page(mmu->last_page_table));
|
||||
pte_free_kernel((pte_t *) mmu->last_page_table);
|
||||
dec_page_state(nr_page_table_pages);
|
||||
#ifdef CONFIG_3_LEVEL_PGTABLES
|
||||
|
@ -291,8 +291,8 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
|
||||
cachefs_uncache_page(vnode->cache, page);
|
||||
#endif
|
||||
|
||||
pageio = (struct cachefs_page *) page->private;
|
||||
page->private = 0;
|
||||
pageio = (struct cachefs_page *) page_private(page);
|
||||
set_page_private(page, 0);
|
||||
ClearPagePrivate(page);
|
||||
|
||||
if (pageio)
|
||||
|
@ -96,7 +96,7 @@ static void
|
||||
__clear_page_buffers(struct page *page)
|
||||
{
|
||||
ClearPagePrivate(page);
|
||||
page->private = 0;
|
||||
set_page_private(page, 0);
|
||||
page_cache_release(page);
|
||||
}
|
||||
|
||||
|
@ -86,7 +86,7 @@ struct meta_anchor {
|
||||
atomic_t io_count;
|
||||
struct metapage *mp[MPS_PER_PAGE];
|
||||
};
|
||||
#define mp_anchor(page) ((struct meta_anchor *)page->private)
|
||||
#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
|
||||
|
||||
static inline struct metapage *page_to_mp(struct page *page, uint offset)
|
||||
{
|
||||
@ -108,7 +108,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
|
||||
if (!a)
|
||||
return -ENOMEM;
|
||||
memset(a, 0, sizeof(struct meta_anchor));
|
||||
page->private = (unsigned long)a;
|
||||
set_page_private(page, (unsigned long)a);
|
||||
SetPagePrivate(page);
|
||||
kmap(page);
|
||||
}
|
||||
@ -136,7 +136,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
|
||||
a->mp[index] = NULL;
|
||||
if (--a->mp_count == 0) {
|
||||
kfree(a);
|
||||
page->private = 0;
|
||||
set_page_private(page, 0);
|
||||
ClearPagePrivate(page);
|
||||
kunmap(page);
|
||||
}
|
||||
@ -156,13 +156,13 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *))
|
||||
#else
|
||||
static inline struct metapage *page_to_mp(struct page *page, uint offset)
|
||||
{
|
||||
return PagePrivate(page) ? (struct metapage *)page->private : NULL;
|
||||
return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
|
||||
}
|
||||
|
||||
static inline int insert_metapage(struct page *page, struct metapage *mp)
|
||||
{
|
||||
if (mp) {
|
||||
page->private = (unsigned long)mp;
|
||||
set_page_private(page, (unsigned long)mp);
|
||||
SetPagePrivate(page);
|
||||
kmap(page);
|
||||
}
|
||||
@ -171,7 +171,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
|
||||
|
||||
static inline void remove_metapage(struct page *page, struct metapage *mp)
|
||||
{
|
||||
page->private = 0;
|
||||
set_page_private(page, 0);
|
||||
ClearPagePrivate(page);
|
||||
kunmap(page);
|
||||
}
|
||||
|
@ -181,8 +181,9 @@ set_page_region(
|
||||
size_t offset,
|
||||
size_t length)
|
||||
{
|
||||
page->private |= page_region_mask(offset, length);
|
||||
if (page->private == ~0UL)
|
||||
set_page_private(page,
|
||||
page_private(page) | page_region_mask(offset, length));
|
||||
if (page_private(page) == ~0UL)
|
||||
SetPageUptodate(page);
|
||||
}
|
||||
|
||||
@ -194,7 +195,7 @@ test_page_region(
|
||||
{
|
||||
unsigned long mask = page_region_mask(offset, length);
|
||||
|
||||
return (mask && (page->private & mask) == mask);
|
||||
return (mask && (page_private(page) & mask) == mask);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp)
|
||||
/* If we *know* page->private refers to buffer_heads */
|
||||
#define page_buffers(page) \
|
||||
({ \
|
||||
BUG_ON(!PagePrivate(page)); \
|
||||
((struct buffer_head *)(page)->private); \
|
||||
BUG_ON(!PagePrivate(page)); \
|
||||
((struct buffer_head *)page_private(page)); \
|
||||
})
|
||||
#define page_has_buffers(page) PagePrivate(page)
|
||||
|
||||
@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page,
|
||||
{
|
||||
page_cache_get(page);
|
||||
SetPagePrivate(page);
|
||||
page->private = (unsigned long)head;
|
||||
set_page_private(page, (unsigned long)head);
|
||||
}
|
||||
|
||||
static inline void get_bh(struct buffer_head *bh)
|
||||
|
@ -226,13 +226,18 @@ struct page {
|
||||
* to show when page is mapped
|
||||
* & limit reverse map searches.
|
||||
*/
|
||||
unsigned long private; /* Mapping-private opaque data:
|
||||
union {
|
||||
unsigned long private; /* Mapping-private opaque data:
|
||||
* usually used for buffer_heads
|
||||
* if PagePrivate set; used for
|
||||
* swp_entry_t if PageSwapCache
|
||||
* When page is free, this indicates
|
||||
* order in the buddy system.
|
||||
*/
|
||||
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
|
||||
spinlock_t ptl;
|
||||
#endif
|
||||
} u;
|
||||
struct address_space *mapping; /* If low bit clear, points to
|
||||
* inode address_space, or NULL.
|
||||
* If page mapped as anonymous
|
||||
@ -260,6 +265,9 @@ struct page {
|
||||
#endif /* WANT_PAGE_VIRTUAL */
|
||||
};
|
||||
|
||||
#define page_private(page) ((page)->u.private)
|
||||
#define set_page_private(page, v) ((page)->u.private = (v))
|
||||
|
||||
/*
|
||||
* FIXME: take this include out, include page-flags.h in
|
||||
* files which need it (119 of them)
|
||||
@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *));
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
|
||||
static inline int page_count(struct page *p)
|
||||
static inline int page_count(struct page *page)
|
||||
{
|
||||
if (PageCompound(p))
|
||||
p = (struct page *)p->private;
|
||||
return atomic_read(&(p)->_count) + 1;
|
||||
if (PageCompound(page))
|
||||
page = (struct page *)page_private(page);
|
||||
return atomic_read(&page->_count) + 1;
|
||||
}
|
||||
|
||||
static inline void get_page(struct page *page)
|
||||
{
|
||||
if (unlikely(PageCompound(page)))
|
||||
page = (struct page *)page->private;
|
||||
page = (struct page *)page_private(page);
|
||||
atomic_inc(&page->_count);
|
||||
}
|
||||
|
||||
@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page)
|
||||
static inline pgoff_t page_index(struct page *page)
|
||||
{
|
||||
if (unlikely(PageSwapCache(page)))
|
||||
return page->private;
|
||||
return page_private(page);
|
||||
return page->index;
|
||||
}
|
||||
|
||||
@ -779,9 +787,31 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
|
||||
}
|
||||
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
|
||||
|
||||
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
|
||||
/*
|
||||
* We tuck a spinlock to guard each pagetable page into its struct page,
|
||||
* at page->private, with BUILD_BUG_ON to make sure that this will not
|
||||
* overflow into the next struct page (as it might with DEBUG_SPINLOCK).
|
||||
* When freeing, reset page->mapping so free_pages_check won't complain.
|
||||
*/
|
||||
#define __pte_lockptr(page) &((page)->u.ptl)
|
||||
#define pte_lock_init(_page) do { \
|
||||
spin_lock_init(__pte_lockptr(_page)); \
|
||||
} while (0)
|
||||
#define pte_lock_deinit(page) ((page)->mapping = NULL)
|
||||
#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
|
||||
#else
|
||||
/*
|
||||
* We use mm->page_table_lock to guard all pagetable pages of the mm.
|
||||
*/
|
||||
#define pte_lock_init(page) do {} while (0)
|
||||
#define pte_lock_deinit(page) do {} while (0)
|
||||
#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
|
||||
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
|
||||
|
||||
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
|
||||
({ \
|
||||
spinlock_t *__ptl = &(mm)->page_table_lock; \
|
||||
spinlock_t *__ptl = pte_lockptr(mm, pmd); \
|
||||
pte_t *__pte = pte_offset_map(pmd, address); \
|
||||
*(ptlp) = __ptl; \
|
||||
spin_lock(__ptl); \
|
||||
|
@ -334,7 +334,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
|
||||
if (pages) {
|
||||
unsigned int count, i;
|
||||
pages->mapping = NULL;
|
||||
pages->private = order;
|
||||
set_page_private(pages, order);
|
||||
count = 1 << order;
|
||||
for (i = 0; i < count; i++)
|
||||
SetPageReserved(pages + i);
|
||||
@ -347,7 +347,7 @@ static void kimage_free_pages(struct page *page)
|
||||
{
|
||||
unsigned int order, count, i;
|
||||
|
||||
order = page->private;
|
||||
order = page_private(page);
|
||||
count = 1 << order;
|
||||
for (i = 0; i < count; i++)
|
||||
ClearPageReserved(page + i);
|
||||
|
13
mm/Kconfig
13
mm/Kconfig
@ -111,3 +111,16 @@ config SPARSEMEM_STATIC
|
||||
config SPARSEMEM_EXTREME
|
||||
def_bool y
|
||||
depends on SPARSEMEM && !SPARSEMEM_STATIC
|
||||
|
||||
# Heavily threaded applications may benefit from splitting the mm-wide
|
||||
# page_table_lock, so that faults on different parts of the user address
|
||||
# space can be handled with less contention: split it at this NR_CPUS.
|
||||
# Default to 4 for wider testing, though 8 might be more appropriate.
|
||||
# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
|
||||
# PA-RISC's debug spinlock_t is too large for the 32-bit struct page.
|
||||
#
|
||||
config SPLIT_PTLOCK_CPUS
|
||||
int
|
||||
default "4096" if ARM && !CPU_CACHE_VIPT
|
||||
default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT
|
||||
default "4"
|
||||
|
@ -152,7 +152,7 @@ static int sync_page(void *word)
|
||||
* in the ->sync_page() methods make essential use of the
|
||||
* page_mapping(), merely passing the page down to the backing
|
||||
* device's unplug functions when it's non-NULL, which in turn
|
||||
* ignore it for all cases but swap, where only page->private is
|
||||
* ignore it for all cases but swap, where only page_private(page) is
|
||||
* of interest. When page_mapping() does go NULL, the entire
|
||||
* call stack gracefully ignores the page and returns.
|
||||
* -- wli
|
||||
|
24
mm/memory.c
24
mm/memory.c
@ -114,6 +114,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
|
||||
{
|
||||
struct page *page = pmd_page(*pmd);
|
||||
pmd_clear(pmd);
|
||||
pte_lock_deinit(page);
|
||||
pte_free_tlb(tlb, page);
|
||||
dec_page_state(nr_page_table_pages);
|
||||
tlb->mm->nr_ptes--;
|
||||
@ -294,10 +295,12 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
pte_lock_init(new);
|
||||
spin_lock(&mm->page_table_lock);
|
||||
if (pmd_present(*pmd)) /* Another has populated it */
|
||||
if (pmd_present(*pmd)) { /* Another has populated it */
|
||||
pte_lock_deinit(new);
|
||||
pte_free(new);
|
||||
else {
|
||||
} else {
|
||||
mm->nr_ptes++;
|
||||
inc_page_state(nr_page_table_pages);
|
||||
pmd_populate(mm, pmd, new);
|
||||
@ -432,7 +435,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
if (!dst_pte)
|
||||
return -ENOMEM;
|
||||
src_pte = pte_offset_map_nested(src_pmd, addr);
|
||||
src_ptl = &src_mm->page_table_lock;
|
||||
src_ptl = pte_lockptr(src_mm, src_pmd);
|
||||
spin_lock(src_ptl);
|
||||
|
||||
do {
|
||||
@ -1194,15 +1197,16 @@ EXPORT_SYMBOL(remap_pfn_range);
|
||||
* (but do_wp_page is only called after already making such a check;
|
||||
* and do_anonymous_page and do_no_page can safely check later on).
|
||||
*/
|
||||
static inline int pte_unmap_same(struct mm_struct *mm,
|
||||
static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
|
||||
pte_t *page_table, pte_t orig_pte)
|
||||
{
|
||||
int same = 1;
|
||||
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
|
||||
if (sizeof(pte_t) > sizeof(unsigned long)) {
|
||||
spin_lock(&mm->page_table_lock);
|
||||
spinlock_t *ptl = pte_lockptr(mm, pmd);
|
||||
spin_lock(ptl);
|
||||
same = pte_same(*page_table, orig_pte);
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
spin_unlock(ptl);
|
||||
}
|
||||
#endif
|
||||
pte_unmap(page_table);
|
||||
@ -1655,7 +1659,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
pte_t pte;
|
||||
int ret = VM_FAULT_MINOR;
|
||||
|
||||
if (!pte_unmap_same(mm, page_table, orig_pte))
|
||||
if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
|
||||
goto out;
|
||||
|
||||
entry = pte_to_swp_entry(orig_pte);
|
||||
@ -1773,7 +1777,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
page_cache_get(page);
|
||||
entry = mk_pte(page, vma->vm_page_prot);
|
||||
|
||||
ptl = &mm->page_table_lock;
|
||||
ptl = pte_lockptr(mm, pmd);
|
||||
spin_lock(ptl);
|
||||
if (!pte_none(*page_table))
|
||||
goto release;
|
||||
@ -1934,7 +1938,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
pgoff_t pgoff;
|
||||
int err;
|
||||
|
||||
if (!pte_unmap_same(mm, page_table, orig_pte))
|
||||
if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
|
||||
return VM_FAULT_MINOR;
|
||||
|
||||
if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
|
||||
@ -1992,7 +1996,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
|
||||
pte, pmd, write_access, entry);
|
||||
}
|
||||
|
||||
ptl = &mm->page_table_lock;
|
||||
ptl = pte_lockptr(mm, pmd);
|
||||
spin_lock(ptl);
|
||||
if (unlikely(!pte_same(*pte, entry)))
|
||||
goto unlock;
|
||||
|
11
mm/mremap.c
11
mm/mremap.c
@ -72,7 +72,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
|
||||
struct address_space *mapping = NULL;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
pte_t *old_pte, *new_pte, pte;
|
||||
spinlock_t *old_ptl;
|
||||
spinlock_t *old_ptl, *new_ptl;
|
||||
|
||||
if (vma->vm_file) {
|
||||
/*
|
||||
@ -88,8 +88,15 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
|
||||
new_vma->vm_truncate_count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't have to worry about the ordering of src and dst
|
||||
* pte locks because exclusive mmap_sem prevents deadlock.
|
||||
*/
|
||||
old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
|
||||
new_pte = pte_offset_map_nested(new_pmd, new_addr);
|
||||
new_ptl = pte_lockptr(mm, new_pmd);
|
||||
if (new_ptl != old_ptl)
|
||||
spin_lock(new_ptl);
|
||||
|
||||
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
|
||||
new_pte++, new_addr += PAGE_SIZE) {
|
||||
@ -101,6 +108,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
|
||||
set_pte_at(mm, new_addr, new_pte, pte);
|
||||
}
|
||||
|
||||
if (new_ptl != old_ptl)
|
||||
spin_unlock(new_ptl);
|
||||
pte_unmap_nested(new_pte - 1);
|
||||
pte_unmap_unlock(old_pte - 1, old_ptl);
|
||||
if (mapping)
|
||||
|
@ -154,7 +154,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
|
||||
struct page *p = page + i;
|
||||
|
||||
SetPageCompound(p);
|
||||
p->private = (unsigned long)page;
|
||||
set_page_private(p, (unsigned long)page);
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,7 +174,7 @@ static void destroy_compound_page(struct page *page, unsigned long order)
|
||||
|
||||
if (!PageCompound(p))
|
||||
bad_page(__FUNCTION__, page);
|
||||
if (p->private != (unsigned long)page)
|
||||
if (page_private(p) != (unsigned long)page)
|
||||
bad_page(__FUNCTION__, page);
|
||||
ClearPageCompound(p);
|
||||
}
|
||||
@ -187,18 +187,18 @@ static void destroy_compound_page(struct page *page, unsigned long order)
|
||||
* So, we don't need atomic page->flags operations here.
|
||||
*/
|
||||
static inline unsigned long page_order(struct page *page) {
|
||||
return page->private;
|
||||
return page_private(page);
|
||||
}
|
||||
|
||||
static inline void set_page_order(struct page *page, int order) {
|
||||
page->private = order;
|
||||
set_page_private(page, order);
|
||||
__SetPagePrivate(page);
|
||||
}
|
||||
|
||||
static inline void rmv_page_order(struct page *page)
|
||||
{
|
||||
__ClearPagePrivate(page);
|
||||
page->private = 0;
|
||||
set_page_private(page, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -238,7 +238,7 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
|
||||
* (a) the buddy is free &&
|
||||
* (b) the buddy is on the buddy system &&
|
||||
* (c) a page and its buddy have the same order.
|
||||
* for recording page's order, we use page->private and PG_private.
|
||||
* for recording page's order, we use page_private(page) and PG_private.
|
||||
*
|
||||
*/
|
||||
static inline int page_is_buddy(struct page *page, int order)
|
||||
@ -264,7 +264,7 @@ static inline int page_is_buddy(struct page *page, int order)
|
||||
* parts of the VM system.
|
||||
* At each level, we keep a list of pages, which are heads of continuous
|
||||
* free pages of length of (1 << order) and marked with PG_Private.Page's
|
||||
* order is recorded in page->private field.
|
||||
* order is recorded in page_private(page) field.
|
||||
* So when we are allocating or freeing one, we can derive the state of the
|
||||
* other. That is, if we allocate a small block, and both were
|
||||
* free, the remainder of the region must be split into blocks.
|
||||
@ -463,7 +463,7 @@ static void prep_new_page(struct page *page, int order)
|
||||
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
|
||||
1 << PG_referenced | 1 << PG_arch_1 |
|
||||
1 << PG_checked | 1 << PG_mappedtodisk);
|
||||
page->private = 0;
|
||||
set_page_private(page, 0);
|
||||
set_page_refs(page, order);
|
||||
kernel_map_pages(page, 1 << order, 1);
|
||||
}
|
||||
|
@ -91,7 +91,8 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
|
||||
unlock_page(page);
|
||||
goto out;
|
||||
}
|
||||
bio = get_swap_bio(GFP_NOIO, page->private, page, end_swap_bio_write);
|
||||
bio = get_swap_bio(GFP_NOIO, page_private(page), page,
|
||||
end_swap_bio_write);
|
||||
if (bio == NULL) {
|
||||
set_page_dirty(page);
|
||||
unlock_page(page);
|
||||
@ -115,7 +116,8 @@ int swap_readpage(struct file *file, struct page *page)
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
ClearPageUptodate(page);
|
||||
bio = get_swap_bio(GFP_KERNEL, page->private, page, end_swap_bio_read);
|
||||
bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
|
||||
end_swap_bio_read);
|
||||
if (bio == NULL) {
|
||||
unlock_page(page);
|
||||
ret = -ENOMEM;
|
||||
|
@ -274,7 +274,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ptl = &mm->page_table_lock;
|
||||
ptl = pte_lockptr(mm, pmd);
|
||||
spin_lock(ptl);
|
||||
if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
|
||||
*ptlp = ptl;
|
||||
@ -550,7 +550,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
|
||||
update_hiwater_rss(mm);
|
||||
|
||||
if (PageAnon(page)) {
|
||||
swp_entry_t entry = { .val = page->private };
|
||||
swp_entry_t entry = { .val = page_private(page) };
|
||||
/*
|
||||
* Store the swap location in the pte.
|
||||
* See handle_pte_fault() ...
|
||||
|
22
mm/shmem.c
22
mm/shmem.c
@ -71,9 +71,6 @@
|
||||
/* Pretend that each entry is of this size in directory's i_size */
|
||||
#define BOGO_DIRENT_SIZE 20
|
||||
|
||||
/* Keep swapped page count in private field of indirect struct page */
|
||||
#define nr_swapped private
|
||||
|
||||
/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
|
||||
enum sgp_type {
|
||||
SGP_QUICK, /* don't try more than file page cache lookup */
|
||||
@ -324,8 +321,10 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
|
||||
|
||||
entry->val = value;
|
||||
info->swapped += incdec;
|
||||
if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
|
||||
kmap_atomic_to_page(entry)->nr_swapped += incdec;
|
||||
if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
|
||||
struct page *page = kmap_atomic_to_page(entry);
|
||||
set_page_private(page, page_private(page) + incdec);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -368,9 +367,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
|
||||
|
||||
spin_unlock(&info->lock);
|
||||
page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
|
||||
if (page) {
|
||||
page->nr_swapped = 0;
|
||||
}
|
||||
if (page)
|
||||
set_page_private(page, 0);
|
||||
spin_lock(&info->lock);
|
||||
|
||||
if (!page) {
|
||||
@ -561,7 +559,7 @@ static void shmem_truncate(struct inode *inode)
|
||||
diroff = 0;
|
||||
}
|
||||
subdir = dir[diroff];
|
||||
if (subdir && subdir->nr_swapped) {
|
||||
if (subdir && page_private(subdir)) {
|
||||
size = limit - idx;
|
||||
if (size > ENTRIES_PER_PAGE)
|
||||
size = ENTRIES_PER_PAGE;
|
||||
@ -572,10 +570,10 @@ static void shmem_truncate(struct inode *inode)
|
||||
nr_swaps_freed += freed;
|
||||
if (offset)
|
||||
spin_lock(&info->lock);
|
||||
subdir->nr_swapped -= freed;
|
||||
set_page_private(subdir, page_private(subdir) - freed);
|
||||
if (offset)
|
||||
spin_unlock(&info->lock);
|
||||
BUG_ON(subdir->nr_swapped > offset);
|
||||
BUG_ON(page_private(subdir) > offset);
|
||||
}
|
||||
if (offset)
|
||||
offset = 0;
|
||||
@ -743,7 +741,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
|
||||
dir = shmem_dir_map(subdir);
|
||||
}
|
||||
subdir = *dir;
|
||||
if (subdir && subdir->nr_swapped) {
|
||||
if (subdir && page_private(subdir)) {
|
||||
ptr = shmem_swp_map(subdir);
|
||||
size = limit - idx;
|
||||
if (size > ENTRIES_PER_PAGE)
|
||||
|
@ -39,7 +39,7 @@ int page_cluster;
|
||||
void put_page(struct page *page)
|
||||
{
|
||||
if (unlikely(PageCompound(page))) {
|
||||
page = (struct page *)page->private;
|
||||
page = (struct page *)page_private(page);
|
||||
if (put_page_testzero(page)) {
|
||||
void (*dtor)(struct page *page);
|
||||
|
||||
|
@ -83,7 +83,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
|
||||
page_cache_get(page);
|
||||
SetPageLocked(page);
|
||||
SetPageSwapCache(page);
|
||||
page->private = entry.val;
|
||||
set_page_private(page, entry.val);
|
||||
total_swapcache_pages++;
|
||||
pagecache_acct(1);
|
||||
}
|
||||
@ -126,8 +126,8 @@ void __delete_from_swap_cache(struct page *page)
|
||||
BUG_ON(PageWriteback(page));
|
||||
BUG_ON(PagePrivate(page));
|
||||
|
||||
radix_tree_delete(&swapper_space.page_tree, page->private);
|
||||
page->private = 0;
|
||||
radix_tree_delete(&swapper_space.page_tree, page_private(page));
|
||||
set_page_private(page, 0);
|
||||
ClearPageSwapCache(page);
|
||||
total_swapcache_pages--;
|
||||
pagecache_acct(-1);
|
||||
@ -197,7 +197,7 @@ void delete_from_swap_cache(struct page *page)
|
||||
{
|
||||
swp_entry_t entry;
|
||||
|
||||
entry.val = page->private;
|
||||
entry.val = page_private(page);
|
||||
|
||||
write_lock_irq(&swapper_space.tree_lock);
|
||||
__delete_from_swap_cache(page);
|
||||
|
@ -61,7 +61,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
|
||||
swp_entry_t entry;
|
||||
|
||||
down_read(&swap_unplug_sem);
|
||||
entry.val = page->private;
|
||||
entry.val = page_private(page);
|
||||
if (PageSwapCache(page)) {
|
||||
struct block_device *bdev = swap_info[swp_type(entry)].bdev;
|
||||
struct backing_dev_info *bdi;
|
||||
@ -69,8 +69,8 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
|
||||
/*
|
||||
* If the page is removed from swapcache from under us (with a
|
||||
* racy try_to_unuse/swapoff) we need an additional reference
|
||||
* count to avoid reading garbage from page->private above. If
|
||||
* the WARN_ON triggers during a swapoff it maybe the race
|
||||
* count to avoid reading garbage from page_private(page) above.
|
||||
* If the WARN_ON triggers during a swapoff it maybe the race
|
||||
* condition and it's harmless. However if it triggers without
|
||||
* swapoff it signals a problem.
|
||||
*/
|
||||
@ -294,7 +294,7 @@ static inline int page_swapcount(struct page *page)
|
||||
struct swap_info_struct *p;
|
||||
swp_entry_t entry;
|
||||
|
||||
entry.val = page->private;
|
||||
entry.val = page_private(page);
|
||||
p = swap_info_get(entry);
|
||||
if (p) {
|
||||
/* Subtract the 1 for the swap cache itself */
|
||||
@ -339,7 +339,7 @@ int remove_exclusive_swap_page(struct page *page)
|
||||
if (page_count(page) != 2) /* 2: us + cache */
|
||||
return 0;
|
||||
|
||||
entry.val = page->private;
|
||||
entry.val = page_private(page);
|
||||
p = swap_info_get(entry);
|
||||
if (!p)
|
||||
return 0;
|
||||
@ -1042,7 +1042,7 @@ int page_queue_congested(struct page *page)
|
||||
BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */
|
||||
|
||||
if (PageSwapCache(page)) {
|
||||
swp_entry_t entry = { .val = page->private };
|
||||
swp_entry_t entry = { .val = page_private(page) };
|
||||
struct swap_info_struct *sis;
|
||||
|
||||
sis = get_swap_info_struct(swp_type(entry));
|
||||
|
@ -521,7 +521,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
|
||||
|
||||
#ifdef CONFIG_SWAP
|
||||
if (PageSwapCache(page)) {
|
||||
swp_entry_t swap = { .val = page->private };
|
||||
swp_entry_t swap = { .val = page_private(page) };
|
||||
__delete_from_swap_cache(page);
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
swap_free(swap);
|
||||
|
Loading…
Reference in New Issue
Block a user