mm: workingset: age nonresident information alongside anonymous pages

Patch series "fix for 'mm: balance LRU lists based on relative
thrashing' patchset"

This patchset fixes some problems in the patchset "mm: balance LRU
lists based on relative thrashing", which is now merged into mainline.

Patch "mm: workingset: let cache workingset challenge anon fix" is the
result of discussion with Johannes.  See following link.

  http://lkml.kernel.org/r/20200520232525.798933-6-hannes@cmpxchg.org

The other two are minor issues that I found while rebasing my
patchset.

This patch (of 3):

After ("mm: workingset: let cache workingset challenge anon fix"), we
compare refault distances to active_file + anon.  But the age of the
non-resident information is only driven by the file LRU.  As a result,
we may overestimate the recency of any incoming refaults and activate
them too eagerly, causing unnecessary LRU churn in certain situations.

Make anon aging drive nonresident age as well to address that.

Link: http://lkml.kernel.org/r/1592288204-27734-1-git-send-email-iamjoonsoo.kim@lge.com
Link: http://lkml.kernel.org/r/1592288204-27734-2-git-send-email-iamjoonsoo.kim@lge.com
Fixes: 34e58cac6d ("mm: workingset: let cache workingset challenge anon")
Reported-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Johannes Weiner 2020-06-25 20:30:31 -07:00 committed by Linus Torvalds
parent 2a8bef3217
commit 31d8fcac00
4 changed files with 33 additions and 21 deletions

View File

@ -257,8 +257,8 @@ struct lruvec {
*/ */
unsigned long anon_cost; unsigned long anon_cost;
unsigned long file_cost; unsigned long file_cost;
/* Evictions & activations on the inactive file list */ /* Non-resident age, driven by LRU movement */
atomic_long_t inactive_age; atomic_long_t nonresident_age;
/* Refaults at the time of last reclaim cycle */ /* Refaults at the time of last reclaim cycle */
unsigned long refaults; unsigned long refaults;
/* Various lruvec state flags (enum lruvec_flags) */ /* Various lruvec state flags (enum lruvec_flags) */

View File

@ -313,6 +313,7 @@ struct vma_swap_readahead {
}; };
/* linux/mm/workingset.c */ /* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
void workingset_refault(struct page *page, void *shadow); void workingset_refault(struct page *page, void *shadow);
void workingset_activation(struct page *page); void workingset_activation(struct page *page);

View File

@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
__delete_from_swap_cache(page, swap); __delete_from_swap_cache(page, swap);
xa_unlock_irqrestore(&mapping->i_pages, flags); xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap); put_swap_page(page, swap);
workingset_eviction(page, target_memcg);
} else { } else {
void (*freepage)(struct page *); void (*freepage)(struct page *);
void *shadow = NULL; void *shadow = NULL;
@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
list_add(&page->lru, &pages_to_free); list_add(&page->lru, &pages_to_free);
} else { } else {
nr_moved += nr_pages; nr_moved += nr_pages;
if (PageActive(page))
workingset_age_nonresident(lruvec, nr_pages);
} }
} }

View File

@ -156,8 +156,8 @@
* *
* Implementation * Implementation
* *
* For each node's file LRU lists, a counter for inactive evictions * For each node's LRU lists, a counter for inactive evictions and
* and activations is maintained (node->inactive_age). * activations is maintained (node->nonresident_age).
* *
* On eviction, a snapshot of this counter (along with some bits to * On eviction, a snapshot of this counter (along with some bits to
* identify the node) is stored in the now empty page cache * identify the node) is stored in the now empty page cache
@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
*workingsetp = workingset; *workingsetp = workingset;
} }
static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat) /**
* workingset_age_nonresident - age non-resident entries as LRU ages
* @lruvec: the lruvec that was aged
* @nr_pages: the number of pages to count
*
* As in-memory pages are aged, non-resident pages need to be aged as
* well, in order for the refault distances later on to be comparable
* to the in-memory dimensions. This function allows reclaim and LRU
* operations to drive the non-resident aging along in parallel.
*/
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
{ {
/* /*
* Reclaiming a cgroup means reclaiming all its children in a * Reclaiming a cgroup means reclaiming all its children in a
@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
* the root cgroup's, age as well. * the root cgroup's, age as well.
*/ */
do { do {
struct lruvec *lruvec; atomic_long_add(nr_pages, &lruvec->nonresident_age);
} while ((lruvec = parent_lruvec(lruvec)));
lruvec = mem_cgroup_lruvec(memcg, pgdat);
atomic_long_inc(&lruvec->inactive_age);
} while (memcg && (memcg = parent_mem_cgroup(memcg)));
} }
/** /**
@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page);
advance_inactive_age(page_memcg(page), pgdat);
lruvec = mem_cgroup_lruvec(target_memcg, pgdat); lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
/* XXX: target_memcg can be NULL, go through lruvec */ /* XXX: target_memcg can be NULL, go through lruvec */
memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
eviction = atomic_long_read(&lruvec->inactive_age); eviction = atomic_long_read(&lruvec->nonresident_age);
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
} }
@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow)
if (!mem_cgroup_disabled() && !eviction_memcg) if (!mem_cgroup_disabled() && !eviction_memcg)
goto out; goto out;
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->inactive_age); refault = atomic_long_read(&eviction_lruvec->nonresident_age);
/* /*
* Calculate the refault distance * Calculate the refault distance
* *
* The unsigned subtraction here gives an accurate distance * The unsigned subtraction here gives an accurate distance
* across inactive_age overflows in most cases. There is a * across nonresident_age overflows in most cases. There is a
* special case: usually, shadow entries have a short lifetime * special case: usually, shadow entries have a short lifetime
* and are either refaulted or reclaimed along with the inode * and are either refaulted or reclaimed along with the inode
* before they get too old. But it is not impossible for the * before they get too old. But it is not impossible for the
* inactive_age to lap a shadow entry in the field, which can * nonresident_age to lap a shadow entry in the field, which
* then result in a false small refault distance, leading to a * can then result in a false small refault distance, leading
* false activation should this old entry actually refault * to a false activation should this old entry actually
* again. However, earlier kernels used to deactivate * refault again. However, earlier kernels used to deactivate
* unconditionally with *every* reclaim invocation for the * unconditionally with *every* reclaim invocation for the
* longest time, so the occasional inappropriate activation * longest time, so the occasional inappropriate activation
* leading to pressure on the active list is not a problem. * leading to pressure on the active list is not a problem.
@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow)
goto out; goto out;
SetPageActive(page); SetPageActive(page);
advance_inactive_age(memcg, pgdat); workingset_age_nonresident(lruvec, hpage_nr_pages(page));
inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
/* Page was active prior to eviction */ /* Page was active prior to eviction */
@ -382,6 +388,7 @@ void workingset_refault(struct page *page, void *shadow)
void workingset_activation(struct page *page) void workingset_activation(struct page *page)
{ {
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
struct lruvec *lruvec;
rcu_read_lock(); rcu_read_lock();
/* /*
@ -394,7 +401,8 @@ void workingset_activation(struct page *page)
memcg = page_memcg_rcu(page); memcg = page_memcg_rcu(page);
if (!mem_cgroup_disabled() && !memcg) if (!mem_cgroup_disabled() && !memcg)
goto out; goto out;
advance_inactive_age(memcg, page_pgdat(page)); lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
out: out:
rcu_read_unlock(); rcu_read_unlock();
} }