mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-12 00:38:55 +00:00
6b4f7799c6
The slab shrinkers are currently invoked from the zonelist walkers in kswapd, direct reclaim, and zone reclaim, all of which roughly gauge the eligible LRU pages and assemble a nodemask to pass to NUMA-aware shrinkers, which then again have to walk over the nodemask. This is redundant code, extra runtime work, and fairly inaccurate when it comes to the estimation of actually scannable LRU pages. The code duplication will only get worse when making the shrinkers cgroup-aware and requiring them to have out-of-band cgroup hierarchy walks as well. Instead, invoke the shrinkers from shrink_zone(), which is where all reclaimers end up, to avoid this duplication. Take the count for eligible LRU pages out of get_scan_count(), which considers many more factors than just the availability of swap space, like zone_reclaimable_pages() currently does. Accumulate the number over all visited lruvecs to get the per-zone value. Some nodes have multiple zones due to memory addressing restrictions. To avoid putting too much pressure on the shrinkers, only invoke them once for each such node, using the class zone of the allocation as the pivot zone. For now, this integrates the slab shrinking better into the reclaim logic and gets rid of duplicative invocations from kswapd, direct reclaim, and zone reclaim. It also prepares for cgroup-awareness, allowing memcg-capable shrinkers to be added at the lruvec level without much duplication of both code and runtime work. This changes kswapd behavior, which used to invoke the shrinkers for each zone, but with scan ratios gathered from the entire node, resulting in meaningless pressure quantities on multi-zone nodes. Zone reclaim behavior also changes. It used to shrink slabs until the same amount of pages were shrunk as were reclaimed from the LRUs. 
Now it merely invokes the shrinkers once with the zone's scan ratio, which makes the shrinkers go easier on caches that implement aging and would prefer feeding back pressure from recently used slab objects to unused LRU pages.

[vdavydov@parallels.com: assure class zone is populated]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
82 lines
1.8 KiB
C
82 lines
1.8 KiB
C
/*
|
|
* Implement the manual drop-all-pagecache function
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/gfp.h>
|
|
#include "internal.h"
|
|
|
|
/*
 * A global variable is a bit ugly, but it keeps the code simple.
 *
 * drop_caches_sysctl_handler() tests bits 1, 2 and 4 of this value after a
 * write.  NOTE(review): presumably the sysctl table entry points at this
 * variable, so that proc_dointvec_minmax() stores the written value here --
 * confirm against the table definition, which is not in this file.
 */
|
|
int sysctl_drop_caches;
|
|
|
|
static void drop_pagecache_sb(struct super_block *sb, void *unused)
|
|
{
|
|
struct inode *inode, *toput_inode = NULL;
|
|
|
|
spin_lock(&inode_sb_list_lock);
|
|
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
|
|
spin_lock(&inode->i_lock);
|
|
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
|
|
(inode->i_mapping->nrpages == 0)) {
|
|
spin_unlock(&inode->i_lock);
|
|
continue;
|
|
}
|
|
__iget(inode);
|
|
spin_unlock(&inode->i_lock);
|
|
spin_unlock(&inode_sb_list_lock);
|
|
invalidate_mapping_pages(inode->i_mapping, 0, -1);
|
|
iput(toput_inode);
|
|
toput_inode = inode;
|
|
spin_lock(&inode_sb_list_lock);
|
|
}
|
|
spin_unlock(&inode_sb_list_lock);
|
|
iput(toput_inode);
|
|
}
|
|
|
|
static void drop_slab(void)
|
|
{
|
|
int nr_objects;
|
|
|
|
do {
|
|
int nid;
|
|
|
|
nr_objects = 0;
|
|
for_each_online_node(nid)
|
|
nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
|
|
1000, 1000);
|
|
} while (nr_objects > 10);
|
|
}
|
|
|
|
int drop_caches_sysctl_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *length, loff_t *ppos)
|
|
{
|
|
int ret;
|
|
|
|
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
|
|
if (ret)
|
|
return ret;
|
|
if (write) {
|
|
static int stfu;
|
|
|
|
if (sysctl_drop_caches & 1) {
|
|
iterate_supers(drop_pagecache_sb, NULL);
|
|
count_vm_event(DROP_PAGECACHE);
|
|
}
|
|
if (sysctl_drop_caches & 2) {
|
|
drop_slab();
|
|
count_vm_event(DROP_SLAB);
|
|
}
|
|
if (!stfu) {
|
|
pr_info("%s (%d): drop_caches: %d\n",
|
|
current->comm, task_pid_nr(current),
|
|
sysctl_drop_caches);
|
|
}
|
|
stfu |= sysctl_drop_caches & 4;
|
|
}
|
|
return 0;
|
|
}
|