slub: move synchronize_sched out of slab_mutex on shrink

synchronize_sched() is a heavy operation, and calling it for each cache
owned by a memory cgroup being destroyed may take quite some time.  What
is worse, it's currently called under the slab_mutex, stalling all work
items doing cache creation/destruction.

Actually, there isn't much point in calling synchronize_sched() for each
cache: it's enough to call it just once, after setting cpu_partial for
all caches and before shrinking them.  This way, we can also move it out
of the slab_mutex, which we have to hold for iterating over the slab
cache list.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=172991
Link: http://lkml.kernel.org/r/0a10d71ecae3db00fb4421bcd3f82bcc911f4be4.1475329751.git.vdavydov.dev@gmail.com
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Doug Smythies <dsmythies@telus.net>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Vladimir Davydov 2016-12-12 16:41:32 -08:00 committed by Linus Torvalds
parent 13583c3d32
commit 89e364db71
5 changed files with 31 additions and 23 deletions

View File

@ -2332,7 +2332,7 @@ static int drain_freelist(struct kmem_cache *cache,
return nr_freed; return nr_freed;
} }
int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate) int __kmem_cache_shrink(struct kmem_cache *cachep)
{ {
int ret = 0; int ret = 0;
int node; int node;
@ -2352,7 +2352,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
int __kmem_cache_shutdown(struct kmem_cache *cachep) int __kmem_cache_shutdown(struct kmem_cache *cachep)
{ {
return __kmem_cache_shrink(cachep, false); return __kmem_cache_shrink(cachep);
} }
void __kmem_cache_release(struct kmem_cache *cachep) void __kmem_cache_release(struct kmem_cache *cachep)

View File

@ -146,7 +146,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
int __kmem_cache_shutdown(struct kmem_cache *); int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *); void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *, bool); int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *); void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file; struct seq_file;

View File

@ -573,6 +573,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
get_online_cpus(); get_online_cpus();
get_online_mems(); get_online_mems();
#ifdef CONFIG_SLUB
/*
* In case of SLUB, we need to disable empty slab caching to
* avoid pinning the offline memory cgroup by freeable kmem
* pages charged to it. SLAB doesn't need this, as it
* periodically purges unused slabs.
*/
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) {
c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
if (c) {
c->cpu_partial = 0;
c->min_partial = 0;
}
}
mutex_unlock(&slab_mutex);
/*
* kmem_cache->cpu_partial is checked locklessly (see
* put_cpu_partial()). Make sure the change is visible.
*/
synchronize_sched();
#endif
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) { list_for_each_entry(s, &slab_caches, list) {
if (!is_root_cache(s)) if (!is_root_cache(s))
@ -584,7 +607,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
if (!c) if (!c)
continue; continue;
__kmem_cache_shrink(c, true); __kmem_cache_shrink(c);
arr->entries[idx] = NULL; arr->entries[idx] = NULL;
} }
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
@ -755,7 +778,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
get_online_cpus(); get_online_cpus();
get_online_mems(); get_online_mems();
kasan_cache_shrink(cachep); kasan_cache_shrink(cachep);
ret = __kmem_cache_shrink(cachep, false); ret = __kmem_cache_shrink(cachep);
put_online_mems(); put_online_mems();
put_online_cpus(); put_online_cpus();
return ret; return ret;

View File

@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_cache *c)
{ {
} }
int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate) int __kmem_cache_shrink(struct kmem_cache *d)
{ {
return 0; return 0;
} }

View File

@ -3883,7 +3883,7 @@ EXPORT_SYMBOL(kfree);
* being allocated from last increasing the chance that the last objects * being allocated from last increasing the chance that the last objects
* are freed in them. * are freed in them.
*/ */
int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) int __kmem_cache_shrink(struct kmem_cache *s)
{ {
int node; int node;
int i; int i;
@ -3895,21 +3895,6 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
unsigned long flags; unsigned long flags;
int ret = 0; int ret = 0;
if (deactivate) {
/*
* Disable empty slabs caching. Used to avoid pinning offline
* memory cgroups by kmem pages that can be freed.
*/
s->cpu_partial = 0;
s->min_partial = 0;
/*
* s->cpu_partial is checked locklessly (see put_cpu_partial),
* so we have to make sure the change is visible.
*/
synchronize_sched();
}
flush_all(s); flush_all(s);
for_each_kmem_cache_node(s, node, n) { for_each_kmem_cache_node(s, node, n) {
INIT_LIST_HEAD(&discard); INIT_LIST_HEAD(&discard);
@ -3966,7 +3951,7 @@ static int slab_mem_going_offline_callback(void *arg)
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) list_for_each_entry(s, &slab_caches, list)
__kmem_cache_shrink(s, false); __kmem_cache_shrink(s);
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
return 0; return 0;