mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 09:56:46 +00:00
48a7a0996a
With the new shrinker APIs, there is no action such as prealloc, so rename {prealloc|unregister}_memcg_shrinker() to shrinker_memcg_{alloc|remove}(), which corresponds to the idr_{alloc|remove}() inside the function. Link: https://lkml.kernel.org/r/20230911094444.68966-42-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> Reviewed-by: Muchun Song <songmuchun@bytedance.com> Cc: Abhinav Kumar <quic_abhinavk@quicinc.com> Cc: Alasdair Kergon <agk@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Anna Schumaker <anna@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Bob Peterson <rpeterso@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Carlos Llamas <cmllamas@google.com> Cc: Chandan Babu R <chandan.babu@oracle.com> Cc: Chao Yu <chao@kernel.org> Cc: Chris Mason <clm@fb.com> Cc: Christian Brauner <brauner@kernel.org> Cc: Christian Koenig <christian.koenig@amd.com> Cc: Chuck Lever <cel@kernel.org> Cc: Coly Li <colyli@suse.de> Cc: Dai Ngo <Dai.Ngo@oracle.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: "Darrick J. 
Wong" <djwong@kernel.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Airlie <airlied@gmail.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Sterba <dsterba@suse.com> Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> Cc: Gao Xiang <hsiangkao@linux.alibaba.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Huang Rui <ray.huang@amd.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jaegeuk Kim <jaegeuk@kernel.org> Cc: Jani Nikula <jani.nikula@linux.intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Wang <jasowang@redhat.com> Cc: Jeff Layton <jlayton@kernel.org> Cc: Jeffle Xu <jefflexu@linux.alibaba.com> Cc: Joel Fernandes (Google) <joel@joelfernandes.org> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Josef Bacik <josef@toxicpanda.com> Cc: Juergen Gross <jgross@suse.com> Cc: Kent Overstreet <kent.overstreet@gmail.com> Cc: Kirill Tkhai <tkhai@ya.ru> Cc: Marijn Suijten <marijn.suijten@somainline.org> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Mike Snitzer <snitzer@kernel.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Nadav Amit <namit@vmware.com> Cc: Neil Brown <neilb@suse.de> Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> Cc: Olga Kornievskaia <kolga@netapp.com> Cc: Paul E. 
McKenney <paulmck@kernel.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rob Clark <robdclark@gmail.com> Cc: Rob Herring <robh@kernel.org> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Sean Paul <sean@poorly.run> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: Song Liu <song@kernel.org> Cc: Stefano Stabellini <sstabellini@kernel.org> Cc: Steven Price <steven.price@arm.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com> Cc: Tom Talpey <tom@talpey.com> Cc: Trond Myklebust <trond.myklebust@hammerspace.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com> Cc: Yue Hu <huyue2@coolpad.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
716 lines
18 KiB
C
716 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/shrinker.h>
|
|
#include <trace/events/vmscan.h>
|
|
|
|
#include "internal.h"
|
|
|
|
/* List of registered shrinkers: shrinker_register() appends to it, shrink_slab() walks it. */
LIST_HEAD(shrinker_list);
|
|
/*
 * Taken for write by shrinker registration, freeing and id allocation, and
 * for read (mostly via trylock) by the paths that invoke shrinkers.
 */
DECLARE_RWSEM(shrinker_rwsem);
|
|
|
|
#ifdef CONFIG_MEMCG
|
|
/* Number of shrinker ids the per-memcg shrinker_info arrays are currently sized for. */
static int shrinker_nr_max;
|
|
|
|
static inline int shrinker_unit_size(int nr_items)
|
|
{
|
|
return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
|
|
}
|
|
|
|
static inline void shrinker_unit_free(struct shrinker_info *info, int start)
|
|
{
|
|
struct shrinker_info_unit **unit;
|
|
int nr, i;
|
|
|
|
if (!info)
|
|
return;
|
|
|
|
unit = info->unit;
|
|
nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);
|
|
|
|
for (i = start; i < nr; i++) {
|
|
if (!unit[i])
|
|
break;
|
|
|
|
kfree(unit[i]);
|
|
unit[i] = NULL;
|
|
}
|
|
}
|
|
|
|
static inline int shrinker_unit_alloc(struct shrinker_info *new,
|
|
struct shrinker_info *old, int nid)
|
|
{
|
|
struct shrinker_info_unit *unit;
|
|
int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
|
|
int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
|
|
int i;
|
|
|
|
for (i = start; i < nr; i++) {
|
|
unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
|
|
if (!unit) {
|
|
shrinker_unit_free(new, start);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
new->unit[i] = unit;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void free_shrinker_info(struct mem_cgroup *memcg)
|
|
{
|
|
struct mem_cgroup_per_node *pn;
|
|
struct shrinker_info *info;
|
|
int nid;
|
|
|
|
for_each_node(nid) {
|
|
pn = memcg->nodeinfo[nid];
|
|
info = rcu_dereference_protected(pn->shrinker_info, true);
|
|
shrinker_unit_free(info, 0);
|
|
kvfree(info);
|
|
rcu_assign_pointer(pn->shrinker_info, NULL);
|
|
}
|
|
}
|
|
|
|
int alloc_shrinker_info(struct mem_cgroup *memcg)
|
|
{
|
|
struct shrinker_info *info;
|
|
int nid, ret = 0;
|
|
int array_size = 0;
|
|
|
|
down_write(&shrinker_rwsem);
|
|
array_size = shrinker_unit_size(shrinker_nr_max);
|
|
for_each_node(nid) {
|
|
info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
|
|
if (!info)
|
|
goto err;
|
|
info->map_nr_max = shrinker_nr_max;
|
|
if (shrinker_unit_alloc(info, NULL, nid))
|
|
goto err;
|
|
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
|
|
}
|
|
up_write(&shrinker_rwsem);
|
|
|
|
return ret;
|
|
|
|
err:
|
|
up_write(&shrinker_rwsem);
|
|
free_shrinker_info(memcg);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
|
|
int nid)
|
|
{
|
|
return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
|
|
lockdep_is_held(&shrinker_rwsem));
|
|
}
|
|
|
|
static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
|
|
int old_size, int new_nr_max)
|
|
{
|
|
struct shrinker_info *new, *old;
|
|
struct mem_cgroup_per_node *pn;
|
|
int nid;
|
|
|
|
for_each_node(nid) {
|
|
pn = memcg->nodeinfo[nid];
|
|
old = shrinker_info_protected(memcg, nid);
|
|
/* Not yet online memcg */
|
|
if (!old)
|
|
return 0;
|
|
|
|
/* Already expanded this shrinker_info */
|
|
if (new_nr_max <= old->map_nr_max)
|
|
continue;
|
|
|
|
new = kvmalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
|
|
if (!new)
|
|
return -ENOMEM;
|
|
|
|
new->map_nr_max = new_nr_max;
|
|
|
|
memcpy(new->unit, old->unit, old_size);
|
|
if (shrinker_unit_alloc(new, old, nid)) {
|
|
kvfree(new);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
rcu_assign_pointer(pn->shrinker_info, new);
|
|
kvfree_rcu(old, rcu);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int expand_shrinker_info(int new_id)
|
|
{
|
|
int ret = 0;
|
|
int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
|
|
int new_size, old_size = 0;
|
|
struct mem_cgroup *memcg;
|
|
|
|
if (!root_mem_cgroup)
|
|
goto out;
|
|
|
|
lockdep_assert_held(&shrinker_rwsem);
|
|
|
|
new_size = shrinker_unit_size(new_nr_max);
|
|
old_size = shrinker_unit_size(shrinker_nr_max);
|
|
|
|
memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
|
do {
|
|
ret = expand_one_shrinker_info(memcg, new_size, old_size,
|
|
new_nr_max);
|
|
if (ret) {
|
|
mem_cgroup_iter_break(NULL, memcg);
|
|
goto out;
|
|
}
|
|
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
|
|
out:
|
|
if (!ret)
|
|
shrinker_nr_max = new_nr_max;
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline int shrinker_id_to_index(int shrinker_id)
|
|
{
|
|
return shrinker_id / SHRINKER_UNIT_BITS;
|
|
}
|
|
|
|
static inline int shrinker_id_to_offset(int shrinker_id)
|
|
{
|
|
return shrinker_id % SHRINKER_UNIT_BITS;
|
|
}
|
|
|
|
static inline int calc_shrinker_id(int index, int offset)
|
|
{
|
|
return index * SHRINKER_UNIT_BITS + offset;
|
|
}
|
|
|
|
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
|
|
{
|
|
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
|
|
struct shrinker_info *info;
|
|
struct shrinker_info_unit *unit;
|
|
|
|
rcu_read_lock();
|
|
info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
|
|
unit = info->unit[shrinker_id_to_index(shrinker_id)];
|
|
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
|
|
/* Pairs with smp mb in shrink_slab() */
|
|
smp_mb__before_atomic();
|
|
set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
}
|
|
|
|
/* Maps shrinker ids handed out by shrinker_memcg_alloc() to their struct shrinker. */
static DEFINE_IDR(shrinker_idr);
|
|
|
|
static int shrinker_memcg_alloc(struct shrinker *shrinker)
|
|
{
|
|
int id, ret = -ENOMEM;
|
|
|
|
if (mem_cgroup_disabled())
|
|
return -ENOSYS;
|
|
|
|
down_write(&shrinker_rwsem);
|
|
/* This may call shrinker, so it must use down_read_trylock() */
|
|
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
|
|
if (id < 0)
|
|
goto unlock;
|
|
|
|
if (id >= shrinker_nr_max) {
|
|
if (expand_shrinker_info(id)) {
|
|
idr_remove(&shrinker_idr, id);
|
|
goto unlock;
|
|
}
|
|
}
|
|
shrinker->id = id;
|
|
ret = 0;
|
|
unlock:
|
|
up_write(&shrinker_rwsem);
|
|
return ret;
|
|
}
|
|
|
|
static void shrinker_memcg_remove(struct shrinker *shrinker)
|
|
{
|
|
int id = shrinker->id;
|
|
|
|
BUG_ON(id < 0);
|
|
|
|
lockdep_assert_held(&shrinker_rwsem);
|
|
|
|
idr_remove(&shrinker_idr, id);
|
|
}
|
|
|
|
static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
|
|
struct mem_cgroup *memcg)
|
|
{
|
|
struct shrinker_info *info;
|
|
struct shrinker_info_unit *unit;
|
|
|
|
info = shrinker_info_protected(memcg, nid);
|
|
unit = info->unit[shrinker_id_to_index(shrinker->id)];
|
|
return atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
|
|
}
|
|
|
|
static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
|
|
struct mem_cgroup *memcg)
|
|
{
|
|
struct shrinker_info *info;
|
|
struct shrinker_info_unit *unit;
|
|
|
|
info = shrinker_info_protected(memcg, nid);
|
|
unit = info->unit[shrinker_id_to_index(shrinker->id)];
|
|
return atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
|
|
}
|
|
|
|
void reparent_shrinker_deferred(struct mem_cgroup *memcg)
|
|
{
|
|
int nid, index, offset;
|
|
long nr;
|
|
struct mem_cgroup *parent;
|
|
struct shrinker_info *child_info, *parent_info;
|
|
struct shrinker_info_unit *child_unit, *parent_unit;
|
|
|
|
parent = parent_mem_cgroup(memcg);
|
|
if (!parent)
|
|
parent = root_mem_cgroup;
|
|
|
|
/* Prevent from concurrent shrinker_info expand */
|
|
down_read(&shrinker_rwsem);
|
|
for_each_node(nid) {
|
|
child_info = shrinker_info_protected(memcg, nid);
|
|
parent_info = shrinker_info_protected(parent, nid);
|
|
for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
|
|
child_unit = child_info->unit[index];
|
|
parent_unit = parent_info->unit[index];
|
|
for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
|
|
nr = atomic_long_read(&child_unit->nr_deferred[offset]);
|
|
atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
|
|
}
|
|
}
|
|
}
|
|
up_read(&shrinker_rwsem);
|
|
}
|
|
#else
|
|
static int shrinker_memcg_alloc(struct shrinker *shrinker)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
|
|
/* !CONFIG_MEMCG stub: there is no id to release, so this does nothing. */
static void shrinker_memcg_remove(struct shrinker *shrinker)
{
}
|
|
|
|
/* !CONFIG_MEMCG stub: there is no per-memcg deferred count, always returns 0. */
static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	return 0;
}
|
|
|
|
/* !CONFIG_MEMCG stub: @nr is dropped and 0 is returned. */
static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	return 0;
}
|
|
#endif /* CONFIG_MEMCG */
|
|
|
|
static long xchg_nr_deferred(struct shrinker *shrinker,
|
|
struct shrink_control *sc)
|
|
{
|
|
int nid = sc->nid;
|
|
|
|
if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
|
|
nid = 0;
|
|
|
|
if (sc->memcg &&
|
|
(shrinker->flags & SHRINKER_MEMCG_AWARE))
|
|
return xchg_nr_deferred_memcg(nid, shrinker,
|
|
sc->memcg);
|
|
|
|
return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
|
|
}
|
|
|
|
|
|
static long add_nr_deferred(long nr, struct shrinker *shrinker,
|
|
struct shrink_control *sc)
|
|
{
|
|
int nid = sc->nid;
|
|
|
|
if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
|
|
nid = 0;
|
|
|
|
if (sc->memcg &&
|
|
(shrinker->flags & SHRINKER_MEMCG_AWARE))
|
|
return add_nr_deferred_memcg(nr, nid, shrinker,
|
|
sc->memcg);
|
|
|
|
return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
|
|
}
|
|
|
|
/* Batch size do_shrink_slab() falls back to when a shrinker's ->batch is 0. */
#define SHRINK_BATCH 128
|
|
|
|
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
|
|
struct shrinker *shrinker, int priority)
|
|
{
|
|
unsigned long freed = 0;
|
|
unsigned long long delta;
|
|
long total_scan;
|
|
long freeable;
|
|
long nr;
|
|
long new_nr;
|
|
long batch_size = shrinker->batch ? shrinker->batch
|
|
: SHRINK_BATCH;
|
|
long scanned = 0, next_deferred;
|
|
|
|
freeable = shrinker->count_objects(shrinker, shrinkctl);
|
|
if (freeable == 0 || freeable == SHRINK_EMPTY)
|
|
return freeable;
|
|
|
|
/*
|
|
* copy the current shrinker scan count into a local variable
|
|
* and zero it so that other concurrent shrinker invocations
|
|
* don't also do this scanning work.
|
|
*/
|
|
nr = xchg_nr_deferred(shrinker, shrinkctl);
|
|
|
|
if (shrinker->seeks) {
|
|
delta = freeable >> priority;
|
|
delta *= 4;
|
|
do_div(delta, shrinker->seeks);
|
|
} else {
|
|
/*
|
|
* These objects don't require any IO to create. Trim
|
|
* them aggressively under memory pressure to keep
|
|
* them from causing refetches in the IO caches.
|
|
*/
|
|
delta = freeable / 2;
|
|
}
|
|
|
|
total_scan = nr >> priority;
|
|
total_scan += delta;
|
|
total_scan = min(total_scan, (2 * freeable));
|
|
|
|
trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
|
|
freeable, delta, total_scan, priority);
|
|
|
|
/*
|
|
* Normally, we should not scan less than batch_size objects in one
|
|
* pass to avoid too frequent shrinker calls, but if the slab has less
|
|
* than batch_size objects in total and we are really tight on memory,
|
|
* we will try to reclaim all available objects, otherwise we can end
|
|
* up failing allocations although there are plenty of reclaimable
|
|
* objects spread over several slabs with usage less than the
|
|
* batch_size.
|
|
*
|
|
* We detect the "tight on memory" situations by looking at the total
|
|
* number of objects we want to scan (total_scan). If it is greater
|
|
* than the total number of objects on slab (freeable), we must be
|
|
* scanning at high prio and therefore should try to reclaim as much as
|
|
* possible.
|
|
*/
|
|
while (total_scan >= batch_size ||
|
|
total_scan >= freeable) {
|
|
unsigned long ret;
|
|
unsigned long nr_to_scan = min(batch_size, total_scan);
|
|
|
|
shrinkctl->nr_to_scan = nr_to_scan;
|
|
shrinkctl->nr_scanned = nr_to_scan;
|
|
ret = shrinker->scan_objects(shrinker, shrinkctl);
|
|
if (ret == SHRINK_STOP)
|
|
break;
|
|
freed += ret;
|
|
|
|
count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
|
|
total_scan -= shrinkctl->nr_scanned;
|
|
scanned += shrinkctl->nr_scanned;
|
|
|
|
cond_resched();
|
|
}
|
|
|
|
/*
|
|
* The deferred work is increased by any new work (delta) that wasn't
|
|
* done, decreased by old deferred work that was done now.
|
|
*
|
|
* And it is capped to two times of the freeable items.
|
|
*/
|
|
next_deferred = max_t(long, (nr + delta - scanned), 0);
|
|
next_deferred = min(next_deferred, (2 * freeable));
|
|
|
|
/*
|
|
* move the unused scan count back into the shrinker in a
|
|
* manner that handles concurrent updates.
|
|
*/
|
|
new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);
|
|
|
|
trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
|
|
return freed;
|
|
}
|
|
|
|
#ifdef CONFIG_MEMCG
|
|
/*
 * Run the shrinkers whose bit is set in the shrinker map of @memcg on node
 * @nid.
 *
 * Returns the number of objects freed. 0 is returned without doing anything
 * when @memcg is offline, shrinker_rwsem cannot be taken immediately, or the
 * memcg has no shrinker_info. When the walk is cut short because the rwsem
 * is contended, at least 1 is returned.
 */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
			struct mem_cgroup *memcg, int priority)
{
	struct shrinker_info *info;
	unsigned long nr_freed = 0;
	int index, offset, nr_units;

	if (!mem_cgroup_online(memcg))
		return 0;

	if (!down_read_trylock(&shrinker_rwsem))
		return 0;

	info = shrinker_info_protected(memcg, nid);
	if (unlikely(!info))
		goto out_unlock;

	nr_units = shrinker_id_to_index(info->map_nr_max);
	for (index = 0; index < nr_units; index++) {
		struct shrinker_info_unit *unit = info->unit[index];

		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
			struct shrink_control sc = {
				.gfp_mask = gfp_mask,
				.nid = nid,
				.memcg = memcg,
			};
			int shrinker_id = calc_shrinker_id(index, offset);
			struct shrinker *shrinker;
			unsigned long ret;

			shrinker = idr_find(&shrinker_idr, shrinker_id);
			if (unlikely(!shrinker)) {
				/* No shrinker owns this id: drop the stale bit. */
				clear_bit(offset, unit->map);
				continue;
			}
			if (unlikely(!(shrinker->flags & SHRINKER_REGISTERED)))
				continue;

			/* Call non-slab shrinkers even though kmem is disabled */
			if (!memcg_kmem_online() &&
			    !(shrinker->flags & SHRINKER_NONSLAB))
				continue;

			ret = do_shrink_slab(&sc, shrinker, priority);
			if (ret == SHRINK_EMPTY) {
				clear_bit(offset, unit->map);
				/*
				 * A new object may have been added after the
				 * shrinker reported "empty" but before the bit
				 * was cleared. Call the shrinker once more and
				 * set the bit again if it is no longer empty.
				 * The memory barrier here pairs with the
				 * barrier in set_shrinker_bit():
				 *
				 * list_lru_add()     shrink_slab_memcg()
				 *   list_add_tail()    clear_bit()
				 *   <MB>               <MB>
				 *   set_bit()          do_shrink_slab()
				 */
				smp_mb__after_atomic();
				ret = do_shrink_slab(&sc, shrinker, priority);
				if (ret != SHRINK_EMPTY)
					set_shrinker_bit(memcg, nid, shrinker_id);
				else
					ret = 0;
			}
			nr_freed += ret;

			if (rwsem_is_contended(&shrinker_rwsem)) {
				/* Report progress even if nothing was freed. */
				if (!nr_freed)
					nr_freed = 1;
				goto out_unlock;
			}
		}
	}
out_unlock:
	up_read(&shrinker_rwsem);
	return nr_freed;
}
|
|
#else /* !CONFIG_MEMCG */
|
|
/* !CONFIG_MEMCG stub: there are no per-memcg shrinker maps, nothing is freed. */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
			struct mem_cgroup *memcg, int priority)
{
	return 0;
}
|
|
#endif /* CONFIG_MEMCG */
|
|
|
|
/**
|
|
* shrink_slab - shrink slab caches
|
|
* @gfp_mask: allocation context
|
|
* @nid: node whose slab caches to target
|
|
* @memcg: memory cgroup whose slab caches to target
|
|
* @priority: the reclaim priority
|
|
*
|
|
* Call the shrink functions to age shrinkable caches.
|
|
*
|
|
* @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
|
|
* unaware shrinkers will receive a node id of 0 instead.
|
|
*
|
|
* @memcg specifies the memory cgroup to target. Unaware shrinkers
|
|
* are called only if it is the root cgroup.
|
|
*
|
|
* @priority is sc->priority, we take the number of objects and >> by priority
|
|
* in order to get the scan target.
|
|
*
|
|
* Returns the number of reclaimed slab objects.
|
|
*/
|
|
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
|
|
int priority)
|
|
{
|
|
unsigned long ret, freed = 0;
|
|
struct shrinker *shrinker;
|
|
|
|
/*
|
|
* The root memcg might be allocated even though memcg is disabled
|
|
* via "cgroup_disable=memory" boot parameter. This could make
|
|
* mem_cgroup_is_root() return false, then just run memcg slab
|
|
* shrink, but skip global shrink. This may result in premature
|
|
* oom.
|
|
*/
|
|
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
|
|
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
|
|
|
|
if (!down_read_trylock(&shrinker_rwsem))
|
|
goto out;
|
|
|
|
list_for_each_entry(shrinker, &shrinker_list, list) {
|
|
struct shrink_control sc = {
|
|
.gfp_mask = gfp_mask,
|
|
.nid = nid,
|
|
.memcg = memcg,
|
|
};
|
|
|
|
ret = do_shrink_slab(&sc, shrinker, priority);
|
|
if (ret == SHRINK_EMPTY)
|
|
ret = 0;
|
|
freed += ret;
|
|
/*
|
|
* Bail out if someone want to register a new shrinker to
|
|
* prevent the registration from being stalled for long periods
|
|
* by parallel ongoing shrinking.
|
|
*/
|
|
if (rwsem_is_contended(&shrinker_rwsem)) {
|
|
freed = freed ? : 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
up_read(&shrinker_rwsem);
|
|
out:
|
|
cond_resched();
|
|
return freed;
|
|
}
|
|
|
|
struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
|
|
{
|
|
struct shrinker *shrinker;
|
|
unsigned int size;
|
|
va_list ap;
|
|
int err;
|
|
|
|
shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
|
|
if (!shrinker)
|
|
return NULL;
|
|
|
|
va_start(ap, fmt);
|
|
err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
|
|
va_end(ap);
|
|
if (err)
|
|
goto err_name;
|
|
|
|
shrinker->flags = flags | SHRINKER_ALLOCATED;
|
|
shrinker->seeks = DEFAULT_SEEKS;
|
|
|
|
if (flags & SHRINKER_MEMCG_AWARE) {
|
|
err = shrinker_memcg_alloc(shrinker);
|
|
if (err == -ENOSYS) {
|
|
/* Memcg is not supported, fallback to non-memcg-aware shrinker. */
|
|
shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
|
|
goto non_memcg;
|
|
}
|
|
|
|
if (err)
|
|
goto err_flags;
|
|
|
|
return shrinker;
|
|
}
|
|
|
|
non_memcg:
|
|
/*
|
|
* The nr_deferred is available on per memcg level for memcg aware
|
|
* shrinkers, so only allocate nr_deferred in the following cases:
|
|
* - non-memcg-aware shrinkers
|
|
* - !CONFIG_MEMCG
|
|
* - memcg is disabled by kernel command line
|
|
*/
|
|
size = sizeof(*shrinker->nr_deferred);
|
|
if (flags & SHRINKER_NUMA_AWARE)
|
|
size *= nr_node_ids;
|
|
|
|
shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
|
|
if (!shrinker->nr_deferred)
|
|
goto err_flags;
|
|
|
|
return shrinker;
|
|
|
|
err_flags:
|
|
shrinker_debugfs_name_free(shrinker);
|
|
err_name:
|
|
kfree(shrinker);
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(shrinker_alloc);
|
|
|
|
void shrinker_register(struct shrinker *shrinker)
|
|
{
|
|
if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
|
|
pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker");
|
|
return;
|
|
}
|
|
|
|
down_write(&shrinker_rwsem);
|
|
list_add_tail(&shrinker->list, &shrinker_list);
|
|
shrinker->flags |= SHRINKER_REGISTERED;
|
|
shrinker_debugfs_add(shrinker);
|
|
up_write(&shrinker_rwsem);
|
|
}
|
|
EXPORT_SYMBOL_GPL(shrinker_register);
|
|
|
|
void shrinker_free(struct shrinker *shrinker)
|
|
{
|
|
struct dentry *debugfs_entry = NULL;
|
|
int debugfs_id;
|
|
|
|
if (!shrinker)
|
|
return;
|
|
|
|
down_write(&shrinker_rwsem);
|
|
if (shrinker->flags & SHRINKER_REGISTERED) {
|
|
list_del(&shrinker->list);
|
|
debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
|
|
shrinker->flags &= ~SHRINKER_REGISTERED;
|
|
}
|
|
|
|
shrinker_debugfs_name_free(shrinker);
|
|
|
|
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
|
|
shrinker_memcg_remove(shrinker);
|
|
up_write(&shrinker_rwsem);
|
|
|
|
if (debugfs_entry)
|
|
shrinker_debugfs_remove(debugfs_entry, debugfs_id);
|
|
|
|
kfree(shrinker->nr_deferred);
|
|
shrinker->nr_deferred = NULL;
|
|
|
|
kfree(shrinker);
|
|
}
|
|
EXPORT_SYMBOL_GPL(shrinker_free);
|