linux-next/include/linux/shrinker.h
Joan Bruguera Micó 26e239b37e mm: shrinkers: fix race condition on debugfs cleanup
When something registers and unregisters many shrinkers, such as:
    for x in $(seq 10000); do unshare -Ui true; done

Sometimes the following error is printed to the kernel log:
    debugfs: Directory '...' with parent 'shrinker' already present!

This occurs since commit badc28d4924b ("mm: shrinkers: fix deadlock in
shrinker debugfs") / v6.2: Since the call to `debugfs_remove_recursive`
was moved outside the `shrinker_rwsem`/`shrinker_mutex` lock, but the call
to `ida_free` stayed inside, a newly registered shrinker can be
re-assigned that ID and attempt to create the debugfs directory before the
directory from the previous shrinker has been removed.

The locking changes in commit f95bdb700bc6 ("mm: vmscan: make global slab
shrink lockless") made the race condition more likely, though it existed
before then.

Commit badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
could be reverted since the issue is addressed should no longer occur
since the count and scan operations are lockless since commit 20cd1892fcc3
("mm: shrinkers: make count and scan in shrinker debugfs lockless"). 
However, since this is a contended lock, prefer instead moving `ida_free`
outside the lock to avoid the race.

Link: https://lkml.kernel.org/r/20230503013232.299211-1-joanbrugueram@gmail.com
Fixes: badc28d4924b ("mm: shrinkers: fix deadlock in shrinker debugfs")
Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-05-17 15:24:33 -07:00

138 lines
4.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SHRINKER_H
#define _LINUX_SHRINKER_H
#include <linux/atomic.h>
#include <linux/types.h>
/*
* This struct is used to pass information from page reclaim to the shrinkers.
* We consolidate the values for easier extension later.
*
* The 'gfpmask' refers to the allocation we are currently trying to
* fulfil.
*/
struct shrink_control {
gfp_t gfp_mask;
/* current node being shrunk (for NUMA aware shrinkers) */
int nid;
/*
* How many objects scan_objects should scan and try to reclaim.
* This is reset before every call, so it is safe for callees
* to modify.
*/
unsigned long nr_to_scan;
/*
* How many objects did scan_objects process?
* This defaults to nr_to_scan before every call, but the callee
* should track its actual progress.
*/
unsigned long nr_scanned;
/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
};
#define SHRINK_STOP (~0UL)
#define SHRINK_EMPTY (~0UL - 1)
/*
* A callback you can register to apply pressure to ageable caches.
*
* @count_objects should return the number of freeable items in the cache. If
* there are no objects to free, it should return SHRINK_EMPTY, while 0 is
* returned in cases of the number of freeable items cannot be determined
* or shrinker should skip this cache for this time (e.g., their number
* is below shrinkable limit). No deadlock checks should be done during the
* count callback - the shrinker relies on aggregating scan counts that couldn't
* be executed due to potential deadlocks to be run at a later call when the
* deadlock condition is no longer pending.
*
* @scan_objects will only be called if @count_objects returned a non-zero
* value for the number of freeable objects. The callout should scan the cache
* and attempt to free items from the cache. It should then return the number
* of objects freed during the scan, or SHRINK_STOP if progress cannot be made
* due to potential deadlocks. If SHRINK_STOP is returned, then no further
* attempts to call the @scan_objects will be made from the current reclaim
* context.
*
* @flags determine the shrinker abilities, like numa awareness
*/
struct shrinker {
unsigned long (*count_objects)(struct shrinker *,
struct shrink_control *sc);
unsigned long (*scan_objects)(struct shrinker *,
struct shrink_control *sc);
long batch; /* reclaim batch size, 0 = default */
int seeks; /* seeks to recreate an obj */
unsigned flags;
/* These are for internal use */
struct list_head list;
#ifdef CONFIG_MEMCG
/* ID in shrinker_idr */
int id;
#endif
#ifdef CONFIG_SHRINKER_DEBUG
int debugfs_id;
const char *name;
struct dentry *debugfs_entry;
#endif
/* objs pending delete, per node */
atomic_long_t *nr_deferred;
};
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
/* Flags */
#define SHRINKER_REGISTERED (1 << 0)
#define SHRINKER_NUMA_AWARE (1 << 1)
#define SHRINKER_MEMCG_AWARE (1 << 2)
/*
* It just makes sense when the shrinker is also MEMCG_AWARE for now,
* non-MEMCG_AWARE shrinker should not have this flag set.
*/
#define SHRINKER_NONSLAB (1 << 3)
extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker,
const char *fmt, ...);
extern void register_shrinker_prepared(struct shrinker *shrinker);
extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker,
const char *fmt, ...);
extern void unregister_shrinker(struct shrinker *shrinker);
extern void free_prealloced_shrinker(struct shrinker *shrinker);
extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
extern int shrinker_debugfs_add(struct shrinker *shrinker);
extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
int *debugfs_id);
extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
int debugfs_id);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
return 0;
}
static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
int *debugfs_id)
{
*debugfs_id = -1;
return NULL;
}
static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
int debugfs_id)
{
}
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
{
return 0;
}
#endif /* CONFIG_SHRINKER_DEBUG */
#endif /* _LINUX_SHRINKER_H */