linux-next/include/linux/shrinker.h
Qi Zheng badc28d492 mm: shrinkers: fix deadlock in shrinker debugfs
The debugfs_remove_recursive() is invoked by unregister_shrinker(), which
is holding the write lock of shrinker_rwsem.  It will waits for the
handler of debugfs file complete.  The handler also needs to hold the read
lock of shrinker_rwsem to do something.  So it may cause the following
deadlock:

 	CPU0				CPU1

debugfs_file_get()
shrinker_debugfs_count_show()/shrinker_debugfs_scan_write()

     				unregister_shrinker()
				--> down_write(&shrinker_rwsem);
				    debugfs_remove_recursive()
					// wait for (A)
				    --> wait_for_completion();

    // wait for (B)
--> down_read_killable(&shrinker_rwsem)
debugfs_file_put() -- (A)

				    up_write() -- (B)

The down_read_killable() can be killed, so that the above deadlock can be
recovered.  But it still requires an extra kill action, otherwise it will
block all subsequent shrinker-related operations, so it's better to fix
it.

[akpm@linux-foundation.org: fix CONFIG_SHRINKER_DEBUG=n stub]
Link: https://lkml.kernel.org/r/20230202105612.64641-1-zhengqi.arch@bytedance.com
Fixes: 5035ebc644 ("mm: shrinkers: introduce debugfs interface for memory shrinkers")
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-02-09 15:56:51 -08:00

129 lines
4.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SHRINKER_H
#define _LINUX_SHRINKER_H
#include <linux/atomic.h>
#include <linux/types.h>
/*
* This struct is used to pass information from page reclaim to the shrinkers.
* We consolidate the values for easier extension later.
*
* The 'gfpmask' refers to the allocation we are currently trying to
* fulfil.
*/
struct shrink_control {
gfp_t gfp_mask;
/* current node being shrunk (for NUMA aware shrinkers) */
int nid;
/*
* How many objects scan_objects should scan and try to reclaim.
* This is reset before every call, so it is safe for callees
* to modify.
*/
unsigned long nr_to_scan;
/*
* How many objects did scan_objects process?
* This defaults to nr_to_scan before every call, but the callee
* should track its actual progress.
*/
unsigned long nr_scanned;
/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
};
#define SHRINK_STOP (~0UL)
#define SHRINK_EMPTY (~0UL - 1)
/*
* A callback you can register to apply pressure to ageable caches.
*
* @count_objects should return the number of freeable items in the cache. If
* there are no objects to free, it should return SHRINK_EMPTY, while 0 is
* returned in cases of the number of freeable items cannot be determined
* or shrinker should skip this cache for this time (e.g., their number
* is below shrinkable limit). No deadlock checks should be done during the
* count callback - the shrinker relies on aggregating scan counts that couldn't
* be executed due to potential deadlocks to be run at a later call when the
* deadlock condition is no longer pending.
*
* @scan_objects will only be called if @count_objects returned a non-zero
* value for the number of freeable objects. The callout should scan the cache
* and attempt to free items from the cache. It should then return the number
* of objects freed during the scan, or SHRINK_STOP if progress cannot be made
* due to potential deadlocks. If SHRINK_STOP is returned, then no further
* attempts to call the @scan_objects will be made from the current reclaim
* context.
*
* @flags determine the shrinker abilities, like numa awareness
*/
struct shrinker {
unsigned long (*count_objects)(struct shrinker *,
struct shrink_control *sc);
unsigned long (*scan_objects)(struct shrinker *,
struct shrink_control *sc);
long batch; /* reclaim batch size, 0 = default */
int seeks; /* seeks to recreate an obj */
unsigned flags;
/* These are for internal use */
struct list_head list;
#ifdef CONFIG_MEMCG
/* ID in shrinker_idr */
int id;
#endif
#ifdef CONFIG_SHRINKER_DEBUG
int debugfs_id;
const char *name;
struct dentry *debugfs_entry;
#endif
/* objs pending delete, per node */
atomic_long_t *nr_deferred;
};
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
/* Flags */
#define SHRINKER_REGISTERED (1 << 0)
#define SHRINKER_NUMA_AWARE (1 << 1)
#define SHRINKER_MEMCG_AWARE (1 << 2)
/*
* It just makes sense when the shrinker is also MEMCG_AWARE for now,
* non-MEMCG_AWARE shrinker should not have this flag set.
*/
#define SHRINKER_NONSLAB (1 << 3)
extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker,
const char *fmt, ...);
extern void register_shrinker_prepared(struct shrinker *shrinker);
extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker,
const char *fmt, ...);
extern void unregister_shrinker(struct shrinker *shrinker);
extern void free_prealloced_shrinker(struct shrinker *shrinker);
extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
extern int shrinker_debugfs_add(struct shrinker *shrinker);
extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
return 0;
}
static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
{
return NULL;
}
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
{
return 0;
}
#endif /* CONFIG_SHRINKER_DEBUG */
#endif /* _LINUX_SHRINKER_H */