mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
503c358cf1
Kmem accounting of memcg is unusable now, because it lacks slab shrinker support. That means when we hit the limit we will get ENOMEM without any chance to recover. What we should do then is to call shrink_slab, which would reclaim old inode/dentry caches from this cgroup. This is what this patch set is intended to do. Basically, it does two things. First, it introduces the notion of per-memcg slab shrinker. A shrinker that wants to reclaim objects per cgroup should mark itself as SHRINKER_MEMCG_AWARE. Then it will be passed the memory cgroup to scan from in shrink_control->memcg. For such shrinkers shrink_slab iterates over the whole cgroup subtree under the target cgroup and calls the shrinker for each kmem-active memory cgroup. Secondly, this patch set makes the list_lru structure per-memcg. It's done transparently to list_lru users - all they have to do is to tell list_lru_init that they want memcg-aware list_lru. Then the list_lru will automatically distribute objects among per-memcg lists based on which cgroup the object is accounted to. This way, to make FS shrinkers (icache, dcache) memcg-aware, we only need to make them use memcg-aware list_lru, and this is what this patch set does. As before, this patch set only enables per-memcg kmem reclaim when the pressure goes from memory.limit, not from memory.kmem.limit. Handling memory.kmem.limit is going to be tricky due to GFP_NOFS allocations, and it is still unclear whether we will have this knob in the unified hierarchy. This patch (of 9): NUMA aware slab shrinkers use the list_lru structure to distribute objects coming from different NUMA nodes to different lists. Whenever such a shrinker needs to count or scan objects from a particular node, it issues commands like this: count = list_lru_count_node(lru, sc->nid); freed = list_lru_walk_node(lru, sc->nid, isolate_func, isolate_arg, &sc->nr_to_scan); where sc is an instance of the shrink_control structure passed to it from vmscan.
To simplify this, let's add special list_lru functions to be used by shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which consolidate the nid and nr_to_scan arguments in the shrink_control structure. This will also allow us to avoid patching shrinkers that use list_lru when we make shrink_slab() per-memcg - all we will have to do is extend the shrink_control structure to include the target memcg and make list_lru_shrink_{count,walk} handle this appropriately. Signed-off-by: Vladimir Davydov <vdavydov@parallels.com> Suggested-by: Dave Chinner <david@fromorbit.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Greg Thelen <gthelen@google.com> Cc: Glauber Costa <glommer@gmail.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
154 lines
3.6 KiB
C
154 lines
3.6 KiB
C
/* fs/ internal definitions
|
|
*
|
|
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
struct super_block;
|
|
struct file_system_type;
|
|
struct linux_binprm;
|
|
struct path;
|
|
struct mount;
|
|
struct shrink_control;
|
|
|
|
/*
|
|
* block_dev.c
|
|
*/
|
|
#ifdef CONFIG_BLOCK
|
|
extern void __init bdev_cache_init(void);
|
|
|
|
extern int __sync_blockdev(struct block_device *bdev, int wait);
|
|
|
|
#else
|
|
/*
 * No-op replacement for bdev_cache_init(), compiled from the #else
 * branch of the CONFIG_BLOCK conditional. Takes no arguments and
 * performs no work.
 */
static inline void bdev_cache_init(void)
{
}
|
|
|
|
/*
 * Fallback for __sync_blockdev(), compiled from the #else branch of the
 * CONFIG_BLOCK conditional. Both @bdev and @wait are ignored and the
 * function always returns 0.
 */
static inline int __sync_blockdev(struct block_device *bdev, int wait)
{
	return 0;
}
|
|
#endif
|
|
|
|
/*
|
|
* buffer.c
|
|
*/
|
|
extern void guard_bio_eod(int rw, struct bio *bio);
|
|
|
|
/*
|
|
* char_dev.c
|
|
*/
|
|
extern void __init chrdev_init(void);
|
|
|
|
/*
|
|
* namei.c
|
|
*/
|
|
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
|
|
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
|
|
const char *, unsigned int, struct path *);
|
|
|
|
/*
|
|
* namespace.c
|
|
*/
|
|
extern int copy_mount_options(const void __user *, unsigned long *);
|
|
extern char *copy_mount_string(const void __user *);
|
|
|
|
extern struct vfsmount *lookup_mnt(struct path *);
|
|
extern int finish_automount(struct vfsmount *, struct path *);
|
|
|
|
extern int sb_prepare_remount_readonly(struct super_block *);
|
|
|
|
extern void __init mnt_init(void);
|
|
|
|
extern int __mnt_want_write(struct vfsmount *);
|
|
extern int __mnt_want_write_file(struct file *);
|
|
extern void __mnt_drop_write(struct vfsmount *);
|
|
extern void __mnt_drop_write_file(struct file *);
|
|
|
|
/*
|
|
* fs_struct.c
|
|
*/
|
|
extern void chroot_fs_refs(const struct path *, const struct path *);
|
|
|
|
/*
|
|
* file_table.c
|
|
*/
|
|
extern struct file *get_empty_filp(void);
|
|
|
|
/*
|
|
* super.c
|
|
*/
|
|
extern int do_remount_sb(struct super_block *, int, void *, int);
|
|
extern bool grab_super_passive(struct super_block *sb);
|
|
extern struct dentry *mount_fs(struct file_system_type *,
|
|
int, const char *, void *);
|
|
extern struct super_block *user_get_super(dev_t);
|
|
|
|
/*
|
|
* open.c
|
|
*/
|
|
struct open_flags {
|
|
int open_flag;
|
|
umode_t mode;
|
|
int acc_mode;
|
|
int intent;
|
|
int lookup_flags;
|
|
};
|
|
extern struct file *do_filp_open(int dfd, struct filename *pathname,
|
|
const struct open_flags *op);
|
|
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
|
|
const char *, const struct open_flags *);
|
|
|
|
extern long do_handle_open(int mountdirfd,
|
|
struct file_handle __user *ufh, int open_flag);
|
|
extern int open_check_o_direct(struct file *f);
|
|
|
|
/*
|
|
* inode.c
|
|
*/
|
|
extern spinlock_t inode_sb_list_lock;
|
|
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
|
|
extern void inode_add_lru(struct inode *inode);
|
|
|
|
/*
|
|
* fs-writeback.c
|
|
*/
|
|
extern void inode_wb_list_del(struct inode *inode);
|
|
|
|
extern long get_nr_dirty_inodes(void);
|
|
extern void evict_inodes(struct super_block *);
|
|
extern int invalidate_inodes(struct super_block *, bool);
|
|
|
|
/*
|
|
* dcache.c
|
|
*/
|
|
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
|
|
extern int d_set_mounted(struct dentry *dentry);
|
|
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
|
|
|
|
/*
|
|
* read_write.c
|
|
*/
|
|
extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
|
|
|
|
/*
|
|
* pipe.c
|
|
*/
|
|
extern const struct file_operations pipefifo_fops;
|
|
|
|
/*
|
|
* fs_pin.c
|
|
*/
|
|
extern void sb_pin_kill(struct super_block *sb);
|
|
extern void mnt_pin_kill(struct mount *m);
|
|
|
|
/*
|
|
* nsfs.c
|
|
*/
|
|
extern struct dentry_operations ns_dentry_operations;
|