Merge branch 'for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Nothing major. Two notable fixes are Li's second stab at fixing the
  long-standing race condition in the mount path and suppression of
  spurious warnings from cgroup_get(). All other changes are trivial."

* 'for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: mark cgroup_get() with __maybe_unused
  cgroup: avoid attaching a cgroup root to two different superblocks, take 2
  cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc()
  cgroup: move cgroup_subsys_state parent field for cache locality
  cpuset: Remove cpuset_update_active_cpus()'s parameter.
  cgroup: switch to BUG_ON()
  cgroup: drop duplicate header nsproxy.h
  kernel: convert css_set.refcount from atomic_t to refcount_t
  kernel: convert cgroup_namespace.count from atomic_t to refcount_t
This commit is contained in:
Linus Torvalds 2017-05-01 13:52:24 -07:00
commit 9410091dd5
9 changed files with 67 additions and 41 deletions

View File

@ -13,6 +13,7 @@
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/percpu-refcount.h> #include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h> #include <linux/percpu-rwsem.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
@ -106,9 +107,6 @@ struct cgroup_subsys_state {
/* reference count - access via css_[try]get() and css_put() */ /* reference count - access via css_[try]get() and css_put() */
struct percpu_ref refcnt; struct percpu_ref refcnt;
/* PI: the parent css */
struct cgroup_subsys_state *parent;
/* siblings list anchored at the parent's ->children */ /* siblings list anchored at the parent's ->children */
struct list_head sibling; struct list_head sibling;
struct list_head children; struct list_head children;
@ -138,6 +136,12 @@ struct cgroup_subsys_state {
/* percpu_ref killing and RCU release */ /* percpu_ref killing and RCU release */
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct work_struct destroy_work; struct work_struct destroy_work;
/*
* PI: the parent css. Placed here for cache proximity to following
* fields of the containing structure.
*/
struct cgroup_subsys_state *parent;
}; };
/* /*
@ -156,7 +160,7 @@ struct css_set {
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
/* reference count */ /* reference count */
atomic_t refcount; refcount_t refcount;
/* the default cgroup associated with this css_set */ /* the default cgroup associated with this css_set */
struct cgroup *dfl_cgrp; struct cgroup *dfl_cgrp;

View File

@ -17,11 +17,11 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/kernfs.h> #include <linux/kernfs.h>
#include <linux/jump_label.h> #include <linux/jump_label.h>
#include <linux/nsproxy.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/ns_common.h> #include <linux/ns_common.h>
#include <linux/nsproxy.h> #include <linux/nsproxy.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/refcount.h>
#include <linux/cgroup-defs.h> #include <linux/cgroup-defs.h>
@ -661,7 +661,7 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
#endif /* CONFIG_CGROUP_DATA */ #endif /* CONFIG_CGROUP_DATA */
struct cgroup_namespace { struct cgroup_namespace {
atomic_t count; refcount_t count;
struct ns_common ns; struct ns_common ns;
struct user_namespace *user_ns; struct user_namespace *user_ns;
struct ucounts *ucounts; struct ucounts *ucounts;
@ -696,12 +696,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
static inline void get_cgroup_ns(struct cgroup_namespace *ns) static inline void get_cgroup_ns(struct cgroup_namespace *ns)
{ {
if (ns) if (ns)
atomic_inc(&ns->count); refcount_inc(&ns->count);
} }
static inline void put_cgroup_ns(struct cgroup_namespace *ns) static inline void put_cgroup_ns(struct cgroup_namespace *ns)
{ {
if (ns && atomic_dec_and_test(&ns->count)) if (ns && refcount_dec_and_test(&ns->count))
free_cgroup_ns(ns); free_cgroup_ns(ns);
} }

View File

@ -42,7 +42,7 @@ static inline void cpuset_dec(void)
extern int cpuset_init(void); extern int cpuset_init(void);
extern void cpuset_init_smp(void); extern void cpuset_init_smp(void);
extern void cpuset_update_active_cpus(bool cpu_online); extern void cpuset_update_active_cpus(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@ -155,7 +155,7 @@ static inline bool cpusets_enabled(void) { return false; }
static inline int cpuset_init(void) { return 0; } static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {} static inline void cpuset_init_smp(void) {}
static inline void cpuset_update_active_cpus(bool cpu_online) static inline void cpuset_update_active_cpus(void)
{ {
partition_sched_domains(1, NULL, NULL); partition_sched_domains(1, NULL, NULL);
} }

View File

@ -5,6 +5,7 @@
#include <linux/kernfs.h> #include <linux/kernfs.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/refcount.h>
/* /*
* A cgroup can be associated with multiple css_sets as different tasks may * A cgroup can be associated with multiple css_sets as different tasks may
@ -134,7 +135,7 @@ static inline void put_css_set(struct css_set *cset)
* can see it. Similar to atomic_dec_and_lock(), but for an * can see it. Similar to atomic_dec_and_lock(), but for an
* rwlock * rwlock
*/ */
if (atomic_add_unless(&cset->refcount, -1, 1)) if (refcount_dec_not_one(&cset->refcount))
return; return;
spin_lock_irqsave(&css_set_lock, flags); spin_lock_irqsave(&css_set_lock, flags);
@ -147,7 +148,7 @@ static inline void put_css_set(struct css_set *cset)
*/ */
static inline void get_css_set(struct css_set *cset) static inline void get_css_set(struct css_set *cset)
{ {
atomic_inc(&cset->refcount); refcount_inc(&cset->refcount);
} }
bool cgroup_ssid_enabled(int ssid); bool cgroup_ssid_enabled(int ssid);
@ -163,7 +164,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
void cgroup_free_root(struct cgroup_root *root); void cgroup_free_root(struct cgroup_root *root);
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_root *root, unsigned long magic, struct cgroup_root *root, unsigned long magic,

View File

@ -346,7 +346,7 @@ static int cgroup_task_count(const struct cgroup *cgrp)
spin_lock_irq(&css_set_lock); spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link) list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += atomic_read(&link->cset->refcount); count += refcount_read(&link->cset->refcount);
spin_unlock_irq(&css_set_lock); spin_unlock_irq(&css_set_lock);
return count; return count;
} }
@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
struct dentry *dentry; struct dentry *dentry;
int i, ret; int i, ret;
bool new_root = false;
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
ret = -ENOMEM; ret = -ENOMEM;
goto out_unlock; goto out_unlock;
} }
new_root = true;
init_cgroup_root(root, &opts); init_cgroup_root(root, &opts);
ret = cgroup_setup_root(root, opts.subsys_mask); ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
if (ret) if (ret)
cgroup_free_root(root); cgroup_free_root(root);
@ -1200,6 +1202,18 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
CGROUP_SUPER_MAGIC, ns); CGROUP_SUPER_MAGIC, ns);
/*
* There's a race window after we release cgroup_mutex and before
* allocating a superblock. Make sure a concurrent process won't
* be able to re-use the root during this window by delaying the
* initialization of root refcnt.
*/
if (new_root) {
mutex_lock(&cgroup_mutex);
percpu_ref_reinit(&root->cgrp.self.refcnt);
mutex_unlock(&cgroup_mutex);
}
/* /*
* If @pinned_sb, we're reusing an existing root and holding an * If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref. * extra ref on its sb. Mount is complete. Put the extra ref.
@ -1286,7 +1300,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
u64 count; u64 count;
rcu_read_lock(); rcu_read_lock();
count = atomic_read(&task_css_set(current)->refcount); count = refcount_read(&task_css_set(current)->refcount);
rcu_read_unlock(); rcu_read_unlock();
return count; return count;
} }

View File

@ -189,7 +189,7 @@ static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */ /* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = { struct cgroup_namespace init_cgroup_ns = {
.count = { .counter = 2, }, .count = REFCOUNT_INIT(2),
.user_ns = &init_user_ns, .user_ns = &init_user_ns,
.ns.ops = &cgroupns_operations, .ns.ops = &cgroupns_operations,
.ns.inum = PROC_CGROUP_INIT_INO, .ns.inum = PROC_CGROUP_INIT_INO,
@ -436,7 +436,12 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
return css; return css;
} }
static void cgroup_get(struct cgroup *cgrp) static void __maybe_unused cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
}
static void cgroup_get_live(struct cgroup *cgrp)
{ {
WARN_ON_ONCE(cgroup_is_dead(cgrp)); WARN_ON_ONCE(cgroup_is_dead(cgrp));
css_get(&cgrp->self); css_get(&cgrp->self);
@ -554,7 +559,7 @@ EXPORT_SYMBOL_GPL(of_css);
* haven't been created. * haven't been created.
*/ */
struct css_set init_css_set = { struct css_set init_css_set = {
.refcount = ATOMIC_INIT(1), .refcount = REFCOUNT_INIT(1),
.tasks = LIST_HEAD_INIT(init_css_set.tasks), .tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
@ -724,7 +729,7 @@ void put_css_set_locked(struct css_set *cset)
lockdep_assert_held(&css_set_lock); lockdep_assert_held(&css_set_lock);
if (!atomic_dec_and_test(&cset->refcount)) if (!refcount_dec_and_test(&cset->refcount))
return; return;
/* This css_set is dead. unlink it and release cgroup and css refs */ /* This css_set is dead. unlink it and release cgroup and css refs */
@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
list_add_tail(&link->cgrp_link, &cset->cgrp_links); list_add_tail(&link->cgrp_link, &cset->cgrp_links);
if (cgroup_parent(cgrp)) if (cgroup_parent(cgrp))
cgroup_get(cgrp); cgroup_get_live(cgrp);
} }
/** /**
@ -977,7 +982,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
return NULL; return NULL;
} }
atomic_set(&cset->refcount, 1); refcount_set(&cset->refcount, 1);
INIT_LIST_HEAD(&cset->tasks); INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks); INIT_LIST_HEAD(&cset->mg_tasks);
INIT_LIST_HEAD(&cset->task_iters); INIT_LIST_HEAD(&cset->task_iters);
@ -1640,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
} }
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
{ {
LIST_HEAD(tmp_links); LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp; struct cgroup *root_cgrp = &root->cgrp;
@ -1656,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
root_cgrp->id = ret; root_cgrp->id = ret;
root_cgrp->ancestor_ids[0] = ret; root_cgrp->ancestor_ids[0] = ret;
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
GFP_KERNEL); ref_flags, GFP_KERNEL);
if (ret) if (ret)
goto out; goto out;
@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
cgrp_dfl_visible = true; cgrp_dfl_visible = true;
cgroup_get(&cgrp_dfl_root.cgrp); cgroup_get_live(&cgrp_dfl_root.cgrp);
dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
CGROUP2_SUPER_MAGIC, ns); CGROUP2_SUPER_MAGIC, ns);
@ -2576,7 +2581,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
if (!css || !percpu_ref_is_dying(&css->refcnt)) if (!css || !percpu_ref_is_dying(&css->refcnt))
continue; continue;
cgroup_get(dsct); cgroup_get_live(dsct);
prepare_to_wait(&dsct->offline_waitq, &wait, prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
{ {
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
cgroup_get(cgrp); cgroup_get_live(cgrp);
memset(css, 0, sizeof(*css)); memset(css, 0, sizeof(*css));
css->cgroup = cgrp; css->cgroup = cgrp;
@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
/* allocation complete, commit to creation */ /* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
atomic_inc(&root->nr_cgrps); atomic_inc(&root->nr_cgrps);
cgroup_get(parent); cgroup_get_live(parent);
/* /*
* @cgrp is now fully operational. If something fails after this * @cgrp is now fully operational. If something fails after this
@ -4513,7 +4518,7 @@ int __init cgroup_init(void)
hash_add(css_set_table, &init_css_set.hlist, hash_add(css_set_table, &init_css_set.hlist,
css_set_hash(init_css_set.subsys)); css_set_hash(init_css_set.subsys));
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
@ -4947,7 +4952,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
if (kn) { if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) { if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv; cgrp = kn->priv;
cgroup_get(cgrp); cgroup_get_live(cgrp);
} else { } else {
cgrp = ERR_PTR(-ENOTDIR); cgrp = ERR_PTR(-ENOTDIR);
} }
@ -5027,6 +5032,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
/* Socket clone path */ /* Socket clone path */
if (skcd->val) { if (skcd->val) {
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd)); cgroup_get(sock_cgroup_ptr(skcd));
return; return;
} }

View File

@ -2121,10 +2121,8 @@ int __init cpuset_init(void)
{ {
int err = 0; int err = 0;
if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG(); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
BUG();
cpumask_setall(top_cpuset.cpus_allowed); cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed); nodes_setall(top_cpuset.mems_allowed);
@ -2139,8 +2137,7 @@ int __init cpuset_init(void)
if (err < 0) if (err < 0)
return err; return err;
if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
BUG();
return 0; return 0;
} }
@ -2354,7 +2351,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
rebuild_sched_domains(); rebuild_sched_domains();
} }
void cpuset_update_active_cpus(bool cpu_online) void cpuset_update_active_cpus(void)
{ {
/* /*
* We're inside cpu hotplug critical region which usually nests * We're inside cpu hotplug critical region which usually nests

View File

@ -31,7 +31,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
kfree(new_ns); kfree(new_ns);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
atomic_set(&new_ns->count, 1); refcount_set(&new_ns->count, 1);
new_ns->ns.ops = &cgroupns_operations; new_ns->ns.ops = &cgroupns_operations;
return new_ns; return new_ns;
} }

View File

@ -5732,7 +5732,7 @@ static void cpuset_cpu_active(void)
* cpuset configurations. * cpuset configurations.
*/ */
} }
cpuset_update_active_cpus(true); cpuset_update_active_cpus();
} }
static int cpuset_cpu_inactive(unsigned int cpu) static int cpuset_cpu_inactive(unsigned int cpu)
@ -5755,7 +5755,7 @@ static int cpuset_cpu_inactive(unsigned int cpu)
if (overflow) if (overflow)
return -EBUSY; return -EBUSY;
cpuset_update_active_cpus(false); cpuset_update_active_cpus();
} else { } else {
num_cpus_frozen++; num_cpus_frozen++;
partition_sched_domains(1, NULL, NULL); partition_sched_domains(1, NULL, NULL);