mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-18 10:56:14 +00:00
a05d4fd917
The net_cls controller controls the classid field of each socket which is associated with the cgroup. Because the classid is a per-socket attribute, when a task migrates to another cgroup or the configured classid of the cgroup changes, the controller needs to walk all sockets and update the classid value, which was implemented by 3b13758f51de ("cgroups: Allow dynamically changing net_classid"). While the approach is not scalable, migrating tasks which have a lot of fds attached to them is rare and the cost is borne by the ones initiating the operations. However, for simplicity, both the migration and classid config change paths call update_classid() which scans all fds of all tasks in the target css. This is overkill for the migration path which only needs to cover a much smaller subset of tasks which are actually getting migrated in. On cgroup v1, this can lead to unexpected scalability issues when one tries to migrate a task or process into a net_cls cgroup which already contains a lot of fds. Even if the migration target doesn't have many to get scanned, update_classid() ends up scanning all fds in the target cgroup which can be extremely numerous. Unfortunately, on cgroup v2 which doesn't use net_cls, the problem is even worse. Before bfc2cf6f61fc ("cgroup: call subsys->*attach() only for subsystems which are actually affected by migration"), cgroup core would call the ->css_attach callback even for controllers which don't see actual migration to a different css. As net_cls is always disabled but still mounted on cgroup v2, whenever a process is migrated on the cgroup v2 hierarchy, net_cls sees identity migration from root to root and cgroup core used to call ->css_attach callback for those. The net_cls ->css_attach ends up calling update_classid() on the root net_cls css to which all processes on the system belong as the controller isn't used.
This makes any cgroup v2 migration O(total_number_of_fds_on_the_system) which is horrible and easily leads to noticeable stalls triggering RCU stall warnings and so on. The worst symptom is already fixed in upstream by bfc2cf6f61fc ("cgroup: call subsys->*attach() only for subsystems which are actually affected by migration"); however, backporting that commit is too invasive and we want to avoid other cases too. This patch updates net_cls's cgrp_attach() to iterate fds of only the processes which are actually getting migrated. This removes the surprising migration cost which is dependent on the total number of fds in the target cgroup. As this leaves write_classid() the only user of update_classid(), open-code the helper into write_classid(). Reported-by: David Goode <dgoode@fb.com> Fixes: 3b13758f51de ("cgroups: Allow dynamically changing net_classid") Cc: stable@vger.kernel.org # v4.4+ Cc: Nina Schiff <ninasc@fb.com> Cc: David S. Miller <davem@davemloft.net> Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
131 lines
2.9 KiB
C
131 lines
2.9 KiB
C
/*
|
|
* net/core/netclassid_cgroup.c Classid Cgroupfs Handling
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
* Authors: Thomas Graf <tgraf@suug.ch>
|
|
*/
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/cgroup.h>
|
|
#include <linux/fdtable.h>
|
|
#include <linux/sched/task.h>
|
|
|
|
#include <net/cls_cgroup.h>
|
|
#include <net/sock.h>
|
|
|
|
static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
|
|
{
|
|
return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
|
|
}
|
|
|
|
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
|
|
{
|
|
return css_cls_state(task_css_check(p, net_cls_cgrp_id,
|
|
rcu_read_lock_bh_held()));
|
|
}
|
|
EXPORT_SYMBOL_GPL(task_cls_state);
|
|
|
|
static struct cgroup_subsys_state *
|
|
cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
{
|
|
struct cgroup_cls_state *cs;
|
|
|
|
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
|
|
if (!cs)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
return &cs->css;
|
|
}
|
|
|
|
static int cgrp_css_online(struct cgroup_subsys_state *css)
|
|
{
|
|
struct cgroup_cls_state *cs = css_cls_state(css);
|
|
struct cgroup_cls_state *parent = css_cls_state(css->parent);
|
|
|
|
if (parent)
|
|
cs->classid = parent->classid;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Release the cgroup_cls_state that embeds @css. */
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
	struct cgroup_cls_state *state = css_cls_state(css);

	kfree(state);
}
|
|
|
|
static int update_classid_sock(const void *v, struct file *file, unsigned n)
|
|
{
|
|
int err;
|
|
struct socket *sock = sock_from_file(file, &err);
|
|
|
|
if (sock) {
|
|
spin_lock(&cgroup_sk_update_lock);
|
|
sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
|
|
(unsigned long)v);
|
|
spin_unlock(&cgroup_sk_update_lock);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void cgrp_attach(struct cgroup_taskset *tset)
|
|
{
|
|
struct cgroup_subsys_state *css;
|
|
struct task_struct *p;
|
|
|
|
cgroup_taskset_for_each(p, css, tset) {
|
|
task_lock(p);
|
|
iterate_fd(p->files, 0, update_classid_sock,
|
|
(void *)(unsigned long)css_cls_state(css)->classid);
|
|
task_unlock(p);
|
|
}
|
|
}
|
|
|
|
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
|
|
{
|
|
return css_cls_state(css)->classid;
|
|
}
|
|
|
|
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
|
|
u64 value)
|
|
{
|
|
struct cgroup_cls_state *cs = css_cls_state(css);
|
|
struct css_task_iter it;
|
|
struct task_struct *p;
|
|
|
|
cgroup_sk_alloc_disable();
|
|
|
|
cs->classid = (u32)value;
|
|
|
|
css_task_iter_start(css, &it);
|
|
while ((p = css_task_iter_next(&it))) {
|
|
task_lock(p);
|
|
iterate_fd(p->files, 0, update_classid_sock,
|
|
(void *)(unsigned long)cs->classid);
|
|
task_unlock(p);
|
|
}
|
|
css_task_iter_end(&it);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct cftype ss_files[] = {
|
|
{
|
|
.name = "classid",
|
|
.read_u64 = read_classid,
|
|
.write_u64 = write_classid,
|
|
},
|
|
{ } /* terminate */
|
|
};
|
|
|
|
struct cgroup_subsys net_cls_cgrp_subsys = {
|
|
.css_alloc = cgrp_css_alloc,
|
|
.css_online = cgrp_css_online,
|
|
.css_free = cgrp_css_free,
|
|
.attach = cgrp_attach,
|
|
.legacy_cftypes = ss_files,
|
|
};
|