2017-06-13 17:18:03 -04:00
|
|
|
/*
|
|
|
|
* Debug controller
|
|
|
|
*
|
|
|
|
* WARNING: This controller is for cgroup core debugging only.
|
|
|
|
* Its interfaces are unstable and subject to changes at any time.
|
|
|
|
*/
|
2017-06-13 17:18:02 -04:00
|
|
|
#include <linux/ctype.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
|
|
|
|
#include "cgroup-internal.h"
|
|
|
|
|
|
|
|
static struct cgroup_subsys_state *
|
|
|
|
debug_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
|
|
{
|
|
|
|
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
|
|
|
|
|
|
|
|
if (!css)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
return css;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Free the memory of @css with kfree().  Registered as the controller's
 * ->css_free callback.
 */
static void debug_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* debug_taskcount_read - return the number of tasks in a cgroup.
|
|
|
|
* @cgrp: the cgroup in question
|
|
|
|
*/
|
|
|
|
static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
|
|
|
|
struct cftype *cft)
|
|
|
|
{
|
|
|
|
return cgroup_task_count(css->cgroup);
|
|
|
|
}
|
|
|
|
|
2017-06-13 17:18:04 -04:00
|
|
|
static int current_css_set_read(struct seq_file *seq, void *v)
|
2017-06-13 17:18:02 -04:00
|
|
|
{
|
2017-06-14 16:01:41 -04:00
|
|
|
struct kernfs_open_file *of = seq->private;
|
2017-06-13 17:18:04 -04:00
|
|
|
struct css_set *cset;
|
|
|
|
struct cgroup_subsys *ss;
|
|
|
|
struct cgroup_subsys_state *css;
|
|
|
|
int i, refcnt;
|
|
|
|
|
2017-06-14 16:01:41 -04:00
|
|
|
if (!cgroup_kn_lock_live(of->kn, false))
|
|
|
|
return -ENODEV;
|
|
|
|
|
2017-06-13 17:18:04 -04:00
|
|
|
spin_lock_irq(&css_set_lock);
|
|
|
|
rcu_read_lock();
|
|
|
|
cset = rcu_dereference(current->cgroups);
|
|
|
|
refcnt = refcount_read(&cset->refcount);
|
|
|
|
seq_printf(seq, "css_set %pK %d", cset, refcnt);
|
|
|
|
if (refcnt > cset->nr_tasks)
|
|
|
|
seq_printf(seq, " +%d", refcnt - cset->nr_tasks);
|
|
|
|
seq_puts(seq, "\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Print the css'es stored in the current css_set.
|
|
|
|
*/
|
|
|
|
for_each_subsys(ss, i) {
|
|
|
|
css = cset->subsys[ss->id];
|
|
|
|
if (!css)
|
|
|
|
continue;
|
|
|
|
seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
|
|
|
|
(unsigned long)css, css->id);
|
|
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
|
|
spin_unlock_irq(&css_set_lock);
|
2017-06-14 16:01:41 -04:00
|
|
|
cgroup_kn_unlock(of->kn);
|
2017-06-13 17:18:04 -04:00
|
|
|
return 0;
|
2017-06-13 17:18:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
|
|
|
|
struct cftype *cft)
|
|
|
|
{
|
|
|
|
u64 count;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
count = refcount_read(&task_css_set(current)->refcount);
|
|
|
|
rcu_read_unlock();
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
|
|
|
|
{
|
|
|
|
struct cgrp_cset_link *link;
|
|
|
|
struct css_set *cset;
|
|
|
|
char *name_buf;
|
|
|
|
|
|
|
|
name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
|
|
|
|
if (!name_buf)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
spin_lock_irq(&css_set_lock);
|
|
|
|
rcu_read_lock();
|
|
|
|
cset = rcu_dereference(current->cgroups);
|
|
|
|
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
|
|
|
|
struct cgroup *c = link->cgrp;
|
|
|
|
|
|
|
|
cgroup_name(c, name_buf, NAME_MAX + 1);
|
|
|
|
seq_printf(seq, "Root %d group %s\n",
|
|
|
|
c->root->hierarchy_id, name_buf);
|
|
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
|
|
spin_unlock_irq(&css_set_lock);
|
|
|
|
kfree(name_buf);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define MAX_TASKS_SHOWN_PER_CSS 25
|
|
|
|
static int cgroup_css_links_read(struct seq_file *seq, void *v)
|
|
|
|
{
|
|
|
|
struct cgroup_subsys_state *css = seq_css(seq);
|
|
|
|
struct cgrp_cset_link *link;
|
2017-07-21 11:14:51 -04:00
|
|
|
int dead_cnt = 0, extra_refs = 0, threaded_csets = 0;
|
2017-06-13 17:18:02 -04:00
|
|
|
|
|
|
|
spin_lock_irq(&css_set_lock);
|
2017-07-21 11:14:51 -04:00
|
|
|
|
2017-06-13 17:18:02 -04:00
|
|
|
list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
|
|
|
|
struct css_set *cset = link->cset;
|
|
|
|
struct task_struct *task;
|
|
|
|
int count = 0;
|
2017-06-13 17:18:04 -04:00
|
|
|
int refcnt = refcount_read(&cset->refcount);
|
2017-06-13 17:18:02 -04:00
|
|
|
|
2017-07-21 11:14:51 -04:00
|
|
|
/*
|
|
|
|
* Print out the proc_cset and threaded_cset relationship
|
|
|
|
* and highlight difference between refcount and task_count.
|
|
|
|
*/
|
|
|
|
seq_printf(seq, "css_set %pK", cset);
|
|
|
|
if (rcu_dereference_protected(cset->dom_cset, 1) != cset) {
|
|
|
|
threaded_csets++;
|
|
|
|
seq_printf(seq, "=>%pK", cset->dom_cset);
|
|
|
|
}
|
|
|
|
if (!list_empty(&cset->threaded_csets)) {
|
|
|
|
struct css_set *tcset;
|
|
|
|
int idx = 0;
|
|
|
|
|
|
|
|
list_for_each_entry(tcset, &cset->threaded_csets,
|
|
|
|
threaded_csets_node) {
|
|
|
|
seq_puts(seq, idx ? "," : "<=");
|
|
|
|
seq_printf(seq, "%pK", tcset);
|
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
seq_printf(seq, " %d", refcnt);
|
|
|
|
if (refcnt - cset->nr_tasks > 0) {
|
|
|
|
int extra = refcnt - cset->nr_tasks;
|
|
|
|
|
|
|
|
seq_printf(seq, " +%d", extra);
|
|
|
|
/*
|
|
|
|
* Take out the one additional reference in
|
|
|
|
* init_css_set.
|
|
|
|
*/
|
|
|
|
if (cset == &init_css_set)
|
|
|
|
extra--;
|
|
|
|
extra_refs += extra;
|
|
|
|
}
|
2017-06-13 17:18:04 -04:00
|
|
|
}
|
|
|
|
seq_puts(seq, "\n");
|
2017-06-13 17:18:02 -04:00
|
|
|
|
|
|
|
list_for_each_entry(task, &cset->tasks, cg_list) {
|
2017-06-13 17:18:04 -04:00
|
|
|
if (count++ <= MAX_TASKS_SHOWN_PER_CSS)
|
|
|
|
seq_printf(seq, " task %d\n",
|
|
|
|
task_pid_vnr(task));
|
2017-06-13 17:18:02 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry(task, &cset->mg_tasks, cg_list) {
|
2017-06-13 17:18:04 -04:00
|
|
|
if (count++ <= MAX_TASKS_SHOWN_PER_CSS)
|
|
|
|
seq_printf(seq, " task %d\n",
|
|
|
|
task_pid_vnr(task));
|
2017-06-13 17:18:02 -04:00
|
|
|
}
|
2017-06-13 17:18:04 -04:00
|
|
|
/* show # of overflowed tasks */
|
|
|
|
if (count > MAX_TASKS_SHOWN_PER_CSS)
|
|
|
|
seq_printf(seq, " ... (%d)\n",
|
|
|
|
count - MAX_TASKS_SHOWN_PER_CSS);
|
|
|
|
|
|
|
|
if (cset->dead) {
|
|
|
|
seq_puts(seq, " [dead]\n");
|
|
|
|
dead_cnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
WARN_ON(count != cset->nr_tasks);
|
2017-06-13 17:18:02 -04:00
|
|
|
}
|
|
|
|
spin_unlock_irq(&css_set_lock);
|
2017-06-13 17:18:04 -04:00
|
|
|
|
2017-07-21 11:14:51 -04:00
|
|
|
if (!dead_cnt && !extra_refs && !threaded_csets)
|
2017-06-13 17:18:04 -04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
seq_puts(seq, "\n");
|
2017-07-21 11:14:51 -04:00
|
|
|
if (threaded_csets)
|
|
|
|
seq_printf(seq, "threaded css_sets = %d\n", threaded_csets);
|
2017-06-13 17:18:04 -04:00
|
|
|
if (extra_refs)
|
|
|
|
seq_printf(seq, "extra references = %d\n", extra_refs);
|
|
|
|
if (dead_cnt)
|
|
|
|
seq_printf(seq, "dead css_sets = %d\n", dead_cnt);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
|
|
|
|
{
|
2017-06-14 16:01:41 -04:00
|
|
|
struct kernfs_open_file *of = seq->private;
|
|
|
|
struct cgroup *cgrp;
|
2017-06-13 17:18:04 -04:00
|
|
|
struct cgroup_subsys *ss;
|
|
|
|
struct cgroup_subsys_state *css;
|
|
|
|
char pbuf[16];
|
|
|
|
int i;
|
|
|
|
|
2017-06-14 16:01:41 -04:00
|
|
|
cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
|
if (!cgrp)
|
|
|
|
return -ENODEV;
|
|
|
|
|
2017-06-13 17:18:04 -04:00
|
|
|
for_each_subsys(ss, i) {
|
|
|
|
css = rcu_dereference_check(cgrp->subsys[ss->id], true);
|
|
|
|
if (!css)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pbuf[0] = '\0';
|
|
|
|
|
|
|
|
/* Show the parent CSS if applicable*/
|
|
|
|
if (css->parent)
|
|
|
|
snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
|
|
|
|
css->parent->id);
|
|
|
|
seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
|
|
|
|
(unsigned long)css, css->id,
|
|
|
|
atomic_read(&css->online_cnt), pbuf);
|
|
|
|
}
|
2017-06-14 16:01:41 -04:00
|
|
|
|
|
|
|
cgroup_kn_unlock(of->kn);
|
2017-06-13 17:18:04 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-14 16:01:36 -04:00
|
|
|
static void cgroup_masks_read_one(struct seq_file *seq, const char *name,
|
|
|
|
u16 mask)
|
2017-06-13 17:18:04 -04:00
|
|
|
{
|
|
|
|
struct cgroup_subsys *ss;
|
2017-06-14 16:01:36 -04:00
|
|
|
int ssid;
|
|
|
|
bool first = true;
|
2017-06-13 17:18:04 -04:00
|
|
|
|
2017-06-14 16:01:36 -04:00
|
|
|
seq_printf(seq, "%-17s: ", name);
|
|
|
|
for_each_subsys(ss, ssid) {
|
|
|
|
if (!(mask & (1 << ssid)))
|
|
|
|
continue;
|
|
|
|
if (!first)
|
|
|
|
seq_puts(seq, ", ");
|
|
|
|
seq_puts(seq, ss->name);
|
|
|
|
first = false;
|
2017-06-13 17:18:04 -04:00
|
|
|
}
|
2017-06-14 16:01:36 -04:00
|
|
|
seq_putc(seq, '\n');
|
|
|
|
}
|
2017-06-13 17:18:04 -04:00
|
|
|
|
2017-06-14 16:01:36 -04:00
|
|
|
static int cgroup_masks_read(struct seq_file *seq, void *v)
|
|
|
|
{
|
2017-06-14 16:01:41 -04:00
|
|
|
struct kernfs_open_file *of = seq->private;
|
|
|
|
struct cgroup *cgrp;
|
|
|
|
|
|
|
|
cgrp = cgroup_kn_lock_live(of->kn, false);
|
|
|
|
if (!cgrp)
|
|
|
|
return -ENODEV;
|
2017-06-14 16:01:36 -04:00
|
|
|
|
|
|
|
cgroup_masks_read_one(seq, "subtree_control", cgrp->subtree_control);
|
|
|
|
cgroup_masks_read_one(seq, "subtree_ss_mask", cgrp->subtree_ss_mask);
|
2017-06-14 16:01:41 -04:00
|
|
|
|
|
|
|
cgroup_kn_unlock(of->kn);
|
2017-06-13 17:18:02 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
|
|
|
|
{
|
|
|
|
return (!cgroup_is_populated(css->cgroup) &&
|
|
|
|
!css_has_online_children(&css->cgroup->self));
|
|
|
|
}
|
|
|
|
|
2017-06-14 16:01:32 -04:00
|
|
|
static struct cftype debug_legacy_files[] = {
|
2017-06-13 17:18:02 -04:00
|
|
|
{
|
|
|
|
.name = "taskcount",
|
|
|
|
.read_u64 = debug_taskcount_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "current_css_set",
|
2017-06-13 17:18:04 -04:00
|
|
|
.seq_show = current_css_set_read,
|
|
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
2017-06-13 17:18:02 -04:00
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "current_css_set_refcount",
|
|
|
|
.read_u64 = current_css_set_refcount_read,
|
2017-06-13 17:18:04 -04:00
|
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
2017-06-13 17:18:02 -04:00
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "current_css_set_cg_links",
|
|
|
|
.seq_show = current_css_set_cg_links_read,
|
2017-06-13 17:18:04 -04:00
|
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
2017-06-13 17:18:02 -04:00
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "cgroup_css_links",
|
|
|
|
.seq_show = cgroup_css_links_read,
|
|
|
|
},
|
|
|
|
|
2017-06-13 17:18:04 -04:00
|
|
|
{
|
|
|
|
.name = "cgroup_subsys_states",
|
|
|
|
.seq_show = cgroup_subsys_states_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "cgroup_masks",
|
|
|
|
.seq_show = cgroup_masks_read,
|
|
|
|
},
|
|
|
|
|
2017-06-13 17:18:02 -04:00
|
|
|
{
|
|
|
|
.name = "releasable",
|
|
|
|
.read_u64 = releasable_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{ } /* terminate */
|
|
|
|
};
|
|
|
|
|
2017-06-14 16:01:32 -04:00
|
|
|
static struct cftype debug_files[] = {
|
|
|
|
{
|
|
|
|
.name = "taskcount",
|
|
|
|
.read_u64 = debug_taskcount_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "current_css_set",
|
|
|
|
.seq_show = current_css_set_read,
|
|
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "current_css_set_refcount",
|
|
|
|
.read_u64 = current_css_set_refcount_read,
|
|
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "current_css_set_cg_links",
|
|
|
|
.seq_show = current_css_set_cg_links_read,
|
|
|
|
.flags = CFTYPE_ONLY_ON_ROOT,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "css_links",
|
|
|
|
.seq_show = cgroup_css_links_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "csses",
|
|
|
|
.seq_show = cgroup_subsys_states_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
.name = "masks",
|
|
|
|
.seq_show = cgroup_masks_read,
|
|
|
|
},
|
|
|
|
|
|
|
|
{ } /* terminate */
|
|
|
|
};
|
|
|
|
|
2017-06-13 17:18:02 -04:00
|
|
|
struct cgroup_subsys debug_cgrp_subsys = {
|
2017-06-13 17:18:04 -04:00
|
|
|
.css_alloc = debug_css_alloc,
|
|
|
|
.css_free = debug_css_free,
|
2017-06-14 16:01:32 -04:00
|
|
|
.legacy_cftypes = debug_legacy_files,
|
2017-06-13 17:18:02 -04:00
|
|
|
};
|
2017-06-14 16:01:32 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* On v2, debug is an implicit controller enabled by "cgroup_debug" boot
|
|
|
|
* parameter.
|
|
|
|
*/
|
|
|
|
static int __init enable_cgroup_debug(char *str)
|
|
|
|
{
|
|
|
|
debug_cgrp_subsys.dfl_cftypes = debug_files;
|
|
|
|
debug_cgrp_subsys.implicit_on_dfl = true;
|
cgroup: implement cgroup v2 thread support
This patch implements cgroup v2 thread support. The goal of the
thread mode is supporting hierarchical accounting and control at
thread granularity while staying inside the resource domain model
which allows coordination across different resource controllers and
handling of anonymous resource consumptions.
A cgroup is always created as a domain and can be made threaded by
writing to the "cgroup.type" file. When a cgroup becomes threaded, it
becomes a member of a threaded subtree which is anchored at the
closest ancestor which isn't threaded.
The threads of the processes which are in a threaded subtree can be
placed anywhere without being restricted by process granularity or
no-internal-process constraint. Note that the threads aren't allowed
to escape to a different threaded subtree. To be used inside a
threaded subtree, a controller should explicitly support threaded mode
and be able to handle internal competition in the way which is
appropriate for the resource.
The root of a threaded subtree, the nearest ancestor which isn't
threaded, is called the threaded domain and serves as the resource
domain for the whole subtree. This is the last cgroup where domain
controllers are operational and where all the domain-level resource
consumptions in the subtree are accounted. This allows threaded
controllers to operate at thread granularity when requested while
staying inside the scope of system-level resource distribution.
As the root cgroup is exempt from the no-internal-process constraint,
it can serve as both a threaded domain and a parent to normal cgroups,
so, unlike non-root cgroups, the root cgroup can have both domain and
threaded children.
Internally, in a threaded subtree, each css_set has its ->dom_cset
pointing to a matching css_set which belongs to the threaded domain.
This ensures that thread root level cgroup_subsys_state for all
threaded controllers are readily accessible for domain-level
operations.
This patch enables threaded mode for the pids and perf_events
controllers. Neither has to worry about domain-level resource
consumptions and it's enough to simply set the flag.
For more details on the interface and behavior of the thread mode,
please refer to the section 2-2-2 in Documentation/cgroup-v2.txt added
by this patch.
v5: - Dropped silly no-op ->dom_cgrp init from cgroup_create().
Spotted by Waiman.
- Documentation updated as suggested by Waiman.
- cgroup.type content slightly reformatted.
- Mark the debug controller threaded.
v4: - Updated to the general idea of marking specific cgroups
domain/threaded as suggested by PeterZ.
v3: - Dropped "join" and always make mixed children join the parent's
threaded subtree.
v2: - After discussions with Waiman, support for mixed thread mode is
added. This should address the issue that Peter pointed out
where any nesting should be avoided for thread subtrees while
coexisting with other domain cgroups.
- Enabling / disabling thread mode now piggy backs on the existing
control mask update mechanism.
- Bug fixes and cleanup.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
2017-07-21 11:14:51 -04:00
|
|
|
debug_cgrp_subsys.threaded = true;
|
2017-06-14 16:01:32 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
__setup("cgroup_debug", enable_cgroup_debug);
|