diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b21881e1ea08..9c300ad9a911 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -156,6 +156,8 @@ enum { * specified at mount time and thus is implemented here. */ CGRP_CPUSET_CLONE_CHILDREN, + /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */ + CGRP_SANE_BEHAVIOR, }; struct cgroup_name { @@ -243,6 +245,37 @@ struct cgroup { /* cgroupfs_root->flags */ enum { + /* + * Unfortunately, cgroup core and various controllers are riddled + * with idiosyncrasies and pointless options. The following flag, + * when set, will force sane behavior - some options are forced on, + * others are disallowed, and some controllers will change their + * hierarchical or other behaviors. + * + * The set of behaviors affected by this flag are still being + * determined and developed and the mount option for this flag is + * prefixed with __DEVEL__. The prefix will be dropped once we + * reach the point where all behaviors are compatible with the + * planned unified hierarchy, which will automatically turn on this + * flag. + * + * The followings are the behaviors currently affected this flag. + * + * - Mount options "noprefix" and "clone_children" are disallowed. + * Also, cgroupfs file cgroup.clone_children is not created. + * + * - When mounting an existing superblock, mount options should + * match. + * + * - Remount is disallowed. + * + * The followings are planned changes. + * + * - release_agent will be disallowed once replacement notification + * mechanism is implemented. + */ + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), + CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ }; @@ -360,6 +393,7 @@ struct cgroup_map_cb { /* cftype->flags */ #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ +#define CFTYPE_INSANE (1U << 2) /* don't create if sane_behavior */ #define MAX_CFTYPE_NAME 64 @@ -486,6 +520,15 @@ struct cgroup_scanner { void *data; }; +/* + * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This + * function can be called as long as @cgrp is accessible. + */ +static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) +{ + return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; +} + /* Caller should hold rcu_read_lock() */ static inline const char *cgroup_name(const struct cgroup *cgrp) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8b8eb7c168ff..67804590d4b0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1080,6 +1080,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) mutex_lock(&cgroup_root_mutex); for_each_subsys(root, ss) seq_printf(seq, ",%s", ss->name); + if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) + seq_puts(seq, ",sane_behavior"); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -1144,6 +1146,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) all_ss = true; continue; } + if (!strcmp(token, "__DEVEL__sane_behavior")) { + opts->flags |= CGRP_ROOT_SANE_BEHAVIOR; + continue; + } if (!strcmp(token, "noprefix")) { opts->flags |= CGRP_ROOT_NOPREFIX; continue; @@ -1231,6 +1237,20 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* Consistency checks */ + if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) { + pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n"); + + if (opts->flags & CGRP_ROOT_NOPREFIX) { + pr_err("cgroup: sane_behavior: noprefix is not allowed\n"); + return -EINVAL; + } + + if (opts->cpuset_clone_children) { + pr_err("cgroup: sane_behavior: clone_children is not allowed\n"); + return -EINVAL; + } + } + /* * Option noprefix was introduced just for backward compatibility * with the old cpuset, so we allow noprefix only if mounting just @@ -1307,6 +1327,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroup_sb_opts opts; unsigned long added_mask, removed_mask; + if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) { + pr_err("cgroup: sane_behavior: remount is not allowed\n"); + return -EINVAL; + } + mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_root_mutex); @@ -1657,6 +1682,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * any) is not needed */ cgroup_drop_root(opts.new_root); + + if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) && + root->flags != opts.flags) { + pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); + ret = -EINVAL; + goto drop_new_super; + } + /* no subsys rebinding, so refcounts don't change */ drop_parsed_module_refcounts(opts.subsys_mask); } @@ -2200,6 +2233,13 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, return 0; } +static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *seq) +{ + seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp)); + return 0; +} + /* A buffer size big enough for numbers or short strings */ #define CGROUP_LOCAL_BUFFER_SIZE 64 @@ -2681,6 +2721,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, for (cft = cfts; cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ + if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) + continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) continue; if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) @@ -3918,9 +3960,15 @@ static struct cftype files[] = { }, { .name = "cgroup.clone_children", + .flags = CFTYPE_INSANE, .read_u64 = cgroup_clone_children_read, .write_u64 = cgroup_clone_children_write, }, + { + .name = "cgroup.sane_behavior", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cgroup_sane_behavior_show, + }, { .name = "release_agent", .flags = CFTYPE_ONLY_ON_ROOT,