mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 15:29:16 +00:00
cgroup: Fixes for v6.3-rc6
* Fix several cpuset bugs including one where it wasn't applying the target cgroup when tasks are created with CLONE_INTO_CGROUP. * Fix inversed locking order in cgroup1 freezer implementation. * Fix garbage cpu.stat::core_sched.forceidle_usec reporting in the root cgroup. This is a relatively big pull request this late in the cycle but the major contributor is the above mentioned cpuset bug which is rather significant. -----BEGIN PGP SIGNATURE----- iIQEABYIACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZDiJuw4cdGpAa2VybmVs Lm9yZwAKCRCxYfJx3gVYGbUfAQCLYhxijWvCpRYlQ3mfd1pgyvWNB90o4lnFkltz D0iSpwD/SOL5zwkR7WBeejJDKVIsezPpz3SZvxzzKMSk1VODkgo= =eOCG -----END PGP SIGNATURE----- Merge tag 'cgroup-for-6.3-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup Pull cgroup fixes from Tejun Heo: "This is a relatively big pull request this late in the cycle but the major contributor is the cpuset bug which is rather significant: - Fix several cpuset bugs including one where it wasn't applying the target cgroup when tasks are created with CLONE_INTO_CGROUP With a few smaller fixes: - Fix inversed locking order in cgroup1 freezer implementation - Fix garbage cpu.stat::core_sched.forceidle_usec reporting in the root cgroup" * tag 'cgroup-for-6.3-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup/cpuset: Make cpuset_attach_task() skip subpartitions CPUs for top_cpuset cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach() cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex cgroup/cpuset: Fix partition root's cpuset.cpus update bug cgroup: fix display of forceidle time at root
This commit is contained in:
commit
44149752e9
@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
|
||||
spin_unlock_irq(&callback_lock);
|
||||
|
||||
if (adding || deleting)
|
||||
update_tasks_cpumask(parent, tmp->new_cpus);
|
||||
update_tasks_cpumask(parent, tmp->addmask);
|
||||
|
||||
/*
|
||||
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
|
||||
@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
||||
/*
|
||||
* Use the cpumasks in trialcs for tmpmasks when they are pointers
|
||||
* to allocated cpumasks.
|
||||
*
|
||||
* Note that update_parent_subparts_cpumask() uses only addmask &
|
||||
* delmask, but not new_cpus.
|
||||
*/
|
||||
tmp.addmask = trialcs->subparts_cpus;
|
||||
tmp.delmask = trialcs->effective_cpus;
|
||||
tmp.new_cpus = trialcs->cpus_allowed;
|
||||
tmp.new_cpus = NULL;
|
||||
#endif
|
||||
|
||||
retval = validate_change(cs, trialcs);
|
||||
@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
||||
}
|
||||
spin_unlock_irq(&callback_lock);
|
||||
|
||||
#ifdef CONFIG_CPUMASK_OFFSTACK
|
||||
/* Now trialcs->cpus_allowed is available */
|
||||
tmp.new_cpus = trialcs->cpus_allowed;
|
||||
#endif
|
||||
|
||||
/* effective_cpus will be updated here */
|
||||
update_cpumasks_hier(cs, &tmp, false);
|
||||
|
||||
@ -2445,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
|
||||
|
||||
static struct cpuset *cpuset_attach_old_cs;
|
||||
|
||||
/*
|
||||
* Check to see if a cpuset can accept a new task
|
||||
* For v1, cpus_allowed and mems_allowed can't be empty.
|
||||
* For v2, effective_cpus can't be empty.
|
||||
* Note that in v1, effective_cpus = cpus_allowed.
|
||||
*/
|
||||
static int cpuset_can_attach_check(struct cpuset *cs)
|
||||
{
|
||||
if (cpumask_empty(cs->effective_cpus) ||
|
||||
(!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
|
||||
return -ENOSPC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
|
||||
static int cpuset_can_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
@ -2459,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
|
||||
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
|
||||
/* allow moving tasks into an empty cpuset if on default hierarchy */
|
||||
ret = -ENOSPC;
|
||||
if (!is_in_v2_mode() &&
|
||||
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Task cannot be moved to a cpuset with empty effective cpus.
|
||||
*/
|
||||
if (cpumask_empty(cs->effective_cpus))
|
||||
/* Check to see if task is allowed in the cpuset */
|
||||
ret = cpuset_can_attach_check(cs);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
cgroup_taskset_for_each(task, css, tset) {
|
||||
@ -2485,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
|
||||
* changes which zero cpus/mems_allowed.
|
||||
*/
|
||||
cs->attach_in_progress++;
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
return ret;
|
||||
@ -2494,25 +2508,47 @@ out_unlock:
|
||||
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cpuset *cs;
|
||||
|
||||
cgroup_taskset_first(tset, &css);
|
||||
cs = css_cs(css);
|
||||
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
css_cs(css)->attach_in_progress--;
|
||||
cs->attach_in_progress--;
|
||||
if (!cs->attach_in_progress)
|
||||
wake_up(&cpuset_attach_wq);
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
}
|
||||
|
||||
/*
|
||||
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
|
||||
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
|
||||
* but we can't allocate it dynamically there. Define it global and
|
||||
* allocate from cpuset_init().
|
||||
*/
|
||||
static cpumask_var_t cpus_attach;
|
||||
static nodemask_t cpuset_attach_nodemask_to;
|
||||
|
||||
static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
|
||||
{
|
||||
percpu_rwsem_assert_held(&cpuset_rwsem);
|
||||
|
||||
if (cs != &top_cpuset)
|
||||
guarantee_online_cpus(task, cpus_attach);
|
||||
else
|
||||
cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
|
||||
cs->subparts_cpus);
|
||||
/*
|
||||
* can_attach beforehand should guarantee that this doesn't
|
||||
* fail. TODO: have a better way to handle failure here
|
||||
*/
|
||||
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
|
||||
|
||||
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
|
||||
cpuset_update_task_spread_flags(cs, task);
|
||||
}
|
||||
|
||||
static void cpuset_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
/* static buf protected by cpuset_rwsem */
|
||||
static nodemask_t cpuset_attach_nodemask_to;
|
||||
struct task_struct *task;
|
||||
struct task_struct *leader;
|
||||
struct cgroup_subsys_state *css;
|
||||
@ -2543,20 +2579,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
|
||||
|
||||
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
|
||||
|
||||
cgroup_taskset_for_each(task, css, tset) {
|
||||
if (cs != &top_cpuset)
|
||||
guarantee_online_cpus(task, cpus_attach);
|
||||
else
|
||||
cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
|
||||
/*
|
||||
* can_attach beforehand should guarantee that this doesn't
|
||||
* fail. TODO: have a better way to handle failure here
|
||||
*/
|
||||
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
|
||||
|
||||
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
|
||||
cpuset_update_task_spread_flags(cs, task);
|
||||
}
|
||||
cgroup_taskset_for_each(task, css, tset)
|
||||
cpuset_attach_task(cs, task);
|
||||
|
||||
/*
|
||||
* Change mm for all threadgroup leaders. This is expensive and may
|
||||
@ -3247,6 +3271,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
}
|
||||
|
||||
/*
|
||||
* In case the child is cloned into a cpuset different from its parent,
|
||||
* additional checks are done to see if the move is allowed.
|
||||
*/
|
||||
static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
|
||||
{
|
||||
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
|
||||
bool same_cs;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
same_cs = (cs == task_cs(current));
|
||||
rcu_read_unlock();
|
||||
|
||||
if (same_cs)
|
||||
return 0;
|
||||
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
|
||||
/* Check to see if task is allowed in the cpuset */
|
||||
ret = cpuset_can_attach_check(cs);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = task_can_attach(task, cs->effective_cpus);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = security_task_setscheduler(task);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Mark attach is in progress. This makes validate_change() fail
|
||||
* changes which zero cpus/mems_allowed.
|
||||
*/
|
||||
cs->attach_in_progress++;
|
||||
out_unlock:
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
|
||||
{
|
||||
struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
|
||||
bool same_cs;
|
||||
|
||||
rcu_read_lock();
|
||||
same_cs = (cs == task_cs(current));
|
||||
rcu_read_unlock();
|
||||
|
||||
if (same_cs)
|
||||
return;
|
||||
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
cs->attach_in_progress--;
|
||||
if (!cs->attach_in_progress)
|
||||
wake_up(&cpuset_attach_wq);
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the new task conform to the current state of its parent,
|
||||
* which could have been changed by cpuset just after it inherits the
|
||||
@ -3254,11 +3340,33 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
|
||||
*/
|
||||
static void cpuset_fork(struct task_struct *task)
|
||||
{
|
||||
if (task_css_is_root(task, cpuset_cgrp_id))
|
||||
struct cpuset *cs;
|
||||
bool same_cs;
|
||||
|
||||
rcu_read_lock();
|
||||
cs = task_cs(task);
|
||||
same_cs = (cs == task_cs(current));
|
||||
rcu_read_unlock();
|
||||
|
||||
if (same_cs) {
|
||||
if (cs == &top_cpuset)
|
||||
return;
|
||||
|
||||
set_cpus_allowed_ptr(task, current->cpus_ptr);
|
||||
task->mems_allowed = current->mems_allowed;
|
||||
return;
|
||||
}
|
||||
|
||||
/* CLONE_INTO_CGROUP */
|
||||
percpu_down_write(&cpuset_rwsem);
|
||||
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
|
||||
cpuset_attach_task(cs, task);
|
||||
|
||||
cs->attach_in_progress--;
|
||||
if (!cs->attach_in_progress)
|
||||
wake_up(&cpuset_attach_wq);
|
||||
|
||||
percpu_up_write(&cpuset_rwsem);
|
||||
}
|
||||
|
||||
struct cgroup_subsys cpuset_cgrp_subsys = {
|
||||
@ -3271,6 +3379,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
|
||||
.attach = cpuset_attach,
|
||||
.post_attach = cpuset_post_attach,
|
||||
.bind = cpuset_bind,
|
||||
.can_fork = cpuset_can_fork,
|
||||
.cancel_fork = cpuset_cancel_fork,
|
||||
.fork = cpuset_fork,
|
||||
.legacy_cftypes = legacy_files,
|
||||
.dfl_cftypes = dfl_files,
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
/*
|
||||
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
|
||||
@ -350,7 +351,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
|
||||
|
||||
if (freeze) {
|
||||
if (!(freezer->state & CGROUP_FREEZING))
|
||||
static_branch_inc(&freezer_active);
|
||||
static_branch_inc_cpuslocked(&freezer_active);
|
||||
freezer->state |= state;
|
||||
freeze_cgroup(freezer);
|
||||
} else {
|
||||
@ -361,7 +362,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
|
||||
if (!(freezer->state & CGROUP_FREEZING)) {
|
||||
freezer->state &= ~CGROUP_FROZEN;
|
||||
if (was_freezing)
|
||||
static_branch_dec(&freezer_active);
|
||||
static_branch_dec_cpuslocked(&freezer_active);
|
||||
unfreeze_cgroup(freezer);
|
||||
}
|
||||
}
|
||||
@ -379,6 +380,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
|
||||
{
|
||||
struct cgroup_subsys_state *pos;
|
||||
|
||||
cpus_read_lock();
|
||||
/*
|
||||
* Update all its descendants in pre-order traversal. Each
|
||||
* descendant will try to inherit its parent's FREEZING state as
|
||||
@ -407,6 +409,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
|
||||
}
|
||||
rcu_read_unlock();
|
||||
mutex_unlock(&freezer_mutex);
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
static ssize_t freezer_write(struct kernfs_open_file *of,
|
||||
|
@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
|
||||
struct task_cputime *cputime = &bstat->cputime;
|
||||
int i;
|
||||
|
||||
cputime->stime = 0;
|
||||
cputime->utime = 0;
|
||||
cputime->sum_exec_runtime = 0;
|
||||
memset(bstat, 0, sizeof(*bstat));
|
||||
for_each_possible_cpu(i) {
|
||||
struct kernel_cpustat kcpustat;
|
||||
u64 *cpustat = kcpustat.cpustat;
|
||||
|
Loading…
x
Reference in New Issue
Block a user