mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-28 00:32:00 +00:00
cgroup: Changes for v6.13
- cpu.stat now also shows niced CPU time.
- Freezer and cpuset optimizations.
- Other misc changes.

-----BEGIN PGP SIGNATURE-----

iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZztlgg4cdGpAa2VybmVs
Lm9yZwAKCRCxYfJx3gVYGbohAQDE/enqpAX9vSOpQPne4ZzgcPlGTrCwBcka3Z5z
4aOF0AD/SmdjcJ/EULisD/2O27ovsGAtqDjngrrZwNUTbCNkTQQ=
=pKyo
-----END PGP SIGNATURE-----

Merge tag 'cgroup-for-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - cpu.stat now also shows niced CPU time
 - Freezer and cpuset optimizations
 - Other misc changes

* tag 'cgroup-for-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: Disable cpuset_cpumask_can_shrink() test if not load balancing
  cgroup/cpuset: Further optimize code if CONFIG_CPUSETS_V1 not set
  cgroup/cpuset: Enforce at most one rebuild_sched_domains_locked() call per operation
  cgroup/cpuset: Revert "Allow suppression of sched domain rebuild in update_cpumasks_hier()"
  MAINTAINERS: remove Zefan Li
  cgroup/freezer: Add cgroup CGRP_FROZEN flag update helper
  cgroup/freezer: Reduce redundant traversal for cgroup_freeze
  cgroup/bpf: only cgroup v2 can be attached by bpf programs
  Revert "cgroup: Fix memory leak caused by missing cgroup_bpf_offline"
  selftests/cgroup: Fix compile error in test_cpu.c
  cgroup/rstat: Selftests for niced CPU statistics
  cgroup/rstat: Tracking cgroup-level niced CPU time
  cgroup/cpuset: Fix spelling errors in file kernel/cgroup/cpuset.c
This commit is contained in:
commit
7586d52765
3
CREDITS
3
CREDITS
@ -579,6 +579,9 @@ N: Zach Brown
|
|||||||
E: zab@zabbo.net
|
E: zab@zabbo.net
|
||||||
D: maestro pci sound
|
D: maestro pci sound
|
||||||
|
|
||||||
|
N: Zefan Li
|
||||||
|
D: Contribution to control group stuff
|
||||||
|
|
||||||
N: David Brownell
|
N: David Brownell
|
||||||
D: Kernel engineer, mentor, and friend. Maintained USB EHCI and
|
D: Kernel engineer, mentor, and friend. Maintained USB EHCI and
|
||||||
D: gadget layers, SPI subsystem, GPIO subsystem, and more than a few
|
D: gadget layers, SPI subsystem, GPIO subsystem, and more than a few
|
||||||
|
@ -5756,7 +5756,6 @@ F: kernel/context_tracking.c
|
|||||||
|
|
||||||
CONTROL GROUP (CGROUP)
|
CONTROL GROUP (CGROUP)
|
||||||
M: Tejun Heo <tj@kernel.org>
|
M: Tejun Heo <tj@kernel.org>
|
||||||
M: Zefan Li <lizefan.x@bytedance.com>
|
|
||||||
M: Johannes Weiner <hannes@cmpxchg.org>
|
M: Johannes Weiner <hannes@cmpxchg.org>
|
||||||
M: Michal Koutný <mkoutny@suse.com>
|
M: Michal Koutný <mkoutny@suse.com>
|
||||||
L: cgroups@vger.kernel.org
|
L: cgroups@vger.kernel.org
|
||||||
@ -5785,7 +5784,6 @@ F: include/linux/blk-cgroup.h
|
|||||||
|
|
||||||
CONTROL GROUP - CPUSET
|
CONTROL GROUP - CPUSET
|
||||||
M: Waiman Long <longman@redhat.com>
|
M: Waiman Long <longman@redhat.com>
|
||||||
M: Zefan Li <lizefan.x@bytedance.com>
|
|
||||||
L: cgroups@vger.kernel.org
|
L: cgroups@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
|
||||||
|
@ -327,6 +327,7 @@ struct cgroup_base_stat {
|
|||||||
#ifdef CONFIG_SCHED_CORE
|
#ifdef CONFIG_SCHED_CORE
|
||||||
u64 forceidle_sum;
|
u64 forceidle_sum;
|
||||||
#endif
|
#endif
|
||||||
|
u64 ntime;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -397,7 +398,7 @@ struct cgroup_freezer_state {
|
|||||||
bool freeze;
|
bool freeze;
|
||||||
|
|
||||||
/* Should the cgroup actually be frozen? */
|
/* Should the cgroup actually be frozen? */
|
||||||
int e_freeze;
|
bool e_freeze;
|
||||||
|
|
||||||
/* Fields below are protected by css_set_lock */
|
/* Fields below are protected by css_set_lock */
|
||||||
|
|
||||||
|
@ -2140,8 +2140,10 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto exit_stats;
|
goto exit_stats;
|
||||||
|
|
||||||
ret = cgroup_bpf_inherit(root_cgrp);
|
if (root == &cgrp_dfl_root) {
|
||||||
WARN_ON_ONCE(ret);
|
ret = cgroup_bpf_inherit(root_cgrp);
|
||||||
|
WARN_ON_ONCE(ret);
|
||||||
|
}
|
||||||
|
|
||||||
trace_cgroup_setup_root(root);
|
trace_cgroup_setup_root(root);
|
||||||
|
|
||||||
@ -2314,10 +2316,8 @@ static void cgroup_kill_sb(struct super_block *sb)
|
|||||||
* And don't kill the default root.
|
* And don't kill the default root.
|
||||||
*/
|
*/
|
||||||
if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
|
if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
|
||||||
!percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
|
!percpu_ref_is_dying(&root->cgrp.self.refcnt))
|
||||||
cgroup_bpf_offline(&root->cgrp);
|
|
||||||
percpu_ref_kill(&root->cgrp.self.refcnt);
|
percpu_ref_kill(&root->cgrp.self.refcnt);
|
||||||
}
|
|
||||||
cgroup_put(&root->cgrp);
|
cgroup_put(&root->cgrp);
|
||||||
kernfs_kill_sb(sb);
|
kernfs_kill_sb(sb);
|
||||||
}
|
}
|
||||||
@ -5710,9 +5710,11 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto out_kernfs_remove;
|
goto out_kernfs_remove;
|
||||||
|
|
||||||
ret = cgroup_bpf_inherit(cgrp);
|
if (cgrp->root == &cgrp_dfl_root) {
|
||||||
if (ret)
|
ret = cgroup_bpf_inherit(cgrp);
|
||||||
goto out_psi_free;
|
if (ret)
|
||||||
|
goto out_psi_free;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* New cgroup inherits effective freeze counter, and
|
* New cgroup inherits effective freeze counter, and
|
||||||
@ -6026,7 +6028,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
|
|||||||
|
|
||||||
cgroup1_check_for_release(parent);
|
cgroup1_check_for_release(parent);
|
||||||
|
|
||||||
cgroup_bpf_offline(cgrp);
|
if (cgrp->root == &cgrp_dfl_root)
|
||||||
|
cgroup_bpf_offline(cgrp);
|
||||||
|
|
||||||
/* put the base reference */
|
/* put the base reference */
|
||||||
percpu_ref_kill(&cgrp->self.refcnt);
|
percpu_ref_kill(&cgrp->self.refcnt);
|
||||||
|
@ -84,9 +84,19 @@ static bool have_boot_isolcpus;
|
|||||||
static struct list_head remote_children;
|
static struct list_head remote_children;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A flag to force sched domain rebuild at the end of an operation while
|
* A flag to force sched domain rebuild at the end of an operation.
|
||||||
* inhibiting it in the intermediate stages when set. Currently it is only
|
* It can be set in
|
||||||
* set in hotplug code.
|
* - update_partition_sd_lb()
|
||||||
|
* - remote_partition_check()
|
||||||
|
* - update_cpumasks_hier()
|
||||||
|
* - cpuset_update_flag()
|
||||||
|
* - cpuset_hotplug_update_tasks()
|
||||||
|
* - cpuset_handle_hotplug()
|
||||||
|
*
|
||||||
|
* Protected by cpuset_mutex (with cpus_read_lock held) or cpus_write_lock.
|
||||||
|
*
|
||||||
|
* Note that update_relax_domain_level() in cpuset-v1.c can still call
|
||||||
|
* rebuild_sched_domains_locked() directly without using this flag.
|
||||||
*/
|
*/
|
||||||
static bool force_sd_rebuild;
|
static bool force_sd_rebuild;
|
||||||
|
|
||||||
@ -283,6 +293,12 @@ static inline void dec_attach_in_progress(struct cpuset *cs)
|
|||||||
mutex_unlock(&cpuset_mutex);
|
mutex_unlock(&cpuset_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool cpuset_v2(void)
|
||||||
|
{
|
||||||
|
return !IS_ENABLED(CONFIG_CPUSETS_V1) ||
|
||||||
|
cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cgroup v2 behavior is used on the "cpus" and "mems" control files when
|
* Cgroup v2 behavior is used on the "cpus" and "mems" control files when
|
||||||
* on default hierarchy or when the cpuset_v2_mode flag is set by mounting
|
* on default hierarchy or when the cpuset_v2_mode flag is set by mounting
|
||||||
@ -293,7 +309,7 @@ static inline void dec_attach_in_progress(struct cpuset *cs)
|
|||||||
*/
|
*/
|
||||||
static inline bool is_in_v2_mode(void)
|
static inline bool is_in_v2_mode(void)
|
||||||
{
|
{
|
||||||
return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
|
return cpuset_v2() ||
|
||||||
(cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
|
(cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -565,12 +581,24 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* We can't shrink if we won't have enough room for SCHED_DEADLINE
|
* We can't shrink if we won't have enough room for SCHED_DEADLINE
|
||||||
* tasks.
|
* tasks. This check is not done when scheduling is disabled as the
|
||||||
|
* users should know what they are doing.
|
||||||
|
*
|
||||||
|
* For v1, effective_cpus == cpus_allowed & user_xcpus() returns
|
||||||
|
* cpus_allowed.
|
||||||
|
*
|
||||||
|
* For v2, is_cpu_exclusive() & is_sched_load_balance() are true only
|
||||||
|
* for non-isolated partition root. At this point, the target
|
||||||
|
* effective_cpus isn't computed yet. user_xcpus() is the best
|
||||||
|
* approximation.
|
||||||
|
*
|
||||||
|
* TBD: May need to precompute the real effective_cpus here in case
|
||||||
|
* incorrect scheduling of SCHED_DEADLINE tasks in a partition
|
||||||
|
* becomes an issue.
|
||||||
*/
|
*/
|
||||||
ret = -EBUSY;
|
ret = -EBUSY;
|
||||||
if (is_cpu_exclusive(cur) &&
|
if (is_cpu_exclusive(cur) && is_sched_load_balance(cur) &&
|
||||||
!cpuset_cpumask_can_shrink(cur->cpus_allowed,
|
!cpuset_cpumask_can_shrink(cur->effective_cpus, user_xcpus(trial)))
|
||||||
trial->cpus_allowed))
|
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -728,7 +756,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
|
|||||||
int nslot; /* next empty doms[] struct cpumask slot */
|
int nslot; /* next empty doms[] struct cpumask slot */
|
||||||
struct cgroup_subsys_state *pos_css;
|
struct cgroup_subsys_state *pos_css;
|
||||||
bool root_load_balance = is_sched_load_balance(&top_cpuset);
|
bool root_load_balance = is_sched_load_balance(&top_cpuset);
|
||||||
bool cgrpv2 = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
|
bool cgrpv2 = cpuset_v2();
|
||||||
int nslot_update;
|
int nslot_update;
|
||||||
|
|
||||||
doms = NULL;
|
doms = NULL;
|
||||||
@ -990,6 +1018,7 @@ void rebuild_sched_domains_locked(void)
|
|||||||
|
|
||||||
lockdep_assert_cpus_held();
|
lockdep_assert_cpus_held();
|
||||||
lockdep_assert_held(&cpuset_mutex);
|
lockdep_assert_held(&cpuset_mutex);
|
||||||
|
force_sd_rebuild = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we have raced with CPU hotplug, return early to avoid
|
* If we have raced with CPU hotplug, return early to avoid
|
||||||
@ -1164,8 +1193,8 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
|
|||||||
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rebuild_domains && !force_sd_rebuild)
|
if (rebuild_domains)
|
||||||
rebuild_sched_domains_locked();
|
cpuset_force_rebuild();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1187,7 +1216,7 @@ static void reset_partition_data(struct cpuset *cs)
|
|||||||
{
|
{
|
||||||
struct cpuset *parent = parent_cs(cs);
|
struct cpuset *parent = parent_cs(cs);
|
||||||
|
|
||||||
if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
|
if (!cpuset_v2())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
lockdep_assert_held(&callback_lock);
|
lockdep_assert_held(&callback_lock);
|
||||||
@ -1339,7 +1368,7 @@ static inline bool is_local_partition(struct cpuset *cs)
|
|||||||
* remote_partition_enable - Enable current cpuset as a remote partition root
|
* remote_partition_enable - Enable current cpuset as a remote partition root
|
||||||
* @cs: the cpuset to update
|
* @cs: the cpuset to update
|
||||||
* @new_prs: new partition_root_state
|
* @new_prs: new partition_root_state
|
||||||
* @tmp: temparary masks
|
* @tmp: temporary masks
|
||||||
* Return: 0 if successful, errcode if error
|
* Return: 0 if successful, errcode if error
|
||||||
*
|
*
|
||||||
* Enable the current cpuset to become a remote partition root taking CPUs
|
* Enable the current cpuset to become a remote partition root taking CPUs
|
||||||
@ -1377,7 +1406,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
|
|||||||
update_unbound_workqueue_cpumask(isolcpus_updated);
|
update_unbound_workqueue_cpumask(isolcpus_updated);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
|
* Propagate changes in top_cpuset's effective_cpus down the hierarchy.
|
||||||
*/
|
*/
|
||||||
cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus);
|
cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus);
|
||||||
update_sibling_cpumasks(&top_cpuset, NULL, tmp);
|
update_sibling_cpumasks(&top_cpuset, NULL, tmp);
|
||||||
@ -1387,7 +1416,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
|
|||||||
/*
|
/*
|
||||||
* remote_partition_disable - Remove current cpuset from remote partition list
|
* remote_partition_disable - Remove current cpuset from remote partition list
|
||||||
* @cs: the cpuset to update
|
* @cs: the cpuset to update
|
||||||
* @tmp: temparary masks
|
* @tmp: temporary masks
|
||||||
*
|
*
|
||||||
* The effective_cpus is also updated.
|
* The effective_cpus is also updated.
|
||||||
*
|
*
|
||||||
@ -1413,7 +1442,7 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
|
|||||||
update_unbound_workqueue_cpumask(isolcpus_updated);
|
update_unbound_workqueue_cpumask(isolcpus_updated);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
|
* Propagate changes in top_cpuset's effective_cpus down the hierarchy.
|
||||||
*/
|
*/
|
||||||
cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus);
|
cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus);
|
||||||
update_sibling_cpumasks(&top_cpuset, NULL, tmp);
|
update_sibling_cpumasks(&top_cpuset, NULL, tmp);
|
||||||
@ -1423,7 +1452,7 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
|
|||||||
* remote_cpus_update - cpus_exclusive change of remote partition
|
* remote_cpus_update - cpus_exclusive change of remote partition
|
||||||
* @cs: the cpuset to be updated
|
* @cs: the cpuset to be updated
|
||||||
* @newmask: the new effective_xcpus mask
|
* @newmask: the new effective_xcpus mask
|
||||||
* @tmp: temparary masks
|
* @tmp: temporary masks
|
||||||
*
|
*
|
||||||
* top_cpuset and subpartitions_cpus will be updated or partition can be
|
* top_cpuset and subpartitions_cpus will be updated or partition can be
|
||||||
* invalidated.
|
* invalidated.
|
||||||
@ -1465,7 +1494,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
|
|||||||
update_unbound_workqueue_cpumask(isolcpus_updated);
|
update_unbound_workqueue_cpumask(isolcpus_updated);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
|
* Propagate changes in top_cpuset's effective_cpus down the hierarchy.
|
||||||
*/
|
*/
|
||||||
cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus);
|
cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus);
|
||||||
update_sibling_cpumasks(&top_cpuset, NULL, tmp);
|
update_sibling_cpumasks(&top_cpuset, NULL, tmp);
|
||||||
@ -1480,7 +1509,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
|
|||||||
* @cs: the cpuset to be updated
|
* @cs: the cpuset to be updated
|
||||||
* @newmask: the new effective_xcpus mask
|
* @newmask: the new effective_xcpus mask
|
||||||
* @delmask: temporary mask for deletion (not in tmp)
|
* @delmask: temporary mask for deletion (not in tmp)
|
||||||
* @tmp: temparary masks
|
* @tmp: temporary masks
|
||||||
*
|
*
|
||||||
* This should be called before the given cs has updated its cpus_allowed
|
* This should be called before the given cs has updated its cpus_allowed
|
||||||
* and/or effective_xcpus.
|
* and/or effective_xcpus.
|
||||||
@ -1512,8 +1541,8 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
|
|||||||
remote_partition_disable(child, tmp);
|
remote_partition_disable(child, tmp);
|
||||||
disable_cnt++;
|
disable_cnt++;
|
||||||
}
|
}
|
||||||
if (disable_cnt && !force_sd_rebuild)
|
if (disable_cnt)
|
||||||
rebuild_sched_domains_locked();
|
cpuset_force_rebuild();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1922,12 +1951,6 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
|
|||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* update_cpumasks_hier() flags
|
|
||||||
*/
|
|
||||||
#define HIER_CHECKALL 0x01 /* Check all cpusets with no skipping */
|
|
||||||
#define HIER_NO_SD_REBUILD 0x02 /* Don't rebuild sched domains */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
|
* update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
|
||||||
* @cs: the cpuset to consider
|
* @cs: the cpuset to consider
|
||||||
@ -1942,7 +1965,7 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
|
|||||||
* Called with cpuset_mutex held
|
* Called with cpuset_mutex held
|
||||||
*/
|
*/
|
||||||
static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
|
static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
|
||||||
int flags)
|
bool force)
|
||||||
{
|
{
|
||||||
struct cpuset *cp;
|
struct cpuset *cp;
|
||||||
struct cgroup_subsys_state *pos_css;
|
struct cgroup_subsys_state *pos_css;
|
||||||
@ -2007,12 +2030,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
|
|||||||
* Skip the whole subtree if
|
* Skip the whole subtree if
|
||||||
* 1) the cpumask remains the same,
|
* 1) the cpumask remains the same,
|
||||||
* 2) has no partition root state,
|
* 2) has no partition root state,
|
||||||
* 3) HIER_CHECKALL flag not set, and
|
* 3) force flag not set, and
|
||||||
* 4) for v2 load balance state same as its parent.
|
* 4) for v2 load balance state same as its parent.
|
||||||
*/
|
*/
|
||||||
if (!cp->partition_root_state && !(flags & HIER_CHECKALL) &&
|
if (!cp->partition_root_state && !force &&
|
||||||
cpumask_equal(tmp->new_cpus, cp->effective_cpus) &&
|
cpumask_equal(tmp->new_cpus, cp->effective_cpus) &&
|
||||||
(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
|
(!cpuset_v2() ||
|
||||||
(is_sched_load_balance(parent) == is_sched_load_balance(cp)))) {
|
(is_sched_load_balance(parent) == is_sched_load_balance(cp)))) {
|
||||||
pos_css = css_rightmost_descendant(pos_css);
|
pos_css = css_rightmost_descendant(pos_css);
|
||||||
continue;
|
continue;
|
||||||
@ -2086,8 +2109,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
|
|||||||
* from parent if current cpuset isn't a valid partition root
|
* from parent if current cpuset isn't a valid partition root
|
||||||
* and their load balance states differ.
|
* and their load balance states differ.
|
||||||
*/
|
*/
|
||||||
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
if (cpuset_v2() && !is_partition_valid(cp) &&
|
||||||
!is_partition_valid(cp) &&
|
|
||||||
(is_sched_load_balance(parent) != is_sched_load_balance(cp))) {
|
(is_sched_load_balance(parent) != is_sched_load_balance(cp))) {
|
||||||
if (is_sched_load_balance(parent))
|
if (is_sched_load_balance(parent))
|
||||||
set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags);
|
set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags);
|
||||||
@ -2103,8 +2125,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
|
|||||||
*/
|
*/
|
||||||
if (!cpumask_empty(cp->cpus_allowed) &&
|
if (!cpumask_empty(cp->cpus_allowed) &&
|
||||||
is_sched_load_balance(cp) &&
|
is_sched_load_balance(cp) &&
|
||||||
(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
|
(!cpuset_v2() || is_partition_valid(cp)))
|
||||||
is_partition_valid(cp)))
|
|
||||||
need_rebuild_sched_domains = true;
|
need_rebuild_sched_domains = true;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
@ -2112,9 +2133,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
|
|||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) &&
|
if (need_rebuild_sched_domains)
|
||||||
!force_sd_rebuild)
|
cpuset_force_rebuild();
|
||||||
rebuild_sched_domains_locked();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2141,9 +2161,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
|
|||||||
* directly.
|
* directly.
|
||||||
*
|
*
|
||||||
* The update_cpumasks_hier() function may sleep. So we have to
|
* The update_cpumasks_hier() function may sleep. So we have to
|
||||||
* release the RCU read lock before calling it. HIER_NO_SD_REBUILD
|
* release the RCU read lock before calling it.
|
||||||
* flag is used to suppress rebuild of sched domains as the callers
|
|
||||||
* will take care of that.
|
|
||||||
*/
|
*/
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
cpuset_for_each_child(sibling, pos_css, parent) {
|
cpuset_for_each_child(sibling, pos_css, parent) {
|
||||||
@ -2159,7 +2177,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
update_cpumasks_hier(sibling, tmp, HIER_NO_SD_REBUILD);
|
update_cpumasks_hier(sibling, tmp, false);
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
css_put(&sibling->css);
|
css_put(&sibling->css);
|
||||||
}
|
}
|
||||||
@ -2179,7 +2197,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
struct tmpmasks tmp;
|
struct tmpmasks tmp;
|
||||||
struct cpuset *parent = parent_cs(cs);
|
struct cpuset *parent = parent_cs(cs);
|
||||||
bool invalidate = false;
|
bool invalidate = false;
|
||||||
int hier_flags = 0;
|
bool force = false;
|
||||||
int old_prs = cs->partition_root_state;
|
int old_prs = cs->partition_root_state;
|
||||||
|
|
||||||
/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
|
/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
|
||||||
@ -2206,7 +2224,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When exclusive_cpus isn't explicitly set, it is constrainted
|
* When exclusive_cpus isn't explicitly set, it is constrained
|
||||||
* by cpus_allowed and parent's effective_xcpus. Otherwise,
|
* by cpus_allowed and parent's effective_xcpus. Otherwise,
|
||||||
* trialcs->effective_xcpus is used as a temporary cpumask
|
* trialcs->effective_xcpus is used as a temporary cpumask
|
||||||
* for checking validity of the partition root.
|
* for checking validity of the partition root.
|
||||||
@ -2240,12 +2258,11 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
* Check all the descendants in update_cpumasks_hier() if
|
* Check all the descendants in update_cpumasks_hier() if
|
||||||
* effective_xcpus is to be changed.
|
* effective_xcpus is to be changed.
|
||||||
*/
|
*/
|
||||||
if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus))
|
force = !cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus);
|
||||||
hier_flags = HIER_CHECKALL;
|
|
||||||
|
|
||||||
retval = validate_change(cs, trialcs);
|
retval = validate_change(cs, trialcs);
|
||||||
|
|
||||||
if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
|
if ((retval == -EINVAL) && cpuset_v2()) {
|
||||||
struct cgroup_subsys_state *css;
|
struct cgroup_subsys_state *css;
|
||||||
struct cpuset *cp;
|
struct cpuset *cp;
|
||||||
|
|
||||||
@ -2309,7 +2326,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
spin_unlock_irq(&callback_lock);
|
spin_unlock_irq(&callback_lock);
|
||||||
|
|
||||||
/* effective_cpus/effective_xcpus will be updated here */
|
/* effective_cpus/effective_xcpus will be updated here */
|
||||||
update_cpumasks_hier(cs, &tmp, hier_flags);
|
update_cpumasks_hier(cs, &tmp, force);
|
||||||
|
|
||||||
/* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
|
/* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
|
||||||
if (cs->partition_root_state)
|
if (cs->partition_root_state)
|
||||||
@ -2334,7 +2351,7 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
struct tmpmasks tmp;
|
struct tmpmasks tmp;
|
||||||
struct cpuset *parent = parent_cs(cs);
|
struct cpuset *parent = parent_cs(cs);
|
||||||
bool invalidate = false;
|
bool invalidate = false;
|
||||||
int hier_flags = 0;
|
bool force = false;
|
||||||
int old_prs = cs->partition_root_state;
|
int old_prs = cs->partition_root_state;
|
||||||
|
|
||||||
if (!*buf) {
|
if (!*buf) {
|
||||||
@ -2357,8 +2374,7 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
* Check all the descendants in update_cpumasks_hier() if
|
* Check all the descendants in update_cpumasks_hier() if
|
||||||
* effective_xcpus is to be changed.
|
* effective_xcpus is to be changed.
|
||||||
*/
|
*/
|
||||||
if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus))
|
force = !cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus);
|
||||||
hier_flags = HIER_CHECKALL;
|
|
||||||
|
|
||||||
retval = validate_change(cs, trialcs);
|
retval = validate_change(cs, trialcs);
|
||||||
if (retval)
|
if (retval)
|
||||||
@ -2411,8 +2427,8 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
|||||||
* of the subtree when it is a valid partition root or effective_xcpus
|
* of the subtree when it is a valid partition root or effective_xcpus
|
||||||
* is updated.
|
* is updated.
|
||||||
*/
|
*/
|
||||||
if (is_partition_valid(cs) || hier_flags)
|
if (is_partition_valid(cs) || force)
|
||||||
update_cpumasks_hier(cs, &tmp, hier_flags);
|
update_cpumasks_hier(cs, &tmp, force);
|
||||||
|
|
||||||
/* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
|
/* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
|
||||||
if (cs->partition_root_state)
|
if (cs->partition_root_state)
|
||||||
@ -2737,9 +2753,12 @@ int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
|
|||||||
cs->flags = trialcs->flags;
|
cs->flags = trialcs->flags;
|
||||||
spin_unlock_irq(&callback_lock);
|
spin_unlock_irq(&callback_lock);
|
||||||
|
|
||||||
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
|
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) {
|
||||||
!force_sd_rebuild)
|
if (cpuset_v2())
|
||||||
rebuild_sched_domains_locked();
|
cpuset_force_rebuild();
|
||||||
|
else
|
||||||
|
rebuild_sched_domains_locked();
|
||||||
|
}
|
||||||
|
|
||||||
if (spread_flag_changed)
|
if (spread_flag_changed)
|
||||||
cpuset1_update_tasks_flags(cs);
|
cpuset1_update_tasks_flags(cs);
|
||||||
@ -2853,12 +2872,14 @@ static int update_prstate(struct cpuset *cs, int new_prs)
|
|||||||
update_unbound_workqueue_cpumask(new_xcpus_state);
|
update_unbound_workqueue_cpumask(new_xcpus_state);
|
||||||
|
|
||||||
/* Force update if switching back to member */
|
/* Force update if switching back to member */
|
||||||
update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0);
|
update_cpumasks_hier(cs, &tmpmask, !new_prs);
|
||||||
|
|
||||||
/* Update sched domains and load balance flag */
|
/* Update sched domains and load balance flag */
|
||||||
update_partition_sd_lb(cs, old_prs);
|
update_partition_sd_lb(cs, old_prs);
|
||||||
|
|
||||||
notify_partition_change(cs, old_prs);
|
notify_partition_change(cs, old_prs);
|
||||||
|
if (force_sd_rebuild)
|
||||||
|
rebuild_sched_domains_locked();
|
||||||
free_cpumasks(NULL, &tmpmask);
|
free_cpumasks(NULL, &tmpmask);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -2919,8 +2940,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
|
|||||||
* migration permission derives from hierarchy ownership in
|
* migration permission derives from hierarchy ownership in
|
||||||
* cgroup_procs_write_permission()).
|
* cgroup_procs_write_permission()).
|
||||||
*/
|
*/
|
||||||
if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
|
if (!cpuset_v2() || (cpus_updated || mems_updated)) {
|
||||||
(cpus_updated || mems_updated)) {
|
|
||||||
ret = security_task_setscheduler(task);
|
ret = security_task_setscheduler(task);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
@ -3034,8 +3054,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
|
|||||||
* in effective cpus and mems. In that case, we can optimize out
|
* in effective cpus and mems. In that case, we can optimize out
|
||||||
* by skipping the task iteration and update.
|
* by skipping the task iteration and update.
|
||||||
*/
|
*/
|
||||||
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
if (cpuset_v2() && !cpus_updated && !mems_updated) {
|
||||||
!cpus_updated && !mems_updated) {
|
|
||||||
cpuset_attach_nodemask_to = cs->effective_mems;
|
cpuset_attach_nodemask_to = cs->effective_mems;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -3152,6 +3171,8 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
|
|||||||
}
|
}
|
||||||
|
|
||||||
free_cpuset(trialcs);
|
free_cpuset(trialcs);
|
||||||
|
if (force_sd_rebuild)
|
||||||
|
rebuild_sched_domains_locked();
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&cpuset_mutex);
|
mutex_unlock(&cpuset_mutex);
|
||||||
cpus_read_unlock();
|
cpus_read_unlock();
|
||||||
@ -3383,7 +3404,7 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
|
|||||||
INIT_LIST_HEAD(&cs->remote_sibling);
|
INIT_LIST_HEAD(&cs->remote_sibling);
|
||||||
|
|
||||||
/* Set CS_MEMORY_MIGRATE for default hierarchy */
|
/* Set CS_MEMORY_MIGRATE for default hierarchy */
|
||||||
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
|
if (cpuset_v2())
|
||||||
__set_bit(CS_MEMORY_MIGRATE, &cs->flags);
|
__set_bit(CS_MEMORY_MIGRATE, &cs->flags);
|
||||||
|
|
||||||
return &cs->css;
|
return &cs->css;
|
||||||
@ -3410,8 +3431,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
|
|||||||
/*
|
/*
|
||||||
* For v2, clear CS_SCHED_LOAD_BALANCE if parent is isolated
|
* For v2, clear CS_SCHED_LOAD_BALANCE if parent is isolated
|
||||||
*/
|
*/
|
||||||
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
if (cpuset_v2() && !is_sched_load_balance(parent))
|
||||||
!is_sched_load_balance(parent))
|
|
||||||
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
||||||
|
|
||||||
cpuset_inc();
|
cpuset_inc();
|
||||||
@ -3481,8 +3501,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
|
|||||||
if (is_partition_valid(cs))
|
if (is_partition_valid(cs))
|
||||||
update_prstate(cs, 0);
|
update_prstate(cs, 0);
|
||||||
|
|
||||||
if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
|
if (!cpuset_v2() && is_sched_load_balance(cs))
|
||||||
is_sched_load_balance(cs))
|
|
||||||
cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
|
cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
|
||||||
|
|
||||||
cpuset_dec();
|
cpuset_dec();
|
||||||
@ -3896,11 +3915,9 @@ static void cpuset_handle_hotplug(void)
|
|||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* rebuild sched domains if cpus_allowed has changed */
|
/* rebuild sched domains if necessary */
|
||||||
if (force_sd_rebuild) {
|
if (force_sd_rebuild)
|
||||||
force_sd_rebuild = false;
|
|
||||||
rebuild_sched_domains_cpuslocked();
|
rebuild_sched_domains_cpuslocked();
|
||||||
}
|
|
||||||
|
|
||||||
free_cpumasks(NULL, ptmp);
|
free_cpumasks(NULL, ptmp);
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,28 @@
|
|||||||
|
|
||||||
#include <trace/events/cgroup.h>
|
#include <trace/events/cgroup.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update CGRP_FROZEN of cgroup.flag
|
||||||
|
* Return true if flags is updated; false if flags has no change
|
||||||
|
*/
|
||||||
|
static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
|
||||||
|
{
|
||||||
|
lockdep_assert_held(&css_set_lock);
|
||||||
|
|
||||||
|
/* Already there? */
|
||||||
|
if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (frozen)
|
||||||
|
set_bit(CGRP_FROZEN, &cgrp->flags);
|
||||||
|
else
|
||||||
|
clear_bit(CGRP_FROZEN, &cgrp->flags);
|
||||||
|
|
||||||
|
cgroup_file_notify(&cgrp->events_file);
|
||||||
|
TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Propagate the cgroup frozen state upwards by the cgroup tree.
|
* Propagate the cgroup frozen state upwards by the cgroup tree.
|
||||||
*/
|
*/
|
||||||
@ -24,24 +46,16 @@ static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
|
|||||||
while ((cgrp = cgroup_parent(cgrp))) {
|
while ((cgrp = cgroup_parent(cgrp))) {
|
||||||
if (frozen) {
|
if (frozen) {
|
||||||
cgrp->freezer.nr_frozen_descendants += desc;
|
cgrp->freezer.nr_frozen_descendants += desc;
|
||||||
if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
|
if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
|
||||||
test_bit(CGRP_FREEZE, &cgrp->flags) &&
|
(cgrp->freezer.nr_frozen_descendants !=
|
||||||
cgrp->freezer.nr_frozen_descendants ==
|
cgrp->nr_descendants))
|
||||||
cgrp->nr_descendants) {
|
continue;
|
||||||
set_bit(CGRP_FROZEN, &cgrp->flags);
|
|
||||||
cgroup_file_notify(&cgrp->events_file);
|
|
||||||
TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
|
|
||||||
desc++;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
cgrp->freezer.nr_frozen_descendants -= desc;
|
cgrp->freezer.nr_frozen_descendants -= desc;
|
||||||
if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
|
|
||||||
clear_bit(CGRP_FROZEN, &cgrp->flags);
|
|
||||||
cgroup_file_notify(&cgrp->events_file);
|
|
||||||
TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
|
|
||||||
desc++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cgroup_update_frozen_flag(cgrp, frozen))
|
||||||
|
desc++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,8 +67,6 @@ void cgroup_update_frozen(struct cgroup *cgrp)
|
|||||||
{
|
{
|
||||||
bool frozen;
|
bool frozen;
|
||||||
|
|
||||||
lockdep_assert_held(&css_set_lock);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the cgroup has to be frozen (CGRP_FREEZE bit set),
|
* If the cgroup has to be frozen (CGRP_FREEZE bit set),
|
||||||
* and all tasks are frozen and/or stopped, let's consider
|
* and all tasks are frozen and/or stopped, let's consider
|
||||||
@ -63,24 +75,9 @@ void cgroup_update_frozen(struct cgroup *cgrp)
|
|||||||
frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
|
frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
|
||||||
cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
|
cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
|
||||||
|
|
||||||
if (frozen) {
|
/* If flags is updated, update the state of ancestor cgroups. */
|
||||||
/* Already there? */
|
if (cgroup_update_frozen_flag(cgrp, frozen))
|
||||||
if (test_bit(CGRP_FROZEN, &cgrp->flags))
|
cgroup_propagate_frozen(cgrp, frozen);
|
||||||
return;
|
|
||||||
|
|
||||||
set_bit(CGRP_FROZEN, &cgrp->flags);
|
|
||||||
} else {
|
|
||||||
/* Already there? */
|
|
||||||
if (!test_bit(CGRP_FROZEN, &cgrp->flags))
|
|
||||||
return;
|
|
||||||
|
|
||||||
clear_bit(CGRP_FROZEN, &cgrp->flags);
|
|
||||||
}
|
|
||||||
cgroup_file_notify(&cgrp->events_file);
|
|
||||||
TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
|
|
||||||
|
|
||||||
/* Update the state of ancestor cgroups. */
|
|
||||||
cgroup_propagate_frozen(cgrp, frozen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -260,8 +257,10 @@ void cgroup_freezer_migrate_task(struct task_struct *task,
|
|||||||
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
|
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
|
||||||
{
|
{
|
||||||
struct cgroup_subsys_state *css;
|
struct cgroup_subsys_state *css;
|
||||||
|
struct cgroup *parent;
|
||||||
struct cgroup *dsct;
|
struct cgroup *dsct;
|
||||||
bool applied = false;
|
bool applied = false;
|
||||||
|
bool old_e;
|
||||||
|
|
||||||
lockdep_assert_held(&cgroup_mutex);
|
lockdep_assert_held(&cgroup_mutex);
|
||||||
|
|
||||||
@ -282,22 +281,18 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
|
|||||||
if (cgroup_is_dead(dsct))
|
if (cgroup_is_dead(dsct))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (freeze) {
|
/*
|
||||||
dsct->freezer.e_freeze++;
|
* e_freeze is affected by parent's e_freeze and dst's freeze.
|
||||||
/*
|
* If old e_freeze eq new e_freeze, no change, its children
|
||||||
* Already frozen because of ancestor's settings?
|
* will not be affected. So do nothing and skip the subtree
|
||||||
*/
|
*/
|
||||||
if (dsct->freezer.e_freeze > 1)
|
old_e = dsct->freezer.e_freeze;
|
||||||
continue;
|
parent = cgroup_parent(dsct);
|
||||||
} else {
|
dsct->freezer.e_freeze = (dsct->freezer.freeze ||
|
||||||
dsct->freezer.e_freeze--;
|
parent->freezer.e_freeze);
|
||||||
/*
|
if (dsct->freezer.e_freeze == old_e) {
|
||||||
* Still frozen because of ancestor's settings?
|
css = css_rightmost_descendant(css);
|
||||||
*/
|
continue;
|
||||||
if (dsct->freezer.e_freeze > 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -444,6 +444,7 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
|
|||||||
#ifdef CONFIG_SCHED_CORE
|
#ifdef CONFIG_SCHED_CORE
|
||||||
dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
|
dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
|
||||||
#endif
|
#endif
|
||||||
|
dst_bstat->ntime += src_bstat->ntime;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
|
static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
|
||||||
@ -455,6 +456,7 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
|
|||||||
#ifdef CONFIG_SCHED_CORE
|
#ifdef CONFIG_SCHED_CORE
|
||||||
dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
|
dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
|
||||||
#endif
|
#endif
|
||||||
|
dst_bstat->ntime -= src_bstat->ntime;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
|
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
|
||||||
@ -534,8 +536,10 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
|
|||||||
rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
|
rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
|
||||||
|
|
||||||
switch (index) {
|
switch (index) {
|
||||||
case CPUTIME_USER:
|
|
||||||
case CPUTIME_NICE:
|
case CPUTIME_NICE:
|
||||||
|
rstatc->bstat.ntime += delta_exec;
|
||||||
|
fallthrough;
|
||||||
|
case CPUTIME_USER:
|
||||||
rstatc->bstat.cputime.utime += delta_exec;
|
rstatc->bstat.cputime.utime += delta_exec;
|
||||||
break;
|
break;
|
||||||
case CPUTIME_SYSTEM:
|
case CPUTIME_SYSTEM:
|
||||||
@ -591,6 +595,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
|
|||||||
#ifdef CONFIG_SCHED_CORE
|
#ifdef CONFIG_SCHED_CORE
|
||||||
bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
|
bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
|
||||||
#endif
|
#endif
|
||||||
|
bstat->ntime += cpustat[CPUTIME_NICE];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -608,13 +613,14 @@ static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat
|
|||||||
void cgroup_base_stat_cputime_show(struct seq_file *seq)
|
void cgroup_base_stat_cputime_show(struct seq_file *seq)
|
||||||
{
|
{
|
||||||
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
struct cgroup *cgrp = seq_css(seq)->cgroup;
|
||||||
u64 usage, utime, stime;
|
u64 usage, utime, stime, ntime;
|
||||||
|
|
||||||
if (cgroup_parent(cgrp)) {
|
if (cgroup_parent(cgrp)) {
|
||||||
cgroup_rstat_flush_hold(cgrp);
|
cgroup_rstat_flush_hold(cgrp);
|
||||||
usage = cgrp->bstat.cputime.sum_exec_runtime;
|
usage = cgrp->bstat.cputime.sum_exec_runtime;
|
||||||
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
|
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
|
||||||
&utime, &stime);
|
&utime, &stime);
|
||||||
|
ntime = cgrp->bstat.ntime;
|
||||||
cgroup_rstat_flush_release(cgrp);
|
cgroup_rstat_flush_release(cgrp);
|
||||||
} else {
|
} else {
|
||||||
/* cgrp->bstat of root is not actually used, reuse it */
|
/* cgrp->bstat of root is not actually used, reuse it */
|
||||||
@ -622,16 +628,19 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
|
|||||||
usage = cgrp->bstat.cputime.sum_exec_runtime;
|
usage = cgrp->bstat.cputime.sum_exec_runtime;
|
||||||
utime = cgrp->bstat.cputime.utime;
|
utime = cgrp->bstat.cputime.utime;
|
||||||
stime = cgrp->bstat.cputime.stime;
|
stime = cgrp->bstat.cputime.stime;
|
||||||
|
ntime = cgrp->bstat.ntime;
|
||||||
}
|
}
|
||||||
|
|
||||||
do_div(usage, NSEC_PER_USEC);
|
do_div(usage, NSEC_PER_USEC);
|
||||||
do_div(utime, NSEC_PER_USEC);
|
do_div(utime, NSEC_PER_USEC);
|
||||||
do_div(stime, NSEC_PER_USEC);
|
do_div(stime, NSEC_PER_USEC);
|
||||||
|
do_div(ntime, NSEC_PER_USEC);
|
||||||
|
|
||||||
seq_printf(seq, "usage_usec %llu\n"
|
seq_printf(seq, "usage_usec %llu\n"
|
||||||
"user_usec %llu\n"
|
"user_usec %llu\n"
|
||||||
"system_usec %llu\n",
|
"system_usec %llu\n"
|
||||||
usage, utime, stime);
|
"nice_usec %llu\n",
|
||||||
|
usage, utime, stime, ntime);
|
||||||
|
|
||||||
cgroup_force_idle_show(seq, &cgrp->bstat);
|
cgroup_force_idle_show(seq, &cgrp->bstat);
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "../kselftest.h"
|
#include "../kselftest.h"
|
||||||
#include "cgroup_util.h"
|
#include "cgroup_util.h"
|
||||||
@ -229,6 +230,79 @@ static int test_cpucg_stats(const char *root)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Creates a nice process that consumes CPU and checks that the elapsed
|
||||||
|
* usertime in the cgroup is close to the expected time.
|
||||||
|
*/
|
||||||
|
static int test_cpucg_nice(const char *root)
|
||||||
|
{
|
||||||
|
int ret = KSFT_FAIL;
|
||||||
|
int status;
|
||||||
|
long user_usec, nice_usec;
|
||||||
|
long usage_seconds = 2;
|
||||||
|
long expected_nice_usec = usage_seconds * USEC_PER_SEC;
|
||||||
|
char *cpucg;
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
cpucg = cg_name(root, "cpucg_test");
|
||||||
|
if (!cpucg)
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
if (cg_create(cpucg))
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
|
||||||
|
nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
|
||||||
|
if (nice_usec == -1)
|
||||||
|
ret = KSFT_SKIP;
|
||||||
|
if (user_usec != 0 || nice_usec != 0)
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We fork here to create a new process that can be niced without
|
||||||
|
* polluting the nice value of other selftests
|
||||||
|
*/
|
||||||
|
pid = fork();
|
||||||
|
if (pid < 0) {
|
||||||
|
goto cleanup;
|
||||||
|
} else if (pid == 0) {
|
||||||
|
struct cpu_hog_func_param param = {
|
||||||
|
.nprocs = 1,
|
||||||
|
.ts = {
|
||||||
|
.tv_sec = usage_seconds,
|
||||||
|
.tv_nsec = 0,
|
||||||
|
},
|
||||||
|
.clock_type = CPU_HOG_CLOCK_PROCESS,
|
||||||
|
};
|
||||||
|
char buf[64];
|
||||||
|
snprintf(buf, sizeof(buf), "%d", getpid());
|
||||||
|
if (cg_write(cpucg, "cgroup.procs", buf))
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
/* Try to keep niced CPU usage as constrained to hog_cpu as possible */
|
||||||
|
nice(1);
|
||||||
|
hog_cpus_timed(cpucg, &param);
|
||||||
|
exit(0);
|
||||||
|
} else {
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
if (!WIFEXITED(status))
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
|
||||||
|
nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
|
||||||
|
if (!values_close(nice_usec, expected_nice_usec, 1))
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
ret = KSFT_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
cg_destroy(cpucg);
|
||||||
|
free(cpucg);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
run_cpucg_weight_test(
|
run_cpucg_weight_test(
|
||||||
const char *root,
|
const char *root,
|
||||||
@ -686,6 +760,7 @@ struct cpucg_test {
|
|||||||
} tests[] = {
|
} tests[] = {
|
||||||
T(test_cpucg_subtree_control),
|
T(test_cpucg_subtree_control),
|
||||||
T(test_cpucg_stats),
|
T(test_cpucg_stats),
|
||||||
|
T(test_cpucg_nice),
|
||||||
T(test_cpucg_weight_overprovisioned),
|
T(test_cpucg_weight_overprovisioned),
|
||||||
T(test_cpucg_weight_underprovisioned),
|
T(test_cpucg_weight_underprovisioned),
|
||||||
T(test_cpucg_nested_weight_overprovisioned),
|
T(test_cpucg_nested_weight_overprovisioned),
|
||||||
|
Loading…
Reference in New Issue
Block a user