mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-15 09:34:17 +00:00
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
 "Misc fixes: a cgroup fix, a fair-scheduler migration accounting fix, a
  cputime fix and two cpuacct cleanups"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/cpuacct: Simplify the cpuacct code
  sched/cpuacct: Rename parameter in cpuusage_write() for readability
  sched/fair: Add comments to explain select_idle_sibling()
  sched/fair: Fix fairness issue on migration
  sched/cgroup: Fix/cleanup cgroup teardown/init
  sched/cputime: Fix steal time accounting vs. CPU hotplug
This commit is contained in:
commit
be53f58fa0
@ -5371,6 +5371,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
|
||||
case CPU_UP_PREPARE:
|
||||
rq->calc_load_update = calc_load_update;
|
||||
account_reset_rq(rq);
|
||||
break;
|
||||
|
||||
case CPU_ONLINE:
|
||||
@ -7537,7 +7538,7 @@ void set_curr_task(int cpu, struct task_struct *p)
|
||||
/* task_group_lock serializes the addition/removal of task groups */
|
||||
static DEFINE_SPINLOCK(task_group_lock);
|
||||
|
||||
static void free_sched_group(struct task_group *tg)
|
||||
static void sched_free_group(struct task_group *tg)
|
||||
{
|
||||
free_fair_sched_group(tg);
|
||||
free_rt_sched_group(tg);
|
||||
@ -7563,7 +7564,7 @@ struct task_group *sched_create_group(struct task_group *parent)
|
||||
return tg;
|
||||
|
||||
err:
|
||||
free_sched_group(tg);
|
||||
sched_free_group(tg);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@ -7583,17 +7584,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
|
||||
}
|
||||
|
||||
/* rcu callback to free various structures associated with a task group */
|
||||
static void free_sched_group_rcu(struct rcu_head *rhp)
|
||||
static void sched_free_group_rcu(struct rcu_head *rhp)
|
||||
{
|
||||
/* now it should be safe to free those cfs_rqs */
|
||||
free_sched_group(container_of(rhp, struct task_group, rcu));
|
||||
sched_free_group(container_of(rhp, struct task_group, rcu));
|
||||
}
|
||||
|
||||
/* Destroy runqueue etc associated with a task group */
|
||||
void sched_destroy_group(struct task_group *tg)
|
||||
{
|
||||
/* wait for possible concurrent references to cfs_rqs complete */
|
||||
call_rcu(&tg->rcu, free_sched_group_rcu);
|
||||
call_rcu(&tg->rcu, sched_free_group_rcu);
|
||||
}
|
||||
|
||||
void sched_offline_group(struct task_group *tg)
|
||||
@ -8052,31 +8052,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
if (IS_ERR(tg))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
sched_online_group(tg, parent);
|
||||
|
||||
return &tg->css;
|
||||
}
|
||||
|
||||
static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
|
||||
static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct task_group *tg = css_tg(css);
|
||||
struct task_group *parent = css_tg(css->parent);
|
||||
|
||||
if (parent)
|
||||
sched_online_group(tg, parent);
|
||||
return 0;
|
||||
sched_offline_group(tg);
|
||||
}
|
||||
|
||||
static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct task_group *tg = css_tg(css);
|
||||
|
||||
sched_destroy_group(tg);
|
||||
}
|
||||
|
||||
static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct task_group *tg = css_tg(css);
|
||||
|
||||
sched_offline_group(tg);
|
||||
/*
|
||||
* Relies on the RCU grace period between css_released() and this.
|
||||
*/
|
||||
sched_free_group(tg);
|
||||
}
|
||||
|
||||
static void cpu_cgroup_fork(struct task_struct *task)
|
||||
@ -8436,9 +8431,8 @@ static struct cftype cpu_files[] = {
|
||||
|
||||
struct cgroup_subsys cpu_cgrp_subsys = {
|
||||
.css_alloc = cpu_cgroup_css_alloc,
|
||||
.css_released = cpu_cgroup_css_released,
|
||||
.css_free = cpu_cgroup_css_free,
|
||||
.css_online = cpu_cgroup_css_online,
|
||||
.css_offline = cpu_cgroup_css_offline,
|
||||
.fork = cpu_cgroup_fork,
|
||||
.can_attach = cpu_cgroup_can_attach,
|
||||
.attach = cpu_cgroup_attach,
|
||||
|
@ -145,13 +145,16 @@ static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
|
||||
}
|
||||
|
||||
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
|
||||
u64 reset)
|
||||
u64 val)
|
||||
{
|
||||
struct cpuacct *ca = css_ca(css);
|
||||
int err = 0;
|
||||
int i;
|
||||
|
||||
if (reset) {
|
||||
/*
|
||||
* Only allow '0' here to do a reset.
|
||||
*/
|
||||
if (val) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@ -235,23 +238,10 @@ static struct cftype files[] = {
|
||||
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||
{
|
||||
struct cpuacct *ca;
|
||||
int cpu;
|
||||
|
||||
cpu = task_cpu(tsk);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
ca = task_ca(tsk);
|
||||
|
||||
while (true) {
|
||||
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
*cpuusage += cputime;
|
||||
|
||||
ca = parent_ca(ca);
|
||||
if (!ca)
|
||||
break;
|
||||
}
|
||||
|
||||
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
|
||||
*this_cpu_ptr(ca->cpuusage) += cputime;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@ -260,18 +250,13 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||
*
|
||||
* Note: it's the caller that updates the account of the root cgroup.
|
||||
*/
|
||||
void cpuacct_account_field(struct task_struct *p, int index, u64 val)
|
||||
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
|
||||
{
|
||||
struct kernel_cpustat *kcpustat;
|
||||
struct cpuacct *ca;
|
||||
|
||||
rcu_read_lock();
|
||||
ca = task_ca(p);
|
||||
while (ca != &root_cpuacct) {
|
||||
kcpustat = this_cpu_ptr(ca->cpustat);
|
||||
kcpustat->cpustat[index] += val;
|
||||
ca = parent_ca(ca);
|
||||
}
|
||||
for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
|
||||
this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifdef CONFIG_CGROUP_CPUACCT
|
||||
|
||||
extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
|
||||
extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
|
||||
extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
|
||||
|
||||
#else
|
||||
|
||||
@ -10,7 +10,7 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||
}
|
||||
|
||||
static inline void
|
||||
cpuacct_account_field(struct task_struct *p, int index, u64 val)
|
||||
cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -3181,17 +3181,25 @@ static inline void check_schedstat_required(void)
|
||||
static void
|
||||
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
/*
|
||||
* Update the normalized vruntime before updating min_vruntime
|
||||
* through calling update_curr().
|
||||
*/
|
||||
if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
|
||||
se->vruntime += cfs_rq->min_vruntime;
|
||||
bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
|
||||
bool curr = cfs_rq->curr == se;
|
||||
|
||||
/*
|
||||
* Update run-time statistics of the 'current'.
|
||||
* If we're the current task, we must renormalise before calling
|
||||
* update_curr().
|
||||
*/
|
||||
if (renorm && curr)
|
||||
se->vruntime += cfs_rq->min_vruntime;
|
||||
|
||||
update_curr(cfs_rq);
|
||||
|
||||
/*
|
||||
* Otherwise, renormalise after, such that we're placed at the current
|
||||
* moment in time, instead of some random moment in the past.
|
||||
*/
|
||||
if (renorm && !curr)
|
||||
se->vruntime += cfs_rq->min_vruntime;
|
||||
|
||||
enqueue_entity_load_avg(cfs_rq, se);
|
||||
account_entity_enqueue(cfs_rq, se);
|
||||
update_cfs_shares(cfs_rq);
|
||||
@ -3207,7 +3215,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
update_stats_enqueue(cfs_rq, se);
|
||||
check_spread(cfs_rq, se);
|
||||
}
|
||||
if (se != cfs_rq->curr)
|
||||
if (!curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
se->on_rq = 1;
|
||||
|
||||
@ -5071,7 +5079,19 @@ static int select_idle_sibling(struct task_struct *p, int target)
|
||||
return i;
|
||||
|
||||
/*
|
||||
* Otherwise, iterate the domains and find an elegible idle cpu.
|
||||
* Otherwise, iterate the domains and find an eligible idle cpu.
|
||||
*
|
||||
* A completely idle sched group at higher domains is more
|
||||
* desirable than an idle group at a lower level, because lower
|
||||
* domains have smaller groups and usually share hardware
|
||||
* resources which causes tasks to contend on them, e.g. x86
|
||||
* hyperthread siblings in the lowest domain (SMT) can contend
|
||||
* on the shared cpu pipeline.
|
||||
*
|
||||
* However, while we prefer idle groups at higher domains
|
||||
* finding an idle cpu at the lowest domain is still better than
|
||||
* returning 'target', which we've already established, isn't
|
||||
* idle.
|
||||
*/
|
||||
sd = rcu_dereference(per_cpu(sd_llc, target));
|
||||
for_each_lower_domain(sd) {
|
||||
@ -5081,11 +5101,16 @@ static int select_idle_sibling(struct task_struct *p, int target)
|
||||
tsk_cpus_allowed(p)))
|
||||
goto next;
|
||||
|
||||
/* Ensure the entire group is idle */
|
||||
for_each_cpu(i, sched_group_cpus(sg)) {
|
||||
if (i == target || !idle_cpu(i))
|
||||
goto next;
|
||||
}
|
||||
|
||||
/*
|
||||
* It doesn't matter which cpu we pick, the
|
||||
* whole group is idle.
|
||||
*/
|
||||
target = cpumask_first_and(sched_group_cpus(sg),
|
||||
tsk_cpus_allowed(p));
|
||||
goto done;
|
||||
|
@ -1841,3 +1841,16 @@ static inline void cpufreq_trigger_update(u64 time)
|
||||
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
|
||||
static inline void cpufreq_trigger_update(u64 time) {}
|
||||
#endif /* CONFIG_CPU_FREQ */
|
||||
|
||||
static inline void account_reset_rq(struct rq *rq)
|
||||
{
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
rq->prev_irq_time = 0;
|
||||
#endif
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
rq->prev_steal_time = 0;
|
||||
#endif
|
||||
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
|
||||
rq->prev_steal_time_rq = 0;
|
||||
#endif
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user