sched/balancing: Periodically decay max cost of idle balance

This patch builds on patch 2 and periodically decays the per-sched-domain
maximum cost of idle balancing (max_newidle_lb_cost) by approximately 1%
per second. It also decays the rq's max_idle_balance_cost value.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Jason Low 2013-09-13 11:26:53 -07:00 committed by Ingo Molnar
parent 9bd721c55c
commit f48627e686
4 changed files with 38 additions and 7 deletions

View File

@ -27,6 +27,7 @@
.balance_interval = 1, \ .balance_interval = 1, \
.nr_balance_failed = 0, \ .nr_balance_failed = 0, \
.max_newidle_lb_cost = 0, \ .max_newidle_lb_cost = 0, \
.next_decay_max_lb_cost = jiffies, \
} }
#define cpu_to_node(cpu) ((void)(cpu), 0) #define cpu_to_node(cpu) ((void)(cpu), 0)

View File

@ -810,7 +810,10 @@ struct sched_domain {
unsigned int nr_balance_failed; /* initialise to 0 */ unsigned int nr_balance_failed; /* initialise to 0 */
u64 last_update; u64 last_update;
/* idle_balance() stats */
u64 max_newidle_lb_cost; u64 max_newidle_lb_cost;
unsigned long next_decay_max_lb_cost;
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */ /* load_balance() stats */

View File

@ -107,6 +107,7 @@ int arch_update_cpu_topology(void);
.balance_interval = 1, \ .balance_interval = 1, \
.smt_gain = 1178, /* 15% */ \ .smt_gain = 1178, /* 15% */ \
.max_newidle_lb_cost = 0, \ .max_newidle_lb_cost = 0, \
.next_decay_max_lb_cost = jiffies, \
} }
#endif #endif
#endif /* CONFIG_SCHED_SMT */ #endif /* CONFIG_SCHED_SMT */
@ -137,6 +138,7 @@ int arch_update_cpu_topology(void);
.last_balance = jiffies, \ .last_balance = jiffies, \
.balance_interval = 1, \ .balance_interval = 1, \
.max_newidle_lb_cost = 0, \ .max_newidle_lb_cost = 0, \
.next_decay_max_lb_cost = jiffies, \
} }
#endif #endif
#endif /* CONFIG_SCHED_MC */ #endif /* CONFIG_SCHED_MC */
@ -169,6 +171,7 @@ int arch_update_cpu_topology(void);
.last_balance = jiffies, \ .last_balance = jiffies, \
.balance_interval = 1, \ .balance_interval = 1, \
.max_newidle_lb_cost = 0, \ .max_newidle_lb_cost = 0, \
.next_decay_max_lb_cost = jiffies, \
} }
#endif #endif

View File

@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
/* Earliest time when we have to do rebalance again */ /* Earliest time when we have to do rebalance again */
unsigned long next_balance = jiffies + 60*HZ; unsigned long next_balance = jiffies + 60*HZ;
int update_next_balance = 0; int update_next_balance = 0;
int need_serialize; int need_serialize, need_decay = 0;
u64 max_cost = 0;
update_blocked_averages(cpu); update_blocked_averages(cpu);
rcu_read_lock(); rcu_read_lock();
for_each_domain(cpu, sd) { for_each_domain(cpu, sd) {
/*
* Decay the newidle max times here because this is a regular
* visit to all the domains. Decay ~1% per second.
*/
if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
sd->max_newidle_lb_cost =
(sd->max_newidle_lb_cost * 253) / 256;
sd->next_decay_max_lb_cost = jiffies + HZ;
need_decay = 1;
}
max_cost += sd->max_newidle_lb_cost;
if (!(sd->flags & SD_LOAD_BALANCE)) if (!(sd->flags & SD_LOAD_BALANCE))
continue; continue;
/*
* Stop the load balance at this level. There is another
* CPU in our sched group which is doing load balancing more
* actively.
*/
if (!continue_balancing) {
if (need_decay)
continue;
break;
}
interval = sd->balance_interval; interval = sd->balance_interval;
if (idle != CPU_IDLE) if (idle != CPU_IDLE)
interval *= sd->busy_factor; interval *= sd->busy_factor;
@ -5723,14 +5747,14 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
next_balance = sd->last_balance + interval; next_balance = sd->last_balance + interval;
update_next_balance = 1; update_next_balance = 1;
} }
}
if (need_decay) {
/* /*
* Stop the load balance at this level. There is another * Ensure the rq-wide value also decays but keep it at a
* CPU in our sched group which is doing load balancing more * reasonable floor to avoid funnies with rq->avg_idle.
* actively.
*/ */
if (!continue_balancing) rq->max_idle_balance_cost =
break; max((u64)sysctl_sched_migration_cost, max_cost);
} }
rcu_read_unlock(); rcu_read_unlock();