From 4da3abcefe178c650033f371e94fa10e80bce167 Mon Sep 17 00:00:00 2001
From: Luca Abeni
Date: Thu, 18 May 2017 22:13:32 +0200
Subject: [PATCH] sched/deadline: Do not reclaim the whole CPU bandwidth

Original GRUB tends to reclaim 100% of the CPU time... And this
allows a CPU hog to starve non-deadline tasks.
To address this issue, allow the scheduler to reclaim only a
specified fraction of CPU time, stored in the new "bw_ratio"
field of the dl runqueue structure.

Tested-by: Daniel Bristot de Oliveira
Signed-off-by: Luca Abeni
Signed-off-by: Peter Zijlstra (Intel)
Cc: Claudio Scordino
Cc: Joel Fernandes
Cc: Juri Lelli
Cc: Linus Torvalds
Cc: Mathieu Poirier
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Steven Rostedt
Cc: Thomas Gleixner
Cc: Tommaso Cucinotta
Link: http://lkml.kernel.org/r/1495138417-6203-6-git-send-email-luca.abeni@santannapisa.it
Signed-off-by: Ingo Molnar
---
 kernel/sched/core.c     | 11 +++++++++++
 kernel/sched/deadline.c | 12 +++++++++++-
 kernel/sched/sched.h    |  8 ++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b68a1fa05244..7abd06400a98 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6759,6 +6759,16 @@ static int sched_dl_global_validate(void)
 	return ret;
 }
 
+void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
+{
+	if (global_rt_runtime() == RUNTIME_INF) {
+		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
+	} else {
+		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
+			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
+	}
+}
+
 static void sched_dl_do_global(void)
 {
 	u64 new_bw = -1;
@@ -6784,6 +6794,7 @@ static void sched_dl_do_global(void)
 		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 
 		rcu_read_unlock_sched();
+		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
 	}
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0bee537554f6..6a0614b9c8d7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -268,6 +268,7 @@ void init_dl_rq(struct dl_rq *dl_rq)
 #endif
 
 	dl_rq->running_bw = 0;
+	init_dl_rq_bw_ratio(dl_rq);
 }
 
 #ifdef CONFIG_SMP
@@ -924,11 +925,20 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
  * Uact is the (per-runqueue) active utilization.
  * Since rq->dl.running_bw contains Uact * 2^BW_SHIFT, the result
  * has to be shifted right by BW_SHIFT.
+ * To reclaim only a fraction Umax of the CPU time, the
+ * runtime accounting rule is modified as
+ * "dq = -Uact / Umax dt"; since rq->dl.bw_ratio contains
+ * 2^RATIO_SHIFT / Umax, delta is multiplied by bw_ratio and shifted
+ * right by RATIO_SHIFT.
+ * Since delta is a 64 bit variable, to have an overflow its value
+ * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
+ * So, overflow is not an issue here.
  */
 u64 grub_reclaim(u64 delta, struct rq *rq)
 {
 	delta *= rq->dl.running_bw;
-	delta >>= BW_SHIFT;
+	delta *= rq->dl.bw_ratio;
+	delta >>= BW_SHIFT + RATIO_SHIFT;
 
 	return delta;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bb409ef40120..878fe757d6ad 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -565,6 +565,12 @@ struct dl_rq {
 	 * task blocks
 	 */
 	u64 running_bw;
+
+	/*
+	 * Inverse of the fraction of CPU utilization that can be reclaimed
+	 * by the GRUB algorithm.
+	 */
+	u64 bw_ratio;
 };
 
 #ifdef CONFIG_SMP
@@ -1495,9 +1501,11 @@ extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
 extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
 
 #define BW_SHIFT 20
 #define BW_UNIT (1 << BW_SHIFT)
+#define RATIO_SHIFT 8
 unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);