mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 21:23:23 +00:00
Merge branch into tip/master: 'sched/core'
# New commits in sched/core: af98d8a36a96 ("sched/fair: Fix CPU bandwidth limit bypass during CPU hotplug") 7675361ff9a1 ("sched: deadline: Cleanup goto label in pick_earliest_pushable_dl_task") 7d5265ffcd8b ("rseq: Validate read-only fields under DEBUG_RSEQ config") 2a77e4be12cb ("sched/fair: Untangle NEXT_BUDDY and pick_next_task()") 95d9fed3a2ae ("sched/fair: Mark m*_vruntime() with __maybe_unused") 0429489e0928 ("sched/fair: Fix variable declaration position") 61b82dfb6b7e ("sched/fair: Do not try to migrate delayed dequeue task") 736c55a02c47 ("sched/fair: Rename cfs_rq.nr_running into nr_queued") 43eef7c3a4a6 ("sched/fair: Remove unused cfs_rq.idle_nr_running") 31898e7b87dd ("sched/fair: Rename cfs_rq.idle_h_nr_running into h_nr_idle") 9216582b0bfb ("sched/fair: Removed unsued cfs_rq.h_nr_delayed") 1a49104496d3 ("sched/fair: Use the new cfs_rq.h_nr_runnable") c2a295bffeaf ("sched/fair: Add new cfs_rq.h_nr_runnable") 7b8a702d9438 ("sched/fair: Rename h_nr_running into h_nr_queued") c907cd44a108 ("sched: Unify HK_TYPE_{TIMER|TICK|MISC} to HK_TYPE_KERNEL_NOISE") 6010d245ddc9 ("sched/isolation: Consolidate housekeeping cpumasks that are always identical") 1174b9344bc7 ("sched/isolation: Make "isolcpus=nohz" equivalent to "nohz_full"") ae5c677729e9 ("sched/core: Remove HK_TYPE_SCHED") a76328d44c7a ("sched/fair: Remove CONFIG_CFS_BANDWIDTH=n definition of cfs_bandwidth_used()") 3a181f20fb4e ("sched/deadline: Consolidate Timer Cancellation") 53916d5fd3c0 ("sched/deadline: Check bandwidth overflow earlier for hotplug") d4742f6ed7ea ("sched/deadline: Correctly account for allocated bandwidth during hotplug") 41d4200b7103 ("sched/deadline: Restore dl_server bandwidth on non-destructive root domain changes") 59297e2093ce ("sched: add READ_ONCE to task_on_rq_queued") 108ad0999085 ("sched: Don't try to catch up excess steal time.") Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
c779bc69c8
@ -2432,7 +2432,9 @@
|
||||
specified in the flag list (default: domain):
|
||||
|
||||
nohz
|
||||
Disable the tick when a single task runs.
|
||||
Disable the tick when a single task runs as well as
|
||||
disabling other kernel noises like having RCU callbacks
|
||||
offloaded. This is equivalent to the nohz_full parameter.
|
||||
|
||||
A residual 1Hz tick is offloaded to workqueues, which you
|
||||
need to affine to housekeeping through the global
|
||||
|
@ -1374,6 +1374,15 @@ struct task_struct {
|
||||
* with respect to preemption.
|
||||
*/
|
||||
unsigned long rseq_event_mask;
|
||||
# ifdef CONFIG_DEBUG_RSEQ
|
||||
/*
|
||||
* This is a place holder to save a copy of the rseq fields for
|
||||
* validation of read-only fields. The struct rseq has a
|
||||
* variable-length array at the end, so it cannot be used
|
||||
* directly. Reserve a size large enough for the known fields.
|
||||
*/
|
||||
char rseq_fields[sizeof(struct rseq)];
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_MM_CID
|
||||
|
@ -7,16 +7,21 @@
|
||||
#include <linux/tick.h>
|
||||
|
||||
enum hk_type {
|
||||
HK_TYPE_TIMER,
|
||||
HK_TYPE_RCU,
|
||||
HK_TYPE_MISC,
|
||||
HK_TYPE_SCHED,
|
||||
HK_TYPE_TICK,
|
||||
HK_TYPE_DOMAIN,
|
||||
HK_TYPE_WQ,
|
||||
HK_TYPE_MANAGED_IRQ,
|
||||
HK_TYPE_KTHREAD,
|
||||
HK_TYPE_MAX
|
||||
HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_MAX,
|
||||
|
||||
/*
|
||||
* The following housekeeping types are only set by the nohz_full
|
||||
* boot commandline option. So they can share the same value.
|
||||
*/
|
||||
HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CPU_ISOLATION
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/rseq.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
@ -25,6 +26,78 @@
|
||||
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
|
||||
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
|
||||
|
||||
#ifdef CONFIG_DEBUG_RSEQ
|
||||
static struct rseq *rseq_kernel_fields(struct task_struct *t)
|
||||
{
|
||||
return (struct rseq *) t->rseq_fields;
|
||||
}
|
||||
|
||||
static int rseq_validate_ro_fields(struct task_struct *t)
|
||||
{
|
||||
static DEFINE_RATELIMIT_STATE(_rs,
|
||||
DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
u32 cpu_id_start, cpu_id, node_id, mm_cid;
|
||||
struct rseq __user *rseq = t->rseq;
|
||||
|
||||
/*
|
||||
* Validate fields which are required to be read-only by
|
||||
* user-space.
|
||||
*/
|
||||
if (!user_read_access_begin(rseq, t->rseq_len))
|
||||
goto efault;
|
||||
unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
|
||||
unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
|
||||
unsafe_get_user(node_id, &rseq->node_id, efault_end);
|
||||
unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
|
||||
user_read_access_end();
|
||||
|
||||
if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
|
||||
cpu_id != rseq_kernel_fields(t)->cpu_id ||
|
||||
node_id != rseq_kernel_fields(t)->node_id ||
|
||||
mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
|
||||
|
||||
pr_warn("Detected rseq corruption for pid: %d, name: %s\n"
|
||||
"\tcpu_id_start: %u ?= %u\n"
|
||||
"\tcpu_id: %u ?= %u\n"
|
||||
"\tnode_id: %u ?= %u\n"
|
||||
"\tmm_cid: %u ?= %u\n",
|
||||
t->pid, t->comm,
|
||||
cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
|
||||
cpu_id, rseq_kernel_fields(t)->cpu_id,
|
||||
node_id, rseq_kernel_fields(t)->node_id,
|
||||
mm_cid, rseq_kernel_fields(t)->mm_cid);
|
||||
}
|
||||
|
||||
/* For now, only print a console warning on mismatch. */
|
||||
return 0;
|
||||
|
||||
efault_end:
|
||||
user_read_access_end();
|
||||
efault:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/*
 * Record, in the in-kernel copy kept for task @t, the values of the rseq
 * fields that user-space must treat as read-only. rseq_validate_ro_fields()
 * later compares the user-space fields against this copy, so each field must
 * be stored from its own argument.
 *
 * @cpu_id_start and @cpu_id are distinct values on the reset path
 * (0 vs. RSEQ_CPU_ID_UNINITIALIZED); storing @cpu_id into cpu_id_start would
 * make the next validation report a spurious corruption.
 */
static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
			       u32 node_id, u32 mm_cid)
{
	struct rseq *shadow = rseq_kernel_fields(t);

	shadow->cpu_id_start = cpu_id_start;
	shadow->cpu_id = cpu_id;
	shadow->node_id = node_id;
	shadow->mm_cid = mm_cid;
}
|
||||
#else
|
||||
/*
 * Variant used when CONFIG_DEBUG_RSEQ is not set: no validation is performed
 * and success (0) is always returned.
 */
static int rseq_validate_ro_fields(struct task_struct *t)
{
	return 0;
}
|
||||
|
||||
/*
 * Variant used when CONFIG_DEBUG_RSEQ is not set: nothing is recorded, so
 * this is a no-op.
 */
static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
			       u32 node_id, u32 mm_cid)
{
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
*
|
||||
* Restartable sequences are a lightweight interface that allows
|
||||
@ -92,6 +165,11 @@ static int rseq_update_cpu_node_id(struct task_struct *t)
|
||||
u32 node_id = cpu_to_node(cpu_id);
|
||||
u32 mm_cid = task_mm_cid(t);
|
||||
|
||||
/*
|
||||
* Validate read-only rseq fields.
|
||||
*/
|
||||
if (rseq_validate_ro_fields(t))
|
||||
goto efault;
|
||||
WARN_ON_ONCE((int) mm_cid < 0);
|
||||
if (!user_write_access_begin(rseq, t->rseq_len))
|
||||
goto efault;
|
||||
@ -105,6 +183,7 @@ static int rseq_update_cpu_node_id(struct task_struct *t)
|
||||
* t->rseq_len != ORIG_RSEQ_SIZE.
|
||||
*/
|
||||
user_write_access_end();
|
||||
rseq_set_ro_fields(t, cpu_id, cpu_id, node_id, mm_cid);
|
||||
trace_rseq_update(t);
|
||||
return 0;
|
||||
|
||||
@ -119,6 +198,11 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
|
||||
u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
|
||||
mm_cid = 0;
|
||||
|
||||
/*
|
||||
* Validate read-only rseq fields.
|
||||
*/
|
||||
if (!rseq_validate_ro_fields(t))
|
||||
return -EFAULT;
|
||||
/*
|
||||
* Reset cpu_id_start to its initial state (0).
|
||||
*/
|
||||
@ -141,6 +225,9 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
|
||||
*/
|
||||
if (put_user(mm_cid, &t->rseq->mm_cid))
|
||||
return -EFAULT;
|
||||
|
||||
rseq_set_ro_fields(t, cpu_id_start, cpu_id, node_id, mm_cid);
|
||||
|
||||
/*
|
||||
* Additional feature fields added after ORIG_RSEQ_SIZE
|
||||
* need to be conditionally reset only if
|
||||
@ -423,6 +510,17 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
|
||||
current->rseq = rseq;
|
||||
current->rseq_len = rseq_len;
|
||||
current->rseq_sig = sig;
|
||||
#ifdef CONFIG_DEBUG_RSEQ
|
||||
/*
|
||||
* Initialize the in-kernel rseq fields copy for validation of
|
||||
* read-only fields.
|
||||
*/
|
||||
if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
|
||||
get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
|
||||
get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
|
||||
get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
/*
|
||||
* If rseq was previously inactive, and has just been
|
||||
* registered, ensure the cpu_id_start and cpu_id fields
|
||||
|
@ -766,13 +766,15 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
|
||||
#endif
|
||||
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
|
||||
if (static_key_false((&paravirt_steal_rq_enabled))) {
|
||||
steal = paravirt_steal_clock(cpu_of(rq));
|
||||
u64 prev_steal;
|
||||
|
||||
steal = prev_steal = paravirt_steal_clock(cpu_of(rq));
|
||||
steal -= rq->prev_steal_time_rq;
|
||||
|
||||
if (unlikely(steal > delta))
|
||||
steal = delta;
|
||||
|
||||
rq->prev_steal_time_rq += steal;
|
||||
rq->prev_steal_time_rq = prev_steal;
|
||||
delta -= steal;
|
||||
}
|
||||
#endif
|
||||
@ -1168,13 +1170,13 @@ int get_nohz_timer_target(void)
|
||||
struct sched_domain *sd;
|
||||
const struct cpumask *hk_mask;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) {
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE)) {
|
||||
if (!idle_cpu(cpu))
|
||||
return cpu;
|
||||
default_cpu = cpu;
|
||||
}
|
||||
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE);
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
@ -1189,7 +1191,7 @@ int get_nohz_timer_target(void)
|
||||
}
|
||||
|
||||
if (default_cpu == -1)
|
||||
default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
|
||||
default_cpu = housekeeping_any_cpu(HK_TYPE_KERNEL_NOISE);
|
||||
|
||||
return default_cpu;
|
||||
}
|
||||
@ -1341,7 +1343,7 @@ bool sched_can_stop_tick(struct rq *rq)
|
||||
if (scx_enabled() && !scx_can_stop_tick(rq))
|
||||
return false;
|
||||
|
||||
if (rq->cfs.h_nr_running > 1)
|
||||
if (rq->cfs.h_nr_queued > 1)
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -5632,7 +5634,7 @@ void sched_tick(void)
|
||||
unsigned long hw_pressure;
|
||||
u64 resched_latency;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE))
|
||||
arch_scale_freq_tick();
|
||||
|
||||
sched_clock_tick();
|
||||
@ -5771,7 +5773,7 @@ static void sched_tick_start(int cpu)
|
||||
int os;
|
||||
struct tick_work *twork;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!tick_work_cpu);
|
||||
@ -5792,7 +5794,7 @@ static void sched_tick_stop(int cpu)
|
||||
struct tick_work *twork;
|
||||
int os;
|
||||
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||
if (housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!tick_work_cpu);
|
||||
@ -6018,7 +6020,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
* opportunity to pull in more work from other CPUs.
|
||||
*/
|
||||
if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
|
||||
rq->nr_running == rq->cfs.h_nr_running)) {
|
||||
rq->nr_running == rq->cfs.h_nr_queued)) {
|
||||
|
||||
p = pick_next_task_fair(rq, prev, rf);
|
||||
if (unlikely(p == RETRY_TASK))
|
||||
@ -8180,19 +8182,14 @@ static void cpuset_cpu_active(void)
|
||||
cpuset_update_active_cpus();
|
||||
}
|
||||
|
||||
static int cpuset_cpu_inactive(unsigned int cpu)
|
||||
static void cpuset_cpu_inactive(unsigned int cpu)
|
||||
{
|
||||
if (!cpuhp_tasks_frozen) {
|
||||
int ret = dl_bw_check_overflow(cpu);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
cpuset_update_active_cpus();
|
||||
} else {
|
||||
num_cpus_frozen++;
|
||||
partition_sched_domains(1, NULL, NULL);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void sched_smt_present_inc(int cpu)
|
||||
@ -8254,6 +8251,11 @@ int sched_cpu_deactivate(unsigned int cpu)
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
int ret;
|
||||
|
||||
ret = dl_bw_deactivate(cpu);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Remove CPU from nohz.idle_cpus_mask to prevent participating in
|
||||
* load balancing when not active
|
||||
@ -8299,15 +8301,7 @@ int sched_cpu_deactivate(unsigned int cpu)
|
||||
return 0;
|
||||
|
||||
sched_update_numa(cpu, false);
|
||||
ret = cpuset_cpu_inactive(cpu);
|
||||
if (ret) {
|
||||
sched_smt_present_inc(cpu);
|
||||
sched_set_rq_online(rq, cpu);
|
||||
balance_push_set(cpu, false);
|
||||
set_cpu_active(cpu, true);
|
||||
sched_update_numa(cpu, true);
|
||||
return ret;
|
||||
}
|
||||
cpuset_cpu_inactive(cpu);
|
||||
sched_domains_numa_masks_clear(cpu);
|
||||
return 0;
|
||||
}
|
||||
|
@ -342,6 +342,29 @@ static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_s
|
||||
__add_rq_bw(new_bw, &rq->dl);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
void cancel_dl_timer(struct sched_dl_entity *dl_se, struct hrtimer *timer)
|
||||
{
|
||||
/*
|
||||
* If the timer callback was running (hrtimer_try_to_cancel == -1),
|
||||
* it will eventually call put_task_struct().
|
||||
*/
|
||||
if (hrtimer_try_to_cancel(timer) == 1 && !dl_server(dl_se))
|
||||
put_task_struct(dl_task_of(dl_se));
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
void cancel_replenish_timer(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
cancel_dl_timer(dl_se, &dl_se->dl_timer);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
void cancel_inactive_timer(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
cancel_dl_timer(dl_se, &dl_se->inactive_timer);
|
||||
}
|
||||
|
||||
static void dl_change_utilization(struct task_struct *p, u64 new_bw)
|
||||
{
|
||||
WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
|
||||
@ -495,10 +518,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
|
||||
* will not touch the rq's active utilization,
|
||||
* so we are still safe.
|
||||
*/
|
||||
if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
|
||||
if (!dl_server(dl_se))
|
||||
put_task_struct(dl_task_of(dl_se));
|
||||
}
|
||||
cancel_inactive_timer(dl_se);
|
||||
} else {
|
||||
/*
|
||||
* Since "dl_non_contending" is not set, the
|
||||
@ -2115,13 +2135,8 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
||||
* The replenish timer needs to be canceled. No
|
||||
* problem if it fires concurrently: boosted threads
|
||||
* are ignored in dl_task_timer().
|
||||
*
|
||||
* If the timer callback was running (hrtimer_try_to_cancel == -1),
|
||||
* it will eventually call put_task_struct().
|
||||
*/
|
||||
if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
|
||||
!dl_server(&p->dl))
|
||||
put_task_struct(p);
|
||||
cancel_replenish_timer(&p->dl);
|
||||
p->dl.dl_throttled = 0;
|
||||
}
|
||||
} else if (!dl_prio(p->normal_prio)) {
|
||||
@ -2289,8 +2304,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
|
||||
* will not touch the rq's active utilization,
|
||||
* so we are still safe.
|
||||
*/
|
||||
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
|
||||
put_task_struct(p);
|
||||
cancel_inactive_timer(&p->dl);
|
||||
}
|
||||
sub_rq_bw(&p->dl, &rq->dl);
|
||||
rq_unlock(rq, &rf);
|
||||
@ -2506,16 +2520,13 @@ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu
|
||||
return NULL;
|
||||
|
||||
next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
|
||||
|
||||
next_node:
|
||||
if (next_node) {
|
||||
while (next_node) {
|
||||
p = __node_2_pdl(next_node);
|
||||
|
||||
if (task_is_pushable(rq, p, cpu))
|
||||
return p;
|
||||
|
||||
next_node = rb_next(next_node);
|
||||
goto next_node;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@ -2964,11 +2975,22 @@ void dl_add_task_root_domain(struct task_struct *p)
|
||||
|
||||
void dl_clear_root_domain(struct root_domain *rd)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
|
||||
guard(raw_spinlock_irqsave)(&rd->dl_bw.lock);
|
||||
rd->dl_bw.total_bw = 0;
|
||||
raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
|
||||
|
||||
/*
|
||||
* dl_server bandwidth is only restored when CPUs are attached to root
|
||||
* domains (after domains are created or CPUs moved back to the
|
||||
* default root domain).
|
||||
*/
|
||||
for_each_cpu(i, rd->span) {
|
||||
struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
|
||||
|
||||
if (dl_server(dl_se) && cpu_active(i))
|
||||
rd->dl_bw.total_bw += dl_se->dl_bw;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
@ -3029,8 +3051,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
|
||||
*/
|
||||
static void switched_to_dl(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
|
||||
put_task_struct(p);
|
||||
cancel_inactive_timer(&p->dl);
|
||||
|
||||
/*
|
||||
* In case a task is setscheduled to SCHED_DEADLINE we need to keep
|
||||
@ -3453,29 +3474,31 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
|
||||
}
|
||||
|
||||
enum dl_bw_request {
|
||||
dl_bw_req_check_overflow = 0,
|
||||
dl_bw_req_deactivate = 0,
|
||||
dl_bw_req_alloc,
|
||||
dl_bw_req_free
|
||||
};
|
||||
|
||||
static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long flags, cap;
|
||||
struct dl_bw *dl_b;
|
||||
bool overflow = 0;
|
||||
u64 fair_server_bw = 0;
|
||||
|
||||
rcu_read_lock_sched();
|
||||
dl_b = dl_bw_of(cpu);
|
||||
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
||||
|
||||
if (req == dl_bw_req_free) {
|
||||
cap = dl_bw_capacity(cpu);
|
||||
switch (req) {
|
||||
case dl_bw_req_free:
|
||||
__dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
|
||||
} else {
|
||||
unsigned long cap = dl_bw_capacity(cpu);
|
||||
|
||||
break;
|
||||
case dl_bw_req_alloc:
|
||||
overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
|
||||
|
||||
if (req == dl_bw_req_alloc && !overflow) {
|
||||
if (!overflow) {
|
||||
/*
|
||||
* We reserve space in the destination
|
||||
* root_domain, as we can't fail after this point.
|
||||
@ -3484,6 +3507,42 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
|
||||
*/
|
||||
__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
|
||||
}
|
||||
break;
|
||||
case dl_bw_req_deactivate:
|
||||
/*
|
||||
* cpu is not off yet, but we need to do the math by
|
||||
* considering it off already (i.e., what would happen if we
|
||||
* turn cpu off?).
|
||||
*/
|
||||
cap -= arch_scale_cpu_capacity(cpu);
|
||||
|
||||
/*
|
||||
* cpu is going offline and NORMAL tasks will be moved away
|
||||
* from it. We can thus discount dl_server bandwidth
|
||||
* contribution as it won't need to be servicing tasks after
|
||||
* the cpu is off.
|
||||
*/
|
||||
if (cpu_rq(cpu)->fair_server.dl_server)
|
||||
fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw;
|
||||
|
||||
/*
|
||||
* Not much to check if no DEADLINE bandwidth is present.
|
||||
* dl_servers we can discount, as tasks will be moved out the
|
||||
* offlined CPUs anyway.
|
||||
*/
|
||||
if (dl_b->total_bw - fair_server_bw > 0) {
|
||||
/*
|
||||
* Leaving at least one CPU for DEADLINE tasks seems a
|
||||
* wise thing to do. As said above, cpu is not offline
|
||||
* yet, so account for that.
|
||||
*/
|
||||
if (dl_bw_cpus(cpu) - 1)
|
||||
overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0);
|
||||
else
|
||||
overflow = 1;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
||||
@ -3492,9 +3551,9 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
|
||||
return overflow ? -EBUSY : 0;
|
||||
}
|
||||
|
||||
int dl_bw_check_overflow(int cpu)
|
||||
int dl_bw_deactivate(int cpu)
|
||||
{
|
||||
return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
|
||||
return dl_bw_manage(dl_bw_req_deactivate, cpu, 0);
|
||||
}
|
||||
|
||||
int dl_bw_alloc(int cpu, u64 dl_bw)
|
||||
|
@ -379,7 +379,7 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (rq->cfs.h_nr_running) {
|
||||
if (rq->cfs.h_nr_queued) {
|
||||
update_rq_clock(rq);
|
||||
dl_server_stop(&rq->fair_server);
|
||||
}
|
||||
@ -392,7 +392,7 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
|
||||
printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
|
||||
cpu_of(rq));
|
||||
|
||||
if (rq->cfs.h_nr_running)
|
||||
if (rq->cfs.h_nr_queued)
|
||||
dl_server_start(&rq->fair_server);
|
||||
}
|
||||
|
||||
@ -843,13 +843,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
SPLIT_NS(right_vruntime));
|
||||
spread = right_vruntime - left_vruntime;
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
|
||||
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running",
|
||||
cfs_rq->idle_nr_running);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running",
|
||||
cfs_rq->idle_h_nr_running);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "nr_queued", cfs_rq->nr_queued);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "h_nr_runnable", cfs_rq->h_nr_runnable);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "h_nr_queued", cfs_rq->h_nr_queued);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "h_nr_idle", cfs_rq->h_nr_idle);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
|
||||
#ifdef CONFIG_SMP
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "load_avg",
|
||||
|
@ -523,7 +523,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
|
||||
* Scheduling class tree data structure manipulation methods:
|
||||
*/
|
||||
|
||||
static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime)
|
||||
static inline __maybe_unused u64 max_vruntime(u64 max_vruntime, u64 vruntime)
|
||||
{
|
||||
s64 delta = (s64)(vruntime - max_vruntime);
|
||||
if (delta > 0)
|
||||
@ -532,7 +532,7 @@ static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime)
|
||||
return max_vruntime;
|
||||
}
|
||||
|
||||
static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
|
||||
static inline __maybe_unused u64 min_vruntime(u64 min_vruntime, u64 vruntime)
|
||||
{
|
||||
s64 delta = (s64)(vruntime - min_vruntime);
|
||||
if (delta < 0)
|
||||
@ -915,7 +915,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
|
||||
* We can safely skip eligibility check if there is only one entity
|
||||
* in this cfs_rq, saving some cycles.
|
||||
*/
|
||||
if (cfs_rq->nr_running == 1)
|
||||
if (cfs_rq->nr_queued == 1)
|
||||
return curr && curr->on_rq ? curr : se;
|
||||
|
||||
if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
|
||||
@ -1250,7 +1250,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
||||
|
||||
account_cfs_rq_runtime(cfs_rq, delta_exec);
|
||||
|
||||
if (cfs_rq->nr_running == 1)
|
||||
if (cfs_rq->nr_queued == 1)
|
||||
return;
|
||||
|
||||
if (resched || did_preempt_short(cfs_rq, curr)) {
|
||||
@ -2131,7 +2131,7 @@ static void update_numa_stats(struct task_numa_env *env,
|
||||
ns->load += cpu_load(rq);
|
||||
ns->runnable += cpu_runnable(rq);
|
||||
ns->util += cpu_util_cfs(cpu);
|
||||
ns->nr_running += rq->cfs.h_nr_running;
|
||||
ns->nr_running += rq->cfs.h_nr_runnable;
|
||||
ns->compute_capacity += capacity_of(cpu);
|
||||
|
||||
if (find_idle && idle_core < 0 && !rq->nr_running && idle_cpu(cpu)) {
|
||||
@ -3682,9 +3682,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
list_add(&se->group_node, &rq->cfs_tasks);
|
||||
}
|
||||
#endif
|
||||
cfs_rq->nr_running++;
|
||||
if (se_is_idle(se))
|
||||
cfs_rq->idle_nr_running++;
|
||||
cfs_rq->nr_queued++;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -3697,9 +3695,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
list_del_init(&se->group_node);
|
||||
}
|
||||
#endif
|
||||
cfs_rq->nr_running--;
|
||||
if (se_is_idle(se))
|
||||
cfs_rq->idle_nr_running--;
|
||||
cfs_rq->nr_queued--;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5233,7 +5229,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
|
||||
|
||||
static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return !cfs_rq->nr_running;
|
||||
return !cfs_rq->nr_queued;
|
||||
}
|
||||
|
||||
#define UPDATE_TG 0x0
|
||||
@ -5289,7 +5285,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
*
|
||||
* EEVDF: placement strategy #1 / #2
|
||||
*/
|
||||
if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
|
||||
if (sched_feat(PLACE_LAG) && cfs_rq->nr_queued && se->vlag) {
|
||||
struct sched_entity *curr = cfs_rq->curr;
|
||||
unsigned long load;
|
||||
|
||||
@ -5382,8 +5378,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
|
||||
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
|
||||
|
||||
static inline bool cfs_bandwidth_used(void);
|
||||
|
||||
static void
|
||||
requeue_delayed_entity(struct sched_entity *se);
|
||||
|
||||
@ -5405,7 +5399,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
* When enqueuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
* - For group_entity, update its runnable_weight to reflect the new
|
||||
* h_nr_running of its group cfs_rq.
|
||||
* h_nr_runnable of its group cfs_rq.
|
||||
* - For group_entity, update its weight to reflect the new share of
|
||||
* its group cfs_rq
|
||||
* - Add its new weight to cfs_rq->load.weight
|
||||
@ -5438,7 +5432,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
se->on_rq = 1;
|
||||
|
||||
if (cfs_rq->nr_running == 1) {
|
||||
if (cfs_rq->nr_queued == 1) {
|
||||
check_enqueue_throttle(cfs_rq);
|
||||
if (!throttled_hierarchy(cfs_rq)) {
|
||||
list_add_leaf_cfs_rq(cfs_rq);
|
||||
@ -5480,7 +5474,7 @@ static void set_delayed(struct sched_entity *se)
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
cfs_rq->h_nr_delayed++;
|
||||
cfs_rq->h_nr_runnable--;
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
}
|
||||
@ -5492,7 +5486,7 @@ static void clear_delayed(struct sched_entity *se)
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
cfs_rq->h_nr_delayed--;
|
||||
cfs_rq->h_nr_runnable++;
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
}
|
||||
@ -5509,6 +5503,7 @@ static bool
|
||||
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
bool sleep = flags & DEQUEUE_SLEEP;
|
||||
int action = UPDATE_TG;
|
||||
|
||||
update_curr(cfs_rq);
|
||||
clear_buddies(cfs_rq, se);
|
||||
@ -5534,7 +5529,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
}
|
||||
}
|
||||
|
||||
int action = UPDATE_TG;
|
||||
if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
|
||||
action |= DO_DETACH;
|
||||
|
||||
@ -5542,7 +5536,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
* When dequeuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
* - For group_entity, update its runnable_weight to reflect the new
|
||||
* h_nr_running of its group cfs_rq.
|
||||
* h_nr_runnable of its group cfs_rq.
|
||||
* - Subtract its previous weight from cfs_rq->load.weight.
|
||||
* - For group entity, update its weight to reflect the new share
|
||||
* of its group cfs_rq.
|
||||
@ -5580,7 +5574,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
if (flags & DEQUEUE_DELAYED)
|
||||
finish_delayed_dequeue_entity(se);
|
||||
|
||||
if (cfs_rq->nr_running == 0)
|
||||
if (cfs_rq->nr_queued == 0)
|
||||
update_idle_cfs_rq_clock_pelt(cfs_rq);
|
||||
|
||||
return true;
|
||||
@ -5642,17 +5636,19 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
|
||||
static struct sched_entity *
|
||||
pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct sched_entity *se;
|
||||
|
||||
/*
|
||||
* Enabling NEXT_BUDDY will affect latency but not fairness.
|
||||
* Picking the ->next buddy will affect latency but not fairness.
|
||||
*/
|
||||
if (sched_feat(NEXT_BUDDY) &&
|
||||
if (sched_feat(PICK_BUDDY) &&
|
||||
cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) {
|
||||
/* ->next will never be delayed */
|
||||
SCHED_WARN_ON(cfs_rq->next->sched_delayed);
|
||||
return cfs_rq->next;
|
||||
}
|
||||
|
||||
struct sched_entity *se = pick_eevdf(cfs_rq);
|
||||
se = pick_eevdf(cfs_rq);
|
||||
if (se->sched_delayed) {
|
||||
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
|
||||
/*
|
||||
@ -5928,7 +5924,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
|
||||
list_del_leaf_cfs_rq(cfs_rq);
|
||||
|
||||
SCHED_WARN_ON(cfs_rq->throttled_clock_self);
|
||||
if (cfs_rq->nr_running)
|
||||
if (cfs_rq->nr_queued)
|
||||
cfs_rq->throttled_clock_self = rq_clock(rq);
|
||||
}
|
||||
cfs_rq->throttle_count++;
|
||||
@ -5941,8 +5937,8 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
|
||||
struct sched_entity *se;
|
||||
long task_delta, idle_task_delta, delayed_delta, dequeue = 1;
|
||||
long rq_h_nr_running = rq->cfs.h_nr_running;
|
||||
long queued_delta, runnable_delta, idle_delta, dequeue = 1;
|
||||
long rq_h_nr_queued = rq->cfs.h_nr_queued;
|
||||
|
||||
raw_spin_lock(&cfs_b->lock);
|
||||
/* This will start the period timer if necessary */
|
||||
@ -5972,9 +5968,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
|
||||
rcu_read_unlock();
|
||||
|
||||
task_delta = cfs_rq->h_nr_running;
|
||||
idle_task_delta = cfs_rq->idle_h_nr_running;
|
||||
delayed_delta = cfs_rq->h_nr_delayed;
|
||||
queued_delta = cfs_rq->h_nr_queued;
|
||||
runnable_delta = cfs_rq->h_nr_runnable;
|
||||
idle_delta = cfs_rq->h_nr_idle;
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *qcfs_rq = cfs_rq_of(se);
|
||||
int flags;
|
||||
@ -5994,11 +5990,11 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
dequeue_entity(qcfs_rq, se, flags);
|
||||
|
||||
if (cfs_rq_is_idle(group_cfs_rq(se)))
|
||||
idle_task_delta = cfs_rq->h_nr_running;
|
||||
idle_delta = cfs_rq->h_nr_queued;
|
||||
|
||||
qcfs_rq->h_nr_running -= task_delta;
|
||||
qcfs_rq->idle_h_nr_running -= idle_task_delta;
|
||||
qcfs_rq->h_nr_delayed -= delayed_delta;
|
||||
qcfs_rq->h_nr_queued -= queued_delta;
|
||||
qcfs_rq->h_nr_runnable -= runnable_delta;
|
||||
qcfs_rq->h_nr_idle -= idle_delta;
|
||||
|
||||
if (qcfs_rq->load.weight) {
|
||||
/* Avoid re-evaluating load for this entity: */
|
||||
@ -6017,18 +6013,18 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
se_update_runnable(se);
|
||||
|
||||
if (cfs_rq_is_idle(group_cfs_rq(se)))
|
||||
idle_task_delta = cfs_rq->h_nr_running;
|
||||
idle_delta = cfs_rq->h_nr_queued;
|
||||
|
||||
qcfs_rq->h_nr_running -= task_delta;
|
||||
qcfs_rq->idle_h_nr_running -= idle_task_delta;
|
||||
qcfs_rq->h_nr_delayed -= delayed_delta;
|
||||
qcfs_rq->h_nr_queued -= queued_delta;
|
||||
qcfs_rq->h_nr_runnable -= runnable_delta;
|
||||
qcfs_rq->h_nr_idle -= idle_delta;
|
||||
}
|
||||
|
||||
/* At this point se is NULL and we are at root level */
|
||||
sub_nr_running(rq, task_delta);
|
||||
sub_nr_running(rq, queued_delta);
|
||||
|
||||
/* Stop the fair server if throttling resulted in no runnable tasks */
|
||||
if (rq_h_nr_running && !rq->cfs.h_nr_running)
|
||||
if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
|
||||
dl_server_stop(&rq->fair_server);
|
||||
done:
|
||||
/*
|
||||
@ -6037,7 +6033,7 @@ done:
|
||||
*/
|
||||
cfs_rq->throttled = 1;
|
||||
SCHED_WARN_ON(cfs_rq->throttled_clock);
|
||||
if (cfs_rq->nr_running)
|
||||
if (cfs_rq->nr_queued)
|
||||
cfs_rq->throttled_clock = rq_clock(rq);
|
||||
return true;
|
||||
}
|
||||
@ -6047,8 +6043,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
|
||||
struct sched_entity *se;
|
||||
long task_delta, idle_task_delta, delayed_delta;
|
||||
long rq_h_nr_running = rq->cfs.h_nr_running;
|
||||
long queued_delta, runnable_delta, idle_delta;
|
||||
long rq_h_nr_queued = rq->cfs.h_nr_queued;
|
||||
|
||||
se = cfs_rq->tg->se[cpu_of(rq)];
|
||||
|
||||
@ -6081,9 +6077,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
goto unthrottle_throttle;
|
||||
}
|
||||
|
||||
task_delta = cfs_rq->h_nr_running;
|
||||
idle_task_delta = cfs_rq->idle_h_nr_running;
|
||||
delayed_delta = cfs_rq->h_nr_delayed;
|
||||
queued_delta = cfs_rq->h_nr_queued;
|
||||
runnable_delta = cfs_rq->h_nr_runnable;
|
||||
idle_delta = cfs_rq->h_nr_idle;
|
||||
for_each_sched_entity(se) {
|
||||
struct cfs_rq *qcfs_rq = cfs_rq_of(se);
|
||||
|
||||
@ -6097,11 +6093,11 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
|
||||
|
||||
if (cfs_rq_is_idle(group_cfs_rq(se)))
|
||||
idle_task_delta = cfs_rq->h_nr_running;
|
||||
idle_delta = cfs_rq->h_nr_queued;
|
||||
|
||||
qcfs_rq->h_nr_running += task_delta;
|
||||
qcfs_rq->idle_h_nr_running += idle_task_delta;
|
||||
qcfs_rq->h_nr_delayed += delayed_delta;
|
||||
qcfs_rq->h_nr_queued += queued_delta;
|
||||
qcfs_rq->h_nr_runnable += runnable_delta;
|
||||
qcfs_rq->h_nr_idle += idle_delta;
|
||||
|
||||
/* end evaluation on encountering a throttled cfs_rq */
|
||||
if (cfs_rq_throttled(qcfs_rq))
|
||||
@ -6115,11 +6111,11 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
se_update_runnable(se);
|
||||
|
||||
if (cfs_rq_is_idle(group_cfs_rq(se)))
|
||||
idle_task_delta = cfs_rq->h_nr_running;
|
||||
idle_delta = cfs_rq->h_nr_queued;
|
||||
|
||||
qcfs_rq->h_nr_running += task_delta;
|
||||
qcfs_rq->idle_h_nr_running += idle_task_delta;
|
||||
qcfs_rq->h_nr_delayed += delayed_delta;
|
||||
qcfs_rq->h_nr_queued += queued_delta;
|
||||
qcfs_rq->h_nr_runnable += runnable_delta;
|
||||
qcfs_rq->h_nr_idle += idle_delta;
|
||||
|
||||
/* end evaluation on encountering a throttled cfs_rq */
|
||||
if (cfs_rq_throttled(qcfs_rq))
|
||||
@ -6127,17 +6123,17 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
}
|
||||
|
||||
/* Start the fair server if un-throttling resulted in new runnable tasks */
|
||||
if (!rq_h_nr_running && rq->cfs.h_nr_running)
|
||||
if (!rq_h_nr_queued && rq->cfs.h_nr_queued)
|
||||
dl_server_start(&rq->fair_server);
|
||||
|
||||
/* At this point se is NULL and we are at root level */
|
||||
add_nr_running(rq, task_delta);
|
||||
add_nr_running(rq, queued_delta);
|
||||
|
||||
unthrottle_throttle:
|
||||
assert_list_leaf_cfs_rq(rq);
|
||||
|
||||
/* Determine whether we need to wake up potentially idle CPU: */
|
||||
if (rq->curr == rq->idle && rq->cfs.nr_running)
|
||||
if (rq->curr == rq->idle && rq->cfs.nr_queued)
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
@ -6438,7 +6434,7 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||
if (!cfs_bandwidth_used())
|
||||
return;
|
||||
|
||||
if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
|
||||
if (!cfs_rq->runtime_enabled || cfs_rq->nr_queued)
|
||||
return;
|
||||
|
||||
__return_cfs_rq_runtime(cfs_rq);
|
||||
@ -6709,6 +6705,10 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
|
||||
|
||||
lockdep_assert_rq_held(rq);
|
||||
|
||||
// Do not unthrottle for an active CPU
|
||||
if (cpumask_test_cpu(cpu_of(rq), cpu_active_mask))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The rq clock has already been updated in the
|
||||
* set_rq_offline(), so we should skip updating
|
||||
@ -6723,19 +6723,21 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
|
||||
if (!cfs_rq->runtime_enabled)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* clock_task is not advancing so we just need to make sure
|
||||
* there's some valid quota amount
|
||||
*/
|
||||
cfs_rq->runtime_remaining = 1;
|
||||
/*
|
||||
* Offline rq is schedulable till CPU is completely disabled
|
||||
* in take_cpu_down(), so we prevent new cfs throttling here.
|
||||
*/
|
||||
cfs_rq->runtime_enabled = 0;
|
||||
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
if (!cfs_rq_throttled(cfs_rq))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* clock_task is not advancing so we just need to make sure
|
||||
* there's some valid quota amount
|
||||
*/
|
||||
cfs_rq->runtime_remaining = 1;
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@ -6784,11 +6786,6 @@ static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
|
||||
|
||||
#else /* CONFIG_CFS_BANDWIDTH */
|
||||
|
||||
static inline bool cfs_bandwidth_used(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
|
||||
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
|
||||
@ -6846,7 +6843,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||
|
||||
SCHED_WARN_ON(task_rq(p) != rq);
|
||||
|
||||
if (rq->cfs.h_nr_running > 1) {
|
||||
if (rq->cfs.h_nr_queued > 1) {
|
||||
u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
|
||||
u64 slice = se->slice;
|
||||
s64 delta = slice - ran;
|
||||
@ -6934,7 +6931,7 @@ static inline void check_update_overutilized_status(struct rq *rq) { }
|
||||
/* Runqueue only has SCHED_IDLE tasks enqueued */
|
||||
static int sched_idle_rq(struct rq *rq)
|
||||
{
|
||||
return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
|
||||
return unlikely(rq->nr_running == rq->cfs.h_nr_idle &&
|
||||
rq->nr_running);
|
||||
}
|
||||
|
||||
@ -6961,14 +6958,14 @@ requeue_delayed_entity(struct sched_entity *se)
|
||||
if (sched_feat(DELAY_ZERO)) {
|
||||
update_entity_lag(cfs_rq, se);
|
||||
if (se->vlag > 0) {
|
||||
cfs_rq->nr_running--;
|
||||
cfs_rq->nr_queued--;
|
||||
if (se != cfs_rq->curr)
|
||||
__dequeue_entity(cfs_rq, se);
|
||||
se->vlag = 0;
|
||||
place_entity(cfs_rq, se, 0);
|
||||
if (se != cfs_rq->curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
cfs_rq->nr_running++;
|
||||
cfs_rq->nr_queued++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -6986,10 +6983,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se;
|
||||
int idle_h_nr_running = task_has_idle_policy(p);
|
||||
int h_nr_delayed = 0;
|
||||
int h_nr_idle = task_has_idle_policy(p);
|
||||
int h_nr_runnable = 1;
|
||||
int task_new = !(flags & ENQUEUE_WAKEUP);
|
||||
int rq_h_nr_running = rq->cfs.h_nr_running;
|
||||
int rq_h_nr_queued = rq->cfs.h_nr_queued;
|
||||
u64 slice = 0;
|
||||
|
||||
/*
|
||||
@ -7014,8 +7011,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
if (p->in_iowait)
|
||||
cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
|
||||
|
||||
if (task_new)
|
||||
h_nr_delayed = !!se->sched_delayed;
|
||||
if (task_new && se->sched_delayed)
|
||||
h_nr_runnable = 0;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
if (se->on_rq) {
|
||||
@ -7037,12 +7034,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
enqueue_entity(cfs_rq, se, flags);
|
||||
slice = cfs_rq_min_slice(cfs_rq);
|
||||
|
||||
cfs_rq->h_nr_running++;
|
||||
cfs_rq->idle_h_nr_running += idle_h_nr_running;
|
||||
cfs_rq->h_nr_delayed += h_nr_delayed;
|
||||
cfs_rq->h_nr_runnable += h_nr_runnable;
|
||||
cfs_rq->h_nr_queued++;
|
||||
cfs_rq->h_nr_idle += h_nr_idle;
|
||||
|
||||
if (cfs_rq_is_idle(cfs_rq))
|
||||
idle_h_nr_running = 1;
|
||||
h_nr_idle = 1;
|
||||
|
||||
/* end evaluation on encountering a throttled cfs_rq */
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
@ -7061,19 +7058,19 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
se->slice = slice;
|
||||
slice = cfs_rq_min_slice(cfs_rq);
|
||||
|
||||
cfs_rq->h_nr_running++;
|
||||
cfs_rq->idle_h_nr_running += idle_h_nr_running;
|
||||
cfs_rq->h_nr_delayed += h_nr_delayed;
|
||||
cfs_rq->h_nr_runnable += h_nr_runnable;
|
||||
cfs_rq->h_nr_queued++;
|
||||
cfs_rq->h_nr_idle += h_nr_idle;
|
||||
|
||||
if (cfs_rq_is_idle(cfs_rq))
|
||||
idle_h_nr_running = 1;
|
||||
h_nr_idle = 1;
|
||||
|
||||
/* end evaluation on encountering a throttled cfs_rq */
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
goto enqueue_throttle;
|
||||
}
|
||||
|
||||
if (!rq_h_nr_running && rq->cfs.h_nr_running) {
|
||||
if (!rq_h_nr_queued && rq->cfs.h_nr_queued) {
|
||||
/* Account for idle runtime */
|
||||
if (!rq->nr_running)
|
||||
dl_server_update_idle_time(rq, rq->curr);
|
||||
@ -7120,22 +7117,22 @@ static void set_next_buddy(struct sched_entity *se);
|
||||
static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
bool was_sched_idle = sched_idle_rq(rq);
|
||||
int rq_h_nr_running = rq->cfs.h_nr_running;
|
||||
int rq_h_nr_queued = rq->cfs.h_nr_queued;
|
||||
bool task_sleep = flags & DEQUEUE_SLEEP;
|
||||
bool task_delayed = flags & DEQUEUE_DELAYED;
|
||||
struct task_struct *p = NULL;
|
||||
int idle_h_nr_running = 0;
|
||||
int h_nr_running = 0;
|
||||
int h_nr_delayed = 0;
|
||||
int h_nr_idle = 0;
|
||||
int h_nr_queued = 0;
|
||||
int h_nr_runnable = 0;
|
||||
struct cfs_rq *cfs_rq;
|
||||
u64 slice = 0;
|
||||
|
||||
if (entity_is_task(se)) {
|
||||
p = task_of(se);
|
||||
h_nr_running = 1;
|
||||
idle_h_nr_running = task_has_idle_policy(p);
|
||||
if (!task_sleep && !task_delayed)
|
||||
h_nr_delayed = !!se->sched_delayed;
|
||||
h_nr_queued = 1;
|
||||
h_nr_idle = task_has_idle_policy(p);
|
||||
if (task_sleep || task_delayed || !se->sched_delayed)
|
||||
h_nr_runnable = 1;
|
||||
} else {
|
||||
cfs_rq = group_cfs_rq(se);
|
||||
slice = cfs_rq_min_slice(cfs_rq);
|
||||
@ -7151,12 +7148,12 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
|
||||
break;
|
||||
}
|
||||
|
||||
cfs_rq->h_nr_running -= h_nr_running;
|
||||
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
|
||||
cfs_rq->h_nr_delayed -= h_nr_delayed;
|
||||
cfs_rq->h_nr_runnable -= h_nr_runnable;
|
||||
cfs_rq->h_nr_queued -= h_nr_queued;
|
||||
cfs_rq->h_nr_idle -= h_nr_idle;
|
||||
|
||||
if (cfs_rq_is_idle(cfs_rq))
|
||||
idle_h_nr_running = h_nr_running;
|
||||
h_nr_idle = h_nr_queued;
|
||||
|
||||
/* end evaluation on encountering a throttled cfs_rq */
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
@ -7190,21 +7187,21 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
|
||||
se->slice = slice;
|
||||
slice = cfs_rq_min_slice(cfs_rq);
|
||||
|
||||
cfs_rq->h_nr_running -= h_nr_running;
|
||||
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
|
||||
cfs_rq->h_nr_delayed -= h_nr_delayed;
|
||||
cfs_rq->h_nr_runnable -= h_nr_runnable;
|
||||
cfs_rq->h_nr_queued -= h_nr_queued;
|
||||
cfs_rq->h_nr_idle -= h_nr_idle;
|
||||
|
||||
if (cfs_rq_is_idle(cfs_rq))
|
||||
idle_h_nr_running = h_nr_running;
|
||||
h_nr_idle = h_nr_queued;
|
||||
|
||||
/* end evaluation on encountering a throttled cfs_rq */
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub_nr_running(rq, h_nr_running);
|
||||
sub_nr_running(rq, h_nr_queued);
|
||||
|
||||
if (rq_h_nr_running && !rq->cfs.h_nr_running)
|
||||
if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
|
||||
dl_server_stop(&rq->fair_server);
|
||||
|
||||
/* balance early to pull high priority tasks */
|
||||
@ -8893,7 +8890,7 @@ static struct task_struct *pick_task_fair(struct rq *rq)
|
||||
|
||||
again:
|
||||
cfs_rq = &rq->cfs;
|
||||
if (!cfs_rq->nr_running)
|
||||
if (!cfs_rq->nr_queued)
|
||||
return NULL;
|
||||
|
||||
do {
|
||||
@ -9010,7 +9007,7 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
|
||||
|
||||
static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return !!dl_se->rq->cfs.nr_running;
|
||||
return !!dl_se->rq->cfs.nr_queued;
|
||||
}
|
||||
|
||||
static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
|
||||
@ -9411,11 +9408,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
|
||||
/*
|
||||
* We do not migrate tasks that are:
|
||||
* 1) throttled_lb_pair, or
|
||||
* 2) cannot be migrated to this CPU due to cpus_ptr, or
|
||||
* 3) running (obviously), or
|
||||
* 4) are cache-hot on their current CPU.
|
||||
* 1) delayed dequeued unless we migrate load, or
|
||||
* 2) throttled_lb_pair, or
|
||||
* 3) cannot be migrated to this CPU due to cpus_ptr, or
|
||||
* 4) running (obviously), or
|
||||
* 5) are cache-hot on their current CPU.
|
||||
*/
|
||||
if ((p->se.sched_delayed) && (env->migration_type != migrate_load))
|
||||
return 0;
|
||||
|
||||
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
|
||||
return 0;
|
||||
|
||||
@ -9800,7 +9801,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
|
||||
if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) {
|
||||
update_tg_load_avg(cfs_rq);
|
||||
|
||||
if (cfs_rq->nr_running == 0)
|
||||
if (cfs_rq->nr_queued == 0)
|
||||
update_idle_cfs_rq_clock_pelt(cfs_rq);
|
||||
|
||||
if (cfs_rq == &rq->cfs)
|
||||
@ -10332,7 +10333,7 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
|
||||
* When there is more than 1 task, the group_overloaded case already
|
||||
* takes care of cpu with reduced capacity
|
||||
*/
|
||||
if (rq->cfs.h_nr_running != 1)
|
||||
if (rq->cfs.h_nr_runnable != 1)
|
||||
return false;
|
||||
|
||||
return check_cpu_capacity(rq, sd);
|
||||
@ -10367,7 +10368,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
||||
sgs->group_load += load;
|
||||
sgs->group_util += cpu_util_cfs(i);
|
||||
sgs->group_runnable += cpu_runnable(rq);
|
||||
sgs->sum_h_nr_running += rq->cfs.h_nr_running;
|
||||
sgs->sum_h_nr_running += rq->cfs.h_nr_runnable;
|
||||
|
||||
nr_running = rq->nr_running;
|
||||
sgs->sum_nr_running += nr_running;
|
||||
@ -10682,7 +10683,7 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
|
||||
sgs->group_util += cpu_util_without(i, p);
|
||||
sgs->group_runnable += cpu_runnable_without(rq, p);
|
||||
local = task_running_on_cpu(i, p);
|
||||
sgs->sum_h_nr_running += rq->cfs.h_nr_running - local;
|
||||
sgs->sum_h_nr_running += rq->cfs.h_nr_runnable - local;
|
||||
|
||||
nr_running = rq->nr_running - local;
|
||||
sgs->sum_nr_running += nr_running;
|
||||
@ -11464,7 +11465,7 @@ static struct rq *sched_balance_find_src_rq(struct lb_env *env,
|
||||
if (rt > env->fbq_type)
|
||||
continue;
|
||||
|
||||
nr_running = rq->cfs.h_nr_running;
|
||||
nr_running = rq->cfs.h_nr_runnable;
|
||||
if (!nr_running)
|
||||
continue;
|
||||
|
||||
@ -11623,7 +11624,7 @@ static int need_active_balance(struct lb_env *env)
|
||||
* available on dst_cpu.
|
||||
*/
|
||||
if (env->idle &&
|
||||
(env->src_rq->cfs.h_nr_running == 1)) {
|
||||
(env->src_rq->cfs.h_nr_runnable == 1)) {
|
||||
if ((check_cpu_capacity(env->src_rq, sd)) &&
|
||||
(capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
|
||||
return 1;
|
||||
@ -12251,16 +12252,13 @@ static inline int on_null_domain(struct rq *rq)
|
||||
* - When one of the busy CPUs notices that there may be an idle rebalancing
|
||||
* needed, they will kick the idle load balancer, which then does idle
|
||||
* load balancing for all the idle CPUs.
|
||||
*
|
||||
* - HK_TYPE_MISC CPUs are used for this task, because HK_TYPE_SCHED is not set
|
||||
* anywhere yet.
|
||||
*/
|
||||
static inline int find_new_ilb(void)
|
||||
{
|
||||
const struct cpumask *hk_mask;
|
||||
int ilb_cpu;
|
||||
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_MISC);
|
||||
hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE);
|
||||
|
||||
for_each_cpu_and(ilb_cpu, nohz.idle_cpus_mask, hk_mask) {
|
||||
|
||||
@ -12278,7 +12276,8 @@ static inline int find_new_ilb(void)
|
||||
* Kick a CPU to do the NOHZ balancing, if it is time for it, via a cross-CPU
|
||||
* SMP function call (IPI).
|
||||
*
|
||||
* We pick the first idle CPU in the HK_TYPE_MISC housekeeping set (if there is one).
|
||||
* We pick the first idle CPU in the HK_TYPE_KERNEL_NOISE housekeeping set
|
||||
* (if there is one).
|
||||
*/
|
||||
static void kick_ilb(unsigned int flags)
|
||||
{
|
||||
@ -12366,7 +12365,7 @@ static void nohz_balancer_kick(struct rq *rq)
|
||||
* If there's a runnable CFS task and the current CPU has reduced
|
||||
* capacity, kick the ILB to see if there's a better CPU to run on:
|
||||
*/
|
||||
if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
|
||||
if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd)) {
|
||||
flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
|
||||
goto unlock;
|
||||
}
|
||||
@ -12498,10 +12497,6 @@ void nohz_balance_enter_idle(int cpu)
|
||||
if (!cpu_active(cpu))
|
||||
return;
|
||||
|
||||
/* Spare idle load balancing on CPUs that don't want to be disturbed: */
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_SCHED))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Can be set safely without rq->lock held
|
||||
* If a clear happens, it will have evaluated last additions because
|
||||
@ -12721,13 +12716,6 @@ static void nohz_newidle_balance(struct rq *this_rq)
|
||||
{
|
||||
int this_cpu = this_rq->cpu;
|
||||
|
||||
/*
|
||||
* This CPU doesn't want to be disturbed by scheduler
|
||||
* housekeeping
|
||||
*/
|
||||
if (!housekeeping_cpu(this_cpu, HK_TYPE_SCHED))
|
||||
return;
|
||||
|
||||
/* Will wake up very soon. No time for doing anything else. */
|
||||
if (this_rq->avg_idle < sysctl_sched_migration_cost)
|
||||
return;
|
||||
@ -12864,11 +12852,11 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
|
||||
* have been enqueued in the meantime. Since we're not going idle,
|
||||
* pretend we pulled a task.
|
||||
*/
|
||||
if (this_rq->cfs.h_nr_running && !pulled_task)
|
||||
if (this_rq->cfs.h_nr_queued && !pulled_task)
|
||||
pulled_task = 1;
|
||||
|
||||
/* Is there a task of a high priority class? */
|
||||
if (this_rq->nr_running != this_rq->cfs.h_nr_running)
|
||||
if (this_rq->nr_running != this_rq->cfs.h_nr_queued)
|
||||
pulled_task = -1;
|
||||
|
||||
out:
|
||||
@ -12982,7 +12970,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
|
||||
* MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check
|
||||
* if we need to give up the CPU.
|
||||
*/
|
||||
if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
|
||||
if (rq->core->core_forceidle_count && rq->cfs.nr_queued == 1 &&
|
||||
__entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
|
||||
resched_curr(rq);
|
||||
}
|
||||
@ -13126,7 +13114,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
if (!task_on_rq_queued(p))
|
||||
return;
|
||||
|
||||
if (rq->cfs.nr_running == 1)
|
||||
if (rq->cfs.nr_queued == 1)
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -13536,7 +13524,7 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
struct sched_entity *se = tg->se[i];
|
||||
struct cfs_rq *parent_cfs_rq, *grp_cfs_rq = tg->cfs_rq[i];
|
||||
struct cfs_rq *grp_cfs_rq = tg->cfs_rq[i];
|
||||
bool was_idle = cfs_rq_is_idle(grp_cfs_rq);
|
||||
long idle_task_delta;
|
||||
struct rq_flags rf;
|
||||
@ -13547,16 +13535,8 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
if (WARN_ON_ONCE(was_idle == cfs_rq_is_idle(grp_cfs_rq)))
|
||||
goto next_cpu;
|
||||
|
||||
if (se->on_rq) {
|
||||
parent_cfs_rq = cfs_rq_of(se);
|
||||
if (cfs_rq_is_idle(grp_cfs_rq))
|
||||
parent_cfs_rq->idle_nr_running++;
|
||||
else
|
||||
parent_cfs_rq->idle_nr_running--;
|
||||
}
|
||||
|
||||
idle_task_delta = grp_cfs_rq->h_nr_running -
|
||||
grp_cfs_rq->idle_h_nr_running;
|
||||
idle_task_delta = grp_cfs_rq->h_nr_queued -
|
||||
grp_cfs_rq->h_nr_idle;
|
||||
if (!cfs_rq_is_idle(grp_cfs_rq))
|
||||
idle_task_delta *= -1;
|
||||
|
||||
@ -13566,7 +13546,7 @@ int sched_group_set_idle(struct task_group *tg, long idle)
|
||||
if (!se->on_rq)
|
||||
break;
|
||||
|
||||
cfs_rq->idle_h_nr_running += idle_task_delta;
|
||||
cfs_rq->h_nr_idle += idle_task_delta;
|
||||
|
||||
/* Already accounted at parent level and above. */
|
||||
if (cfs_rq_is_idle(cfs_rq))
|
||||
|
@ -31,6 +31,15 @@ SCHED_FEAT(PREEMPT_SHORT, true)
|
||||
*/
|
||||
SCHED_FEAT(NEXT_BUDDY, false)
|
||||
|
||||
/*
|
||||
* Allow completely ignoring cfs_rq->next, which can be set from various
|
||||
* places:
|
||||
* - NEXT_BUDDY (wakeup preemption)
|
||||
* - yield_to_task()
|
||||
* - cgroup dequeue / pick
|
||||
*/
|
||||
SCHED_FEAT(PICK_BUDDY, true)
|
||||
|
||||
/*
|
||||
* Consider buddies to be cache hot; this decreases the likelihood of a
|
||||
* cache buddy being migrated away, increases cache locality.
|
||||
|
@ -9,15 +9,9 @@
|
||||
*/
|
||||
|
||||
enum hk_flags {
|
||||
HK_FLAG_TIMER = BIT(HK_TYPE_TIMER),
|
||||
HK_FLAG_RCU = BIT(HK_TYPE_RCU),
|
||||
HK_FLAG_MISC = BIT(HK_TYPE_MISC),
|
||||
HK_FLAG_SCHED = BIT(HK_TYPE_SCHED),
|
||||
HK_FLAG_TICK = BIT(HK_TYPE_TICK),
|
||||
HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
|
||||
HK_FLAG_WQ = BIT(HK_TYPE_WQ),
|
||||
HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
|
||||
HK_FLAG_KTHREAD = BIT(HK_TYPE_KTHREAD),
|
||||
HK_FLAG_KERNEL_NOISE = BIT(HK_TYPE_KERNEL_NOISE),
|
||||
};
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
|
||||
@ -97,7 +91,7 @@ void __init housekeeping_init(void)
|
||||
|
||||
static_branch_enable(&housekeeping_overridden);
|
||||
|
||||
if (housekeeping.flags & HK_FLAG_TICK)
|
||||
if (housekeeping.flags & HK_FLAG_KERNEL_NOISE)
|
||||
sched_tick_offload_init();
|
||||
|
||||
for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
|
||||
@ -121,7 +115,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
|
||||
unsigned int first_cpu;
|
||||
int err = 0;
|
||||
|
||||
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
|
||||
if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE)) {
|
||||
if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
|
||||
pr_warn("Housekeeping: nohz unsupported."
|
||||
" Build with CONFIG_NO_HZ_FULL\n");
|
||||
@ -177,7 +171,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
|
||||
housekeeping_setup_type(type, housekeeping_staging);
|
||||
}
|
||||
|
||||
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK))
|
||||
if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE))
|
||||
tick_nohz_full_setup(non_housekeeping_mask);
|
||||
|
||||
housekeeping.flags |= flags;
|
||||
@ -195,8 +189,7 @@ static int __init housekeeping_nohz_full_setup(char *str)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
|
||||
HK_FLAG_MISC | HK_FLAG_KTHREAD;
|
||||
flags = HK_FLAG_KERNEL_NOISE;
|
||||
|
||||
return housekeeping_setup(str, flags);
|
||||
}
|
||||
@ -210,9 +203,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
|
||||
int len;
|
||||
|
||||
while (isalpha(*str)) {
|
||||
/*
|
||||
* isolcpus=nohz is equivalent to nohz_full.
|
||||
*/
|
||||
if (!strncmp(str, "nohz,", 5)) {
|
||||
str += 5;
|
||||
flags |= HK_FLAG_TICK;
|
||||
flags |= HK_FLAG_KERNEL_NOISE;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -275,7 +275,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
|
||||
*
|
||||
* group: [ see update_cfs_group() ]
|
||||
* se_weight() = tg->weight * grq->load_avg / tg->load_avg
|
||||
* se_runnable() = grq->h_nr_running
|
||||
* se_runnable() = grq->h_nr_runnable
|
||||
*
|
||||
* runnable_sum = se_runnable() * runnable = grq->runnable_sum
|
||||
* runnable_avg = runnable_sum
|
||||
@ -321,7 +321,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
if (___update_load_sum(now, &cfs_rq->avg,
|
||||
scale_load_down(cfs_rq->load.weight),
|
||||
cfs_rq->h_nr_running - cfs_rq->h_nr_delayed,
|
||||
cfs_rq->h_nr_runnable,
|
||||
cfs_rq->curr != NULL)) {
|
||||
|
||||
___update_load_avg(&cfs_rq->avg, 1);
|
||||
|
@ -362,7 +362,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
|
||||
extern bool __checkparam_dl(const struct sched_attr *attr);
|
||||
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
|
||||
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
|
||||
extern int dl_bw_check_overflow(int cpu);
|
||||
extern int dl_bw_deactivate(int cpu);
|
||||
extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec);
|
||||
/*
|
||||
* SCHED_DEADLINE supports servers (nested scheduling) with the following
|
||||
@ -650,11 +650,10 @@ struct balance_callback {
|
||||
/* CFS-related fields in a runqueue */
|
||||
struct cfs_rq {
|
||||
struct load_weight load;
|
||||
unsigned int nr_running;
|
||||
unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
|
||||
unsigned int idle_nr_running; /* SCHED_IDLE */
|
||||
unsigned int idle_h_nr_running; /* SCHED_IDLE */
|
||||
unsigned int h_nr_delayed;
|
||||
unsigned int nr_queued;
|
||||
unsigned int h_nr_queued; /* SCHED_{NORMAL,BATCH,IDLE} */
|
||||
unsigned int h_nr_runnable; /* SCHED_{NORMAL,BATCH,IDLE} */
|
||||
unsigned int h_nr_idle; /* SCHED_IDLE */
|
||||
|
||||
s64 avg_vruntime;
|
||||
u64 avg_load;
|
||||
@ -904,11 +903,8 @@ struct dl_rq {
|
||||
|
||||
static inline void se_update_runnable(struct sched_entity *se)
|
||||
{
|
||||
if (!entity_is_task(se)) {
|
||||
struct cfs_rq *cfs_rq = se->my_q;
|
||||
|
||||
se->runnable_weight = cfs_rq->h_nr_running - cfs_rq->h_nr_delayed;
|
||||
}
|
||||
if (!entity_is_task(se))
|
||||
se->runnable_weight = se->my_q->h_nr_runnable;
|
||||
}
|
||||
|
||||
static inline long se_runnable(struct sched_entity *se)
|
||||
@ -2280,7 +2276,7 @@ static inline int task_on_cpu(struct rq *rq, struct task_struct *p)
|
||||
|
||||
static inline int task_on_rq_queued(struct task_struct *p)
|
||||
{
|
||||
return p->on_rq == TASK_ON_RQ_QUEUED;
|
||||
return READ_ONCE(p->on_rq) == TASK_ON_RQ_QUEUED;
|
||||
}
|
||||
|
||||
static inline int task_on_rq_migrating(struct task_struct *p)
|
||||
@ -2574,7 +2570,7 @@ static inline bool sched_rt_runnable(struct rq *rq)
|
||||
|
||||
static inline bool sched_fair_runnable(struct rq *rq)
|
||||
{
|
||||
return rq->cfs.nr_running > 0;
|
||||
return rq->cfs.nr_queued > 0;
|
||||
}
|
||||
|
||||
extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
|
||||
|
@ -2721,9 +2721,11 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
|
||||
|
||||
/*
|
||||
* This domain won't be destroyed and as such
|
||||
* its dl_bw->total_bw needs to be cleared. It
|
||||
* will be recomputed in function
|
||||
* update_tasks_root_domain().
|
||||
* its dl_bw->total_bw needs to be cleared.
|
||||
* The tasks' contribution will then be recomputed
|
||||
* in function dl_update_tasks_root_domain(),
|
||||
* and the dl_servers' contribution in function
|
||||
* dl_restore_server_root_domain().
|
||||
*/
|
||||
rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
|
||||
dl_clear_root_domain(rd);
|
||||
|
Loading…
x
Reference in New Issue
Block a user