mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
Scheduler changes for v6.11:
- Update Daniel Bristot de Oliveira's entry in MAINTAINERS, and credit him in CREDITS. - Harmonize the lock-yielding behavior on dynamically selected preemption models with static ones. - Reorganize the code a bit: split out sched/syscalls.c to reduce the size of sched/core.c - Micro-optimize psi_group_change() - Fix set_load_weight() for SCHED_IDLE tasks - Misc cleanups & fixes Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmaVtVARHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1iqTQ/9GLNzNBnl0oBWCiybeQjyWsZ6BiZi48R0 C1g9/RKy++OyGOjn/yqYK0Kg8cdfoGzHGioMMAucHFW1nXZwVw17xAJK127N0apF 83up7AnFJw/JGr1bI0FwuozqHAs4Z5KzHTv2KBxhYuO77lyYna6/t0liRUbF8ZUZ I/nqav7wDB8RBIB5hEJ/uYLDX7qWdUlyFB+mcvV4ANA99yr++OgipCp6Ob3Rz3cP O676nKJY4vpNbZ/B6bpKg8ezULRP8re2qD3GJRf2huS63uu/Z5ct7ouLVZ1DwN53 mFDBTYUMI2ToV0pseikuqwnmrjxAKcEajTyZpD3vckafd2TlWIopkQZoQ9XLLlIZ DxO+KoekaHTSVy8FWlO8O+iE3IAdUUgECEpNveX45Pb7nFP+5dtFqqnVIdNqCq5e zEuQvizaa5m+A1POZhZKya+z9jbLXXx+gtPCbbADTBWtuyl8azUIh3vjn0bykmv4 IVV/wvUm+BPEIhnKusZZOgB0vLtxUdntBBfUSxqoSOad9L+0/UtSKoKI6wvW00q8 ZkW+85yS3YFiN9W61276RLis2j7OAjE0eDJ96wfhooma2JRDJU4Wmg5oWg8x3WuA JRmK0s63Qik5gpwG5rHQsR5jNqYWTj5Lp7So+M1kRfFsOM/RXQ/AneSXZu/P7d65 LnYWzbKu76c= =lLab -----END PGP SIGNATURE----- Merge tag 'sched-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler updates from Ingo Molnar: - Update Daniel Bristot de Oliveira's entry in MAINTAINERS, and credit him in CREDITS - Harmonize the lock-yielding behavior on dynamically selected preemption models with static ones - Reorganize the code a bit: split out sched/syscalls.c to reduce the size of sched/core.c - Micro-optimize psi_group_change() - Fix set_load_weight() for SCHED_IDLE tasks - Misc cleanups & fixes * tag 'sched-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched: Update MAINTAINERS and CREDITS sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks sched/psi: Optimise psi_group_change a bit sched/core: Drop spinlocks on contention iff kernel is preemptible sched/core: Move preempt_model_*() helpers from sched.h to preempt.h sched/balance: Skip unnecessary updates to idle load balancer's flags idle: Remove stale RCU comment sched/headers: Move struct pre-declarations to the beginning of the header sched/core: Clean up kernel/sched/sched.h a bit sched/core: Simplify prefetch_curr_exec_start() sched: Fix spelling in comments sched/syscalls: Split out kernel/sched/syscalls.c from kernel/sched/core.c
This commit is contained in:
commit
4a996d90b9
3
CREDITS
3
CREDITS
@ -271,6 +271,9 @@ D: Driver for WaveFront soundcards (Turtle Beach Maui, Tropez, Tropez+)
|
|||||||
D: Various bugfixes and changes to sound drivers
|
D: Various bugfixes and changes to sound drivers
|
||||||
S: USA
|
S: USA
|
||||||
|
|
||||||
|
N: Daniel Bristot de Oliveira
|
||||||
|
D: Scheduler contributions, notably: SCHED_DEADLINE
|
||||||
|
|
||||||
N: Carlos Henrique Bauer
|
N: Carlos Henrique Bauer
|
||||||
E: chbauer@acm.org
|
E: chbauer@acm.org
|
||||||
E: bauer@atlas.unisinos.br
|
E: bauer@atlas.unisinos.br
|
||||||
|
@ -4728,7 +4728,9 @@
|
|||||||
none - Limited to cond_resched() calls
|
none - Limited to cond_resched() calls
|
||||||
voluntary - Limited to cond_resched() and might_sleep() calls
|
voluntary - Limited to cond_resched() and might_sleep() calls
|
||||||
full - Any section that isn't explicitly preempt disabled
|
full - Any section that isn't explicitly preempt disabled
|
||||||
can be preempted anytime.
|
can be preempted anytime. Tasks will also yield
|
||||||
|
contended spinlocks (if the critical section isn't
|
||||||
|
explicitly preempt disabled beyond the lock itself).
|
||||||
|
|
||||||
print-fatal-signals=
|
print-fatal-signals=
|
||||||
[KNL] debug: print fatal signals
|
[KNL] debug: print fatal signals
|
||||||
|
@ -20047,7 +20047,6 @@ R: Dietmar Eggemann <dietmar.eggemann@arm.com> (SCHED_NORMAL)
|
|||||||
R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
|
R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
|
||||||
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
|
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
|
||||||
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
|
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
|
||||||
R: Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
|
|
||||||
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
|
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
|
||||||
L: linux-kernel@vger.kernel.org
|
L: linux-kernel@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
@ -481,4 +481,45 @@ DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
|
|||||||
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
|
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
|
||||||
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
|
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
|
||||||
|
|
||||||
|
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||||
|
|
||||||
|
extern bool preempt_model_none(void);
|
||||||
|
extern bool preempt_model_voluntary(void);
|
||||||
|
extern bool preempt_model_full(void);
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
static inline bool preempt_model_none(void)
|
||||||
|
{
|
||||||
|
return IS_ENABLED(CONFIG_PREEMPT_NONE);
|
||||||
|
}
|
||||||
|
static inline bool preempt_model_voluntary(void)
|
||||||
|
{
|
||||||
|
return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
|
||||||
|
}
|
||||||
|
static inline bool preempt_model_full(void)
|
||||||
|
{
|
||||||
|
return IS_ENABLED(CONFIG_PREEMPT);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline bool preempt_model_rt(void)
|
||||||
|
{
|
||||||
|
return IS_ENABLED(CONFIG_PREEMPT_RT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Does the preemption model allow non-cooperative preemption?
|
||||||
|
*
|
||||||
|
* For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
|
||||||
|
* CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
|
||||||
|
* kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
|
||||||
|
* PREEMPT_NONE model.
|
||||||
|
*/
|
||||||
|
static inline bool preempt_model_preemptible(void)
|
||||||
|
{
|
||||||
|
return preempt_model_full() || preempt_model_rt();
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* __LINUX_PREEMPT_H */
|
#endif /* __LINUX_PREEMPT_H */
|
||||||
|
@ -2064,47 +2064,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
|
|||||||
__cond_resched_rwlock_write(lock); \
|
__cond_resched_rwlock_write(lock); \
|
||||||
})
|
})
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
|
||||||
|
|
||||||
extern bool preempt_model_none(void);
|
|
||||||
extern bool preempt_model_voluntary(void);
|
|
||||||
extern bool preempt_model_full(void);
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static inline bool preempt_model_none(void)
|
|
||||||
{
|
|
||||||
return IS_ENABLED(CONFIG_PREEMPT_NONE);
|
|
||||||
}
|
|
||||||
static inline bool preempt_model_voluntary(void)
|
|
||||||
{
|
|
||||||
return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
|
|
||||||
}
|
|
||||||
static inline bool preempt_model_full(void)
|
|
||||||
{
|
|
||||||
return IS_ENABLED(CONFIG_PREEMPT);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline bool preempt_model_rt(void)
|
|
||||||
{
|
|
||||||
return IS_ENABLED(CONFIG_PREEMPT_RT);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Does the preemption model allow non-cooperative preemption?
|
|
||||||
*
|
|
||||||
* For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
|
|
||||||
* CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
|
|
||||||
* kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
|
|
||||||
* PREEMPT_NONE model.
|
|
||||||
*/
|
|
||||||
static inline bool preempt_model_preemptible(void)
|
|
||||||
{
|
|
||||||
return preempt_model_full() || preempt_model_rt();
|
|
||||||
}
|
|
||||||
|
|
||||||
static __always_inline bool need_resched(void)
|
static __always_inline bool need_resched(void)
|
||||||
{
|
{
|
||||||
return unlikely(tif_need_resched());
|
return unlikely(tif_need_resched());
|
||||||
|
@ -462,11 +462,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
|
|||||||
*/
|
*/
|
||||||
static inline int spin_needbreak(spinlock_t *lock)
|
static inline int spin_needbreak(spinlock_t *lock)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_PREEMPTION
|
if (!preempt_model_preemptible())
|
||||||
|
return 0;
|
||||||
|
|
||||||
return spin_is_contended(lock);
|
return spin_is_contended(lock);
|
||||||
#else
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -479,11 +478,10 @@ static inline int spin_needbreak(spinlock_t *lock)
|
|||||||
*/
|
*/
|
||||||
static inline int rwlock_needbreak(rwlock_t *lock)
|
static inline int rwlock_needbreak(rwlock_t *lock)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_PREEMPTION
|
if (!preempt_model_preemptible())
|
||||||
|
return 0;
|
||||||
|
|
||||||
return rwlock_is_contended(lock);
|
return rwlock_is_contended(lock);
|
||||||
#else
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -52,3 +52,4 @@
|
|||||||
#include "cputime.c"
|
#include "cputime.c"
|
||||||
#include "deadline.c"
|
#include "deadline.c"
|
||||||
|
|
||||||
|
#include "syscalls.c"
|
||||||
|
@ -340,7 +340,7 @@ static notrace u64 sched_clock_remote(struct sched_clock_data *scd)
|
|||||||
this_clock = sched_clock_local(my_scd);
|
this_clock = sched_clock_local(my_scd);
|
||||||
/*
|
/*
|
||||||
* We must enforce atomic readout on 32-bit, otherwise the
|
* We must enforce atomic readout on 32-bit, otherwise the
|
||||||
* update on the remote CPU can hit inbetween the readout of
|
* update on the remote CPU can hit in between the readout of
|
||||||
* the low 32-bit and the high 32-bit portion.
|
* the low 32-bit and the high 32-bit portion.
|
||||||
*/
|
*/
|
||||||
remote_clock = cmpxchg64(&scd->clock, 0, 0);
|
remote_clock = cmpxchg64(&scd->clock, 0, 0);
|
||||||
@ -444,7 +444,7 @@ notrace void sched_clock_tick_stable(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are going deep-idle (irqs are disabled):
|
* We are going deep-idle (IRQs are disabled):
|
||||||
*/
|
*/
|
||||||
notrace void sched_clock_idle_sleep_event(void)
|
notrace void sched_clock_idle_sleep_event(void)
|
||||||
{
|
{
|
||||||
|
1874
kernel/sched/core.c
1874
kernel/sched/core.c
File diff suppressed because it is too large
Load Diff
@ -279,7 +279,7 @@ void __sched_core_account_forceidle(struct rq *rq)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: this will account forceidle to the current cpu, even
|
* Note: this will account forceidle to the current CPU, even
|
||||||
* if it comes from our SMT sibling.
|
* if it comes from our SMT sibling.
|
||||||
*/
|
*/
|
||||||
__account_forceidle_time(p, delta);
|
__account_forceidle_time(p, delta);
|
||||||
|
@ -14,11 +14,11 @@
|
|||||||
* They are only modified in vtime_account, on corresponding CPU
|
* They are only modified in vtime_account, on corresponding CPU
|
||||||
* with interrupts disabled. So, writes are safe.
|
* with interrupts disabled. So, writes are safe.
|
||||||
* They are read and saved off onto struct rq in update_rq_clock().
|
* They are read and saved off onto struct rq in update_rq_clock().
|
||||||
* This may result in other CPU reading this CPU's irq time and can
|
* This may result in other CPU reading this CPU's IRQ time and can
|
||||||
* race with irq/vtime_account on this CPU. We would either get old
|
* race with irq/vtime_account on this CPU. We would either get old
|
||||||
* or new value with a side effect of accounting a slice of irq time to wrong
|
* or new value with a side effect of accounting a slice of IRQ time to wrong
|
||||||
* task when irq is in progress while we read rq->clock. That is a worthy
|
* task when IRQ is in progress while we read rq->clock. That is a worthy
|
||||||
* compromise in place of having locks on each irq in account_system_time.
|
* compromise in place of having locks on each IRQ in account_system_time.
|
||||||
*/
|
*/
|
||||||
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
|
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
|
||||||
|
|
||||||
@ -269,7 +269,7 @@ static __always_inline u64 steal_account_process_time(u64 maxtime)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Account how much elapsed time was spent in steal, irq, or softirq time.
|
* Account how much elapsed time was spent in steal, IRQ, or softirq time.
|
||||||
*/
|
*/
|
||||||
static inline u64 account_other_time(u64 max)
|
static inline u64 account_other_time(u64 max)
|
||||||
{
|
{
|
||||||
@ -370,7 +370,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
|
|||||||
* Check for hardirq is done both for system and user time as there is
|
* Check for hardirq is done both for system and user time as there is
|
||||||
* no timer going off while we are on hardirq and hence we may never get an
|
* no timer going off while we are on hardirq and hence we may never get an
|
||||||
* opportunity to update it solely in system time.
|
* opportunity to update it solely in system time.
|
||||||
* p->stime and friends are only updated on system time and not on irq
|
* p->stime and friends are only updated on system time and not on IRQ
|
||||||
* softirq as those do not count in task exec_runtime any more.
|
* softirq as those do not count in task exec_runtime any more.
|
||||||
*/
|
*/
|
||||||
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
|
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
|
||||||
@ -380,7 +380,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* When returning from idle, many ticks can get accounted at
|
* When returning from idle, many ticks can get accounted at
|
||||||
* once, including some ticks of steal, irq, and softirq time.
|
* once, including some ticks of steal, IRQ, and softirq time.
|
||||||
* Subtract those ticks from the amount of time accounted to
|
* Subtract those ticks from the amount of time accounted to
|
||||||
* idle, or potentially user or system time. Due to rounding,
|
* idle, or potentially user or system time. Due to rounding,
|
||||||
* other time can exceed ticks occasionally.
|
* other time can exceed ticks occasionally.
|
||||||
|
@ -708,7 +708,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* And we finally need to fixup root_domain(s) bandwidth accounting,
|
* And we finally need to fix up root_domain(s) bandwidth accounting,
|
||||||
* since p is still hanging out in the old (now moved to default) root
|
* since p is still hanging out in the old (now moved to default) root
|
||||||
* domain.
|
* domain.
|
||||||
*/
|
*/
|
||||||
@ -992,7 +992,7 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
|
|||||||
* is detected, the runtime and deadline need to be updated.
|
* is detected, the runtime and deadline need to be updated.
|
||||||
*
|
*
|
||||||
* If the task has an implicit deadline, i.e., deadline == period, the Original
|
* If the task has an implicit deadline, i.e., deadline == period, the Original
|
||||||
* CBS is applied. the runtime is replenished and a new absolute deadline is
|
* CBS is applied. The runtime is replenished and a new absolute deadline is
|
||||||
* set, as in the previous cases.
|
* set, as in the previous cases.
|
||||||
*
|
*
|
||||||
* However, the Original CBS does not work properly for tasks with
|
* However, the Original CBS does not work properly for tasks with
|
||||||
@ -1294,7 +1294,7 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
|
|||||||
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
|
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
|
||||||
* by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
|
* by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
|
||||||
* Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
|
* Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
|
||||||
* is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
|
* is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
|
||||||
* Since delta is a 64 bit variable, to have an overflow its value should be
|
* Since delta is a 64 bit variable, to have an overflow its value should be
|
||||||
* larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
|
* larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
|
||||||
* not an issue here.
|
* not an issue here.
|
||||||
@ -2493,7 +2493,7 @@ static void pull_dl_task(struct rq *this_rq)
|
|||||||
src_rq = cpu_rq(cpu);
|
src_rq = cpu_rq(cpu);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It looks racy, abd it is! However, as in sched_rt.c,
|
* It looks racy, and it is! However, as in sched_rt.c,
|
||||||
* we are fine with this.
|
* we are fine with this.
|
||||||
*/
|
*/
|
||||||
if (this_rq->dl.dl_nr_running &&
|
if (this_rq->dl.dl_nr_running &&
|
||||||
|
@ -61,7 +61,7 @@
|
|||||||
* Options are:
|
* Options are:
|
||||||
*
|
*
|
||||||
* SCHED_TUNABLESCALING_NONE - unscaled, always *1
|
* SCHED_TUNABLESCALING_NONE - unscaled, always *1
|
||||||
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
|
* SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus)
|
||||||
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
|
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
|
||||||
*
|
*
|
||||||
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
|
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
|
||||||
@ -3835,15 +3835,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reweight_task(struct task_struct *p, int prio)
|
void reweight_task(struct task_struct *p, const struct load_weight *lw)
|
||||||
{
|
{
|
||||||
struct sched_entity *se = &p->se;
|
struct sched_entity *se = &p->se;
|
||||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||||
struct load_weight *load = &se->load;
|
struct load_weight *load = &se->load;
|
||||||
unsigned long weight = scale_load(sched_prio_to_weight[prio]);
|
|
||||||
|
|
||||||
reweight_entity(cfs_rq, se, weight);
|
reweight_entity(cfs_rq, se, lw->weight);
|
||||||
load->inv_weight = sched_prio_to_wmult[prio];
|
load->inv_weight = lw->inv_weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
|
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
|
||||||
@ -8719,7 +8718,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
|
|||||||
* topology where each level pairs two lower groups (or better). This results
|
* topology where each level pairs two lower groups (or better). This results
|
||||||
* in O(log n) layers. Furthermore we reduce the number of CPUs going up the
|
* in O(log n) layers. Furthermore we reduce the number of CPUs going up the
|
||||||
* tree to only the first of the previous level and we decrease the frequency
|
* tree to only the first of the previous level and we decrease the frequency
|
||||||
* of load-balance at each level inv. proportional to the number of CPUs in
|
* of load-balance at each level inversely proportional to the number of CPUs in
|
||||||
* the groups.
|
* the groups.
|
||||||
*
|
*
|
||||||
* This yields:
|
* This yields:
|
||||||
@ -11885,6 +11884,13 @@ static void kick_ilb(unsigned int flags)
|
|||||||
if (ilb_cpu < 0)
|
if (ilb_cpu < 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Don't bother if no new NOHZ balance work items for ilb_cpu,
|
||||||
|
* i.e. all bits in flags are already set in ilb_cpu.
|
||||||
|
*/
|
||||||
|
if ((atomic_read(nohz_flags(ilb_cpu)) & flags) == flags)
|
||||||
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
|
* Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
|
||||||
* the first flag owns it; cleared by nohz_csd_func().
|
* the first flag owns it; cleared by nohz_csd_func().
|
||||||
|
@ -172,19 +172,13 @@ static void cpuidle_idle_call(void)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the idle task must be rescheduled. If it is the
|
* Check if the idle task must be rescheduled. If it is the
|
||||||
* case, exit the function after re-enabling the local irq.
|
* case, exit the function after re-enabling the local IRQ.
|
||||||
*/
|
*/
|
||||||
if (need_resched()) {
|
if (need_resched()) {
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* The RCU framework needs to be told that we are entering an idle
|
|
||||||
* section, so no more rcu read side critical sections and one more
|
|
||||||
* step to the grace period
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (cpuidle_not_available(drv, dev)) {
|
if (cpuidle_not_available(drv, dev)) {
|
||||||
tick_nohz_idle_stop_tick();
|
tick_nohz_idle_stop_tick();
|
||||||
|
|
||||||
@ -244,7 +238,7 @@ static void cpuidle_idle_call(void)
|
|||||||
__current_set_polling();
|
__current_set_polling();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* It is up to the idle functions to reenable local interrupts
|
* It is up to the idle functions to re-enable local interrupts
|
||||||
*/
|
*/
|
||||||
if (WARN_ON_ONCE(irqs_disabled()))
|
if (WARN_ON_ONCE(irqs_disabled()))
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
@ -320,7 +314,7 @@ static void do_idle(void)
|
|||||||
rcu_nocb_flush_deferred_wakeup();
|
rcu_nocb_flush_deferred_wakeup();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In poll mode we reenable interrupts and spin. Also if we
|
* In poll mode we re-enable interrupts and spin. Also if we
|
||||||
* detected in the wakeup from idle path that the tick
|
* detected in the wakeup from idle path that the tick
|
||||||
* broadcast device expired for us, we don't want to go deep
|
* broadcast device expired for us, we don't want to go deep
|
||||||
* idle as we know that the IPI is going to arrive right away.
|
* idle as we know that the IPI is going to arrive right away.
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
* again, being late doesn't loose the delta, just wrecks the sample.
|
* again, being late doesn't loose the delta, just wrecks the sample.
|
||||||
*
|
*
|
||||||
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
|
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
|
||||||
* this would add another cross-CPU cacheline miss and atomic operation
|
* this would add another cross-CPU cache-line miss and atomic operation
|
||||||
* to the wakeup path. Instead we increment on whatever CPU the task ran
|
* to the wakeup path. Instead we increment on whatever CPU the task ran
|
||||||
* when it went into uninterruptible state and decrement on whatever CPU
|
* when it went into uninterruptible state and decrement on whatever CPU
|
||||||
* did the wakeup. This means that only the sum of nr_uninterruptible over
|
* did the wakeup. This means that only the sum of nr_uninterruptible over
|
||||||
@ -62,7 +62,7 @@ EXPORT_SYMBOL(avenrun); /* should be removed */
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* get_avenrun - get the load average array
|
* get_avenrun - get the load average array
|
||||||
* @loads: pointer to dest load array
|
* @loads: pointer to destination load array
|
||||||
* @offset: offset to add
|
* @offset: offset to add
|
||||||
* @shift: shift count to shift the result left
|
* @shift: shift count to shift the result left
|
||||||
*
|
*
|
||||||
|
@ -417,7 +417,7 @@ int update_hw_load_avg(u64 now, struct rq *rq, u64 capacity)
|
|||||||
|
|
||||||
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
||||||
/*
|
/*
|
||||||
* irq:
|
* IRQ:
|
||||||
*
|
*
|
||||||
* util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
|
* util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
|
||||||
* util_sum = cpu_scale * load_sum
|
* util_sum = cpu_scale * load_sum
|
||||||
@ -432,7 +432,7 @@ int update_irq_load_avg(struct rq *rq, u64 running)
|
|||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We can't use clock_pelt because irq time is not accounted in
|
* We can't use clock_pelt because IRQ time is not accounted in
|
||||||
* clock_task. Instead we directly scale the running time to
|
* clock_task. Instead we directly scale the running time to
|
||||||
* reflect the real amount of computation
|
* reflect the real amount of computation
|
||||||
*/
|
*/
|
||||||
|
@ -41,7 +41,7 @@
|
|||||||
* What it means for a task to be productive is defined differently
|
* What it means for a task to be productive is defined differently
|
||||||
* for each resource. For IO, productive means a running task. For
|
* for each resource. For IO, productive means a running task. For
|
||||||
* memory, productive means a running task that isn't a reclaimer. For
|
* memory, productive means a running task that isn't a reclaimer. For
|
||||||
* CPU, productive means an oncpu task.
|
* CPU, productive means an on-CPU task.
|
||||||
*
|
*
|
||||||
* Naturally, the FULL state doesn't exist for the CPU resource at the
|
* Naturally, the FULL state doesn't exist for the CPU resource at the
|
||||||
* system level, but exist at the cgroup level. At the cgroup level,
|
* system level, but exist at the cgroup level. At the cgroup level,
|
||||||
@ -49,7 +49,7 @@
|
|||||||
* resource which is being used by others outside of the cgroup or
|
* resource which is being used by others outside of the cgroup or
|
||||||
* throttled by the cgroup cpu.max configuration.
|
* throttled by the cgroup cpu.max configuration.
|
||||||
*
|
*
|
||||||
* The percentage of wallclock time spent in those compound stall
|
* The percentage of wall clock time spent in those compound stall
|
||||||
* states gives pressure numbers between 0 and 100 for each resource,
|
* states gives pressure numbers between 0 and 100 for each resource,
|
||||||
* where the SOME percentage indicates workload slowdowns and the FULL
|
* where the SOME percentage indicates workload slowdowns and the FULL
|
||||||
* percentage indicates reduced CPU utilization:
|
* percentage indicates reduced CPU utilization:
|
||||||
@ -218,28 +218,32 @@ void __init psi_init(void)
|
|||||||
group_init(&psi_system);
|
group_init(&psi_system);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool test_state(unsigned int *tasks, enum psi_states state, bool oncpu)
|
static u32 test_states(unsigned int *tasks, u32 state_mask)
|
||||||
{
|
{
|
||||||
switch (state) {
|
const bool oncpu = state_mask & PSI_ONCPU;
|
||||||
case PSI_IO_SOME:
|
|
||||||
return unlikely(tasks[NR_IOWAIT]);
|
if (tasks[NR_IOWAIT]) {
|
||||||
case PSI_IO_FULL:
|
state_mask |= BIT(PSI_IO_SOME);
|
||||||
return unlikely(tasks[NR_IOWAIT] && !tasks[NR_RUNNING]);
|
if (!tasks[NR_RUNNING])
|
||||||
case PSI_MEM_SOME:
|
state_mask |= BIT(PSI_IO_FULL);
|
||||||
return unlikely(tasks[NR_MEMSTALL]);
|
|
||||||
case PSI_MEM_FULL:
|
|
||||||
return unlikely(tasks[NR_MEMSTALL] &&
|
|
||||||
tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);
|
|
||||||
case PSI_CPU_SOME:
|
|
||||||
return unlikely(tasks[NR_RUNNING] > oncpu);
|
|
||||||
case PSI_CPU_FULL:
|
|
||||||
return unlikely(tasks[NR_RUNNING] && !oncpu);
|
|
||||||
case PSI_NONIDLE:
|
|
||||||
return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
|
|
||||||
tasks[NR_RUNNING];
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tasks[NR_MEMSTALL]) {
|
||||||
|
state_mask |= BIT(PSI_MEM_SOME);
|
||||||
|
if (tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING])
|
||||||
|
state_mask |= BIT(PSI_MEM_FULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tasks[NR_RUNNING] > oncpu)
|
||||||
|
state_mask |= BIT(PSI_CPU_SOME);
|
||||||
|
|
||||||
|
if (tasks[NR_RUNNING] && !oncpu)
|
||||||
|
state_mask |= BIT(PSI_CPU_FULL);
|
||||||
|
|
||||||
|
if (tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] || tasks[NR_RUNNING])
|
||||||
|
state_mask |= BIT(PSI_NONIDLE);
|
||||||
|
|
||||||
|
return state_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void get_recent_times(struct psi_group *group, int cpu,
|
static void get_recent_times(struct psi_group *group, int cpu,
|
||||||
@ -345,7 +349,7 @@ static void collect_percpu_times(struct psi_group *group,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Collect the per-cpu time buckets and average them into a
|
* Collect the per-cpu time buckets and average them into a
|
||||||
* single time sample that is normalized to wallclock time.
|
* single time sample that is normalized to wall clock time.
|
||||||
*
|
*
|
||||||
* For averaging, each CPU is weighted by its non-idle time in
|
* For averaging, each CPU is weighted by its non-idle time in
|
||||||
* the sampling period. This eliminates artifacts from uneven
|
* the sampling period. This eliminates artifacts from uneven
|
||||||
@ -770,7 +774,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||||||
{
|
{
|
||||||
struct psi_group_cpu *groupc;
|
struct psi_group_cpu *groupc;
|
||||||
unsigned int t, m;
|
unsigned int t, m;
|
||||||
enum psi_states s;
|
|
||||||
u32 state_mask;
|
u32 state_mask;
|
||||||
|
|
||||||
lockdep_assert_rq_held(cpu_rq(cpu));
|
lockdep_assert_rq_held(cpu_rq(cpu));
|
||||||
@ -842,10 +845,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (s = 0; s < NR_PSI_STATES; s++) {
|
state_mask = test_states(groupc->tasks, state_mask);
|
||||||
if (test_state(groupc->tasks, s, state_mask & PSI_ONCPU))
|
|
||||||
state_mask |= (1 << s);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since we care about lost potential, a memstall is FULL
|
* Since we care about lost potential, a memstall is FULL
|
||||||
@ -1205,7 +1205,7 @@ void psi_cgroup_restart(struct psi_group *group)
|
|||||||
/*
|
/*
|
||||||
* After we disable psi_group->enabled, we don't actually
|
* After we disable psi_group->enabled, we don't actually
|
||||||
* stop percpu tasks accounting in each psi_group_cpu,
|
* stop percpu tasks accounting in each psi_group_cpu,
|
||||||
* instead only stop test_state() loop, record_times()
|
* instead only stop test_states() loop, record_times()
|
||||||
* and averaging worker, see psi_group_change() for details.
|
* and averaging worker, see psi_group_change() for details.
|
||||||
*
|
*
|
||||||
* When disable cgroup PSI, this function has nothing to sync
|
* When disable cgroup PSI, this function has nothing to sync
|
||||||
@ -1213,7 +1213,7 @@ void psi_cgroup_restart(struct psi_group *group)
|
|||||||
* would see !psi_group->enabled and only do task accounting.
|
* would see !psi_group->enabled and only do task accounting.
|
||||||
*
|
*
|
||||||
* When re-enable cgroup PSI, this function use psi_group_change()
|
* When re-enable cgroup PSI, this function use psi_group_change()
|
||||||
* to get correct state mask from test_state() loop on tasks[],
|
* to get correct state mask from test_states() loop on tasks[],
|
||||||
* and restart groupc->state_start from now, use .clear = .set = 0
|
* and restart groupc->state_start from now, use .clear = .set = 0
|
||||||
* here since no task status really changed.
|
* here since no task status really changed.
|
||||||
*/
|
*/
|
||||||
|
@ -140,7 +140,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
|
|||||||
INIT_LIST_HEAD(array->queue + i);
|
INIT_LIST_HEAD(array->queue + i);
|
||||||
__clear_bit(i, array->bitmap);
|
__clear_bit(i, array->bitmap);
|
||||||
}
|
}
|
||||||
/* delimiter for bitsearch: */
|
/* delimiter for bit-search: */
|
||||||
__set_bit(MAX_RT_PRIO, array->bitmap);
|
__set_bit(MAX_RT_PRIO, array->bitmap);
|
||||||
|
|
||||||
#if defined CONFIG_SMP
|
#if defined CONFIG_SMP
|
||||||
@ -1135,7 +1135,7 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* This may have been our highest task, and therefore
|
* This may have been our highest task, and therefore
|
||||||
* we may have some recomputation to do
|
* we may have some re-computation to do
|
||||||
*/
|
*/
|
||||||
if (prio == prev_prio) {
|
if (prio == prev_prio) {
|
||||||
struct rt_prio_array *array = &rt_rq->active;
|
struct rt_prio_array *array = &rt_rq->active;
|
||||||
@ -1571,7 +1571,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
|
|||||||
*
|
*
|
||||||
* For equal prio tasks, we just let the scheduler sort it out.
|
* For equal prio tasks, we just let the scheduler sort it out.
|
||||||
*
|
*
|
||||||
* Otherwise, just let it ride on the affined RQ and the
|
* Otherwise, just let it ride on the affine RQ and the
|
||||||
* post-schedule router will push the preempted task away
|
* post-schedule router will push the preempted task away
|
||||||
*
|
*
|
||||||
* This test is optimistic, if we get it wrong the load-balancer
|
* This test is optimistic, if we get it wrong the load-balancer
|
||||||
@ -2147,14 +2147,14 @@ static void push_rt_tasks(struct rq *rq)
|
|||||||
* if its the only CPU with multiple RT tasks queued, and a large number
|
* if its the only CPU with multiple RT tasks queued, and a large number
|
||||||
* of CPUs scheduling a lower priority task at the same time.
|
* of CPUs scheduling a lower priority task at the same time.
|
||||||
*
|
*
|
||||||
* Each root domain has its own irq work function that can iterate over
|
* Each root domain has its own IRQ work function that can iterate over
|
||||||
* all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
|
* all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
|
||||||
* task must be checked if there's one or many CPUs that are lowering
|
* task must be checked if there's one or many CPUs that are lowering
|
||||||
* their priority, there's a single irq work iterator that will try to
|
* their priority, there's a single IRQ work iterator that will try to
|
||||||
* push off RT tasks that are waiting to run.
|
* push off RT tasks that are waiting to run.
|
||||||
*
|
*
|
||||||
* When a CPU schedules a lower priority task, it will kick off the
|
* When a CPU schedules a lower priority task, it will kick off the
|
||||||
* irq work iterator that will jump to each CPU with overloaded RT tasks.
|
* IRQ work iterator that will jump to each CPU with overloaded RT tasks.
|
||||||
* As it only takes the first CPU that schedules a lower priority task
|
* As it only takes the first CPU that schedules a lower priority task
|
||||||
* to start the process, the rto_start variable is incremented and if
|
* to start the process, the rto_start variable is incremented and if
|
||||||
* the atomic result is one, then that CPU will try to take the rto_lock.
|
* the atomic result is one, then that CPU will try to take the rto_lock.
|
||||||
@ -2162,7 +2162,7 @@ static void push_rt_tasks(struct rq *rq)
|
|||||||
* CPUs scheduling lower priority tasks.
|
* CPUs scheduling lower priority tasks.
|
||||||
*
|
*
|
||||||
* All CPUs that are scheduling a lower priority task will increment the
|
* All CPUs that are scheduling a lower priority task will increment the
|
||||||
* rt_loop_next variable. This will make sure that the irq work iterator
|
* rt_loop_next variable. This will make sure that the IRQ work iterator
|
||||||
* checks all RT overloaded CPUs whenever a CPU schedules a new lower
|
* checks all RT overloaded CPUs whenever a CPU schedules a new lower
|
||||||
* priority task, even if the iterator is in the middle of a scan. Incrementing
|
* priority task, even if the iterator is in the middle of a scan. Incrementing
|
||||||
* the rt_loop_next will cause the iterator to perform another scan.
|
* the rt_loop_next will cause the iterator to perform another scan.
|
||||||
@ -2242,7 +2242,7 @@ static void tell_cpu_to_push(struct rq *rq)
|
|||||||
* The rto_cpu is updated under the lock, if it has a valid CPU
|
* The rto_cpu is updated under the lock, if it has a valid CPU
|
||||||
* then the IPI is still running and will continue due to the
|
* then the IPI is still running and will continue due to the
|
||||||
* update to loop_next, and nothing needs to be done here.
|
* update to loop_next, and nothing needs to be done here.
|
||||||
* Otherwise it is finishing up and an ipi needs to be sent.
|
* Otherwise it is finishing up and an IPI needs to be sent.
|
||||||
*/
|
*/
|
||||||
if (rq->rd->rto_cpu < 0)
|
if (rq->rd->rto_cpu < 0)
|
||||||
cpu = rto_next_cpu(rq->rd);
|
cpu = rto_next_cpu(rq->rd);
|
||||||
@ -2594,7 +2594,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
|
|||||||
watchdog(rq, p);
|
watchdog(rq, p);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RR tasks need a special form of timeslice management.
|
* RR tasks need a special form of time-slice management.
|
||||||
* FIFO tasks have no timeslices.
|
* FIFO tasks have no timeslices.
|
||||||
*/
|
*/
|
||||||
if (p->policy != SCHED_RR)
|
if (p->policy != SCHED_RR)
|
||||||
@ -2900,7 +2900,7 @@ static int sched_rt_global_constraints(void)
|
|||||||
|
|
||||||
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
|
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
|
||||||
{
|
{
|
||||||
/* Don't accept realtime tasks when there is no way for them to run */
|
/* Don't accept real-time tasks when there is no way for them to run */
|
||||||
if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
|
if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -3001,7 +3001,7 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
|
|||||||
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
||||||
/*
|
/*
|
||||||
* Make sure that internally we keep jiffies.
|
* Make sure that internally we keep jiffies.
|
||||||
* Also, writing zero resets the timeslice to default:
|
* Also, writing zero resets the time-slice to default:
|
||||||
*/
|
*/
|
||||||
if (!ret && write) {
|
if (!ret && write) {
|
||||||
sched_rr_timeslice =
|
sched_rr_timeslice =
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -224,7 +224,7 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
|
|||||||
/*
|
/*
|
||||||
* Called when a task finally hits the CPU. We can now calculate how
|
* Called when a task finally hits the CPU. We can now calculate how
|
||||||
* long it was waiting to run. We also note when it began so that we
|
* long it was waiting to run. We also note when it began so that we
|
||||||
* can keep stats on how long its timeslice is.
|
* can keep stats on how long its time-slice is.
|
||||||
*/
|
*/
|
||||||
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
|
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
|
||||||
{
|
{
|
||||||
|
1699
kernel/sched/syscalls.c
Normal file
1699
kernel/sched/syscalls.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -501,7 +501,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
|
|||||||
cpumask_clear_cpu(rq->cpu, old_rd->span);
|
cpumask_clear_cpu(rq->cpu, old_rd->span);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we dont want to free the old_rd yet then
|
* If we don't want to free the old_rd yet then
|
||||||
* set old_rd to NULL to skip the freeing later
|
* set old_rd to NULL to skip the freeing later
|
||||||
* in this function:
|
* in this function:
|
||||||
*/
|
*/
|
||||||
@ -1176,7 +1176,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
|
|||||||
* uniquely identify each group (for a given domain):
|
* uniquely identify each group (for a given domain):
|
||||||
*
|
*
|
||||||
* - The first is the balance_cpu (see should_we_balance() and the
|
* - The first is the balance_cpu (see should_we_balance() and the
|
||||||
* load-balance blub in fair.c); for each group we only want 1 CPU to
|
* load-balance blurb in fair.c); for each group we only want 1 CPU to
|
||||||
* continue balancing at a higher domain.
|
* continue balancing at a higher domain.
|
||||||
*
|
*
|
||||||
* - The second is the sched_group_capacity; we want all identical groups
|
* - The second is the sched_group_capacity; we want all identical groups
|
||||||
@ -1388,7 +1388,7 @@ static inline void asym_cpu_capacity_update_data(int cpu)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Search if capacity already exits. If not, track which the entry
|
* Search if capacity already exits. If not, track which the entry
|
||||||
* where we should insert to keep the list ordered descendingly.
|
* where we should insert to keep the list ordered descending.
|
||||||
*/
|
*/
|
||||||
list_for_each_entry(entry, &asym_cap_list, link) {
|
list_for_each_entry(entry, &asym_cap_list, link) {
|
||||||
if (capacity == entry->capacity)
|
if (capacity == entry->capacity)
|
||||||
@ -1853,7 +1853,7 @@ void sched_init_numa(int offline_node)
|
|||||||
struct cpumask ***masks;
|
struct cpumask ***masks;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* O(nr_nodes^2) deduplicating selection sort -- in order to find the
|
* O(nr_nodes^2) de-duplicating selection sort -- in order to find the
|
||||||
* unique distances in the node_distance() table.
|
* unique distances in the node_distance() table.
|
||||||
*/
|
*/
|
||||||
distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
|
distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
|
||||||
@ -2750,7 +2750,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
|
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
|
||||||
/* Build perf. domains: */
|
/* Build perf domains: */
|
||||||
for (i = 0; i < ndoms_new; i++) {
|
for (i = 0; i < ndoms_new; i++) {
|
||||||
for (j = 0; j < n && !sched_energy_update; j++) {
|
for (j = 0; j < n && !sched_energy_update; j++) {
|
||||||
if (cpumask_equal(doms_new[i], doms_cur[j]) &&
|
if (cpumask_equal(doms_new[i], doms_cur[j]) &&
|
||||||
@ -2759,7 +2759,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
|
|||||||
goto match3;
|
goto match3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* No match - add perf. domains for a new rd */
|
/* No match - add perf domains for a new rd */
|
||||||
has_eas |= build_perf_domains(doms_new[i]);
|
has_eas |= build_perf_domains(doms_new[i]);
|
||||||
match3:
|
match3:
|
||||||
;
|
;
|
||||||
|
@ -33,7 +33,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
|
|||||||
EXPORT_SYMBOL(wake_bit_function);
|
EXPORT_SYMBOL(wake_bit_function);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* To allow interruptible waiting and asynchronous (i.e. nonblocking)
|
* To allow interruptible waiting and asynchronous (i.e. non-blocking)
|
||||||
* waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
|
* waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
|
||||||
* permitted return codes. Nonzero return codes halt waiting and return.
|
* permitted return codes. Nonzero return codes halt waiting and return.
|
||||||
*/
|
*/
|
||||||
@ -133,7 +133,7 @@ EXPORT_SYMBOL(__wake_up_bit);
|
|||||||
* @bit: the bit of the word being waited on
|
* @bit: the bit of the word being waited on
|
||||||
*
|
*
|
||||||
* There is a standard hashed waitqueue table for generic use. This
|
* There is a standard hashed waitqueue table for generic use. This
|
||||||
* is the part of the hashtable's accessor API that wakes up waiters
|
* is the part of the hash-table's accessor API that wakes up waiters
|
||||||
* on a bit. For instance, if one were to have waiters on a bitflag,
|
* on a bit. For instance, if one were to have waiters on a bitflag,
|
||||||
* one would call wake_up_bit() after clearing the bit.
|
* one would call wake_up_bit() after clearing the bit.
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user