mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (31 commits) sched: fix warning in fs/proc/base.c schedstat: consolidate per-task cpu runtime stats sched: use RCU variant of list traversal in for_each_leaf_rt_rq() sched, cpuacct: export percpu cpuacct cgroup stats sched, cpuacct: refactoring cpuusage_read / cpuusage_write sched: optimize update_curr() sched: fix wakeup preemption clock sched: add missing arch_update_cpu_topology() call sched: let arch_update_cpu_topology indicate if topology changed sched: idle_balance() does not call load_balance_newidle() sched: fix sd_parent_degenerate on non-numa smp machine sched: add uid information to sched_debug for CONFIG_USER_SCHED sched: move double_unlock_balance() higher sched: update comment for move_task_off_dead_cpu sched: fix inconsistency when redistribute per-cpu tg->cfs_rq shares sched/rt: removed unneeded defintion sched: add hierarchical accounting to cpu accounting controller sched: include group statistics in /proc/sched_debug sched: rename SCHED_NO_NO_OMIT_FRAME_POINTER => SCHED_OMIT_FRAME_POINTER sched: clean up SCHED_CPUMASK_ALLOC ...
This commit is contained in:
commit
a39b863342
32
Documentation/controllers/cpuacct.txt
Normal file
32
Documentation/controllers/cpuacct.txt
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
CPU Accounting Controller
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
The CPU accounting controller is used to group tasks using cgroups and
|
||||||
|
account the CPU usage of these groups of tasks.
|
||||||
|
|
||||||
|
The CPU accounting controller supports multi-hierarchy groups. An accounting
|
||||||
|
group accumulates the CPU usage of all of its child groups and the tasks
|
||||||
|
directly present in its group.
|
||||||
|
|
||||||
|
Accounting groups can be created by first mounting the cgroup filesystem.
|
||||||
|
|
||||||
|
# mkdir /cgroups
|
||||||
|
# mount -t cgroup -ocpuacct none /cgroups
|
||||||
|
|
||||||
|
With the above step, the initial or the parent accounting group
|
||||||
|
becomes visible at /cgroups. At bootup, this group includes all the
|
||||||
|
tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
|
||||||
|
/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
|
||||||
|
this group, which is essentially the CPU time obtained by all the tasks
|
||||||
|
in the system.
|
||||||
|
|
||||||
|
New accounting groups can be created under the parent group /cgroups.
|
||||||
|
|
||||||
|
# cd /cgroups
|
||||||
|
# mkdir g1
|
||||||
|
# echo $$ > g1/tasks
|
||||||
|
|
||||||
|
The above steps create a new group g1 and move the current shell
|
||||||
|
process (bash) into it. CPU time consumed by this bash process and its children
|
||||||
|
can be obtained from g1/cpuacct.usage and the same is accumulated in
|
||||||
|
/cgroups/cpuacct.usage also.
|
@ -8,7 +8,7 @@ Context switch
|
|||||||
By default, the switch_to arch function is called with the runqueue
|
By default, the switch_to arch function is called with the runqueue
|
||||||
locked. This is usually not a problem unless switch_to may need to
|
locked. This is usually not a problem unless switch_to may need to
|
||||||
take the runqueue lock. This is usually due to a wake up operation in
|
take the runqueue lock. This is usually due to a wake up operation in
|
||||||
the context switch. See include/asm-ia64/system.h for an example.
|
the context switch. See arch/ia64/include/asm/system.h for an example.
|
||||||
|
|
||||||
To request the scheduler call switch_to with the runqueue unlocked,
|
To request the scheduler call switch_to with the runqueue unlocked,
|
||||||
you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
|
you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
|
||||||
@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
|
|||||||
introduce a significant interrupt latency by adding the line
|
introduce a significant interrupt latency by adding the line
|
||||||
`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
|
`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
|
||||||
unlocked context switches. This define also implies
|
unlocked context switches. This define also implies
|
||||||
`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
|
`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
|
||||||
example.
|
example.
|
||||||
|
|
||||||
|
|
||||||
|
@ -99,7 +99,7 @@ config GENERIC_IOMAP
|
|||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
config SCHED_NO_NO_OMIT_FRAME_POINTER
|
config SCHED_OMIT_FRAME_POINTER
|
||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
|
@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
|
|||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
config SCHED_NO_NO_OMIT_FRAME_POINTER
|
config SCHED_OMIT_FRAME_POINTER
|
||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
|
@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
|
|||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
config SCHED_NO_NO_OMIT_FRAME_POINTER
|
config SCHED_OMIT_FRAME_POINTER
|
||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
|
@ -141,7 +141,7 @@ config GENERIC_NVRAM
|
|||||||
bool
|
bool
|
||||||
default y if PPC32
|
default y if PPC32
|
||||||
|
|
||||||
config SCHED_NO_NO_OMIT_FRAME_POINTER
|
config SCHED_OMIT_FRAME_POINTER
|
||||||
bool
|
bool
|
||||||
default y
|
default y
|
||||||
|
|
||||||
|
@ -212,7 +212,7 @@ static void update_cpu_core_map(void)
|
|||||||
cpu_core_map[cpu] = cpu_coregroup_map(cpu);
|
cpu_core_map[cpu] = cpu_coregroup_map(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
void arch_update_cpu_topology(void)
|
int arch_update_cpu_topology(void)
|
||||||
{
|
{
|
||||||
struct tl_info *info = tl_info;
|
struct tl_info *info = tl_info;
|
||||||
struct sys_device *sysdev;
|
struct sys_device *sysdev;
|
||||||
@ -221,7 +221,7 @@ void arch_update_cpu_topology(void)
|
|||||||
if (!machine_has_topology) {
|
if (!machine_has_topology) {
|
||||||
update_cpu_core_map();
|
update_cpu_core_map();
|
||||||
topology_update_polarization_simple();
|
topology_update_polarization_simple();
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
stsi(info, 15, 1, 2);
|
stsi(info, 15, 1, 2);
|
||||||
tl_to_cores(info);
|
tl_to_cores(info);
|
||||||
@ -230,6 +230,7 @@ void arch_update_cpu_topology(void)
|
|||||||
sysdev = get_cpu_sysdev(cpu);
|
sysdev = get_cpu_sysdev(cpu);
|
||||||
kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
|
kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
|
||||||
}
|
}
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void topology_work_fn(struct work_struct *work)
|
static void topology_work_fn(struct work_struct *work)
|
||||||
|
@ -368,10 +368,10 @@ config X86_RDC321X
|
|||||||
as R-8610-(G).
|
as R-8610-(G).
|
||||||
If you don't have one of these chips, you should say N here.
|
If you don't have one of these chips, you should say N here.
|
||||||
|
|
||||||
config SCHED_NO_NO_OMIT_FRAME_POINTER
|
config SCHED_OMIT_FRAME_POINTER
|
||||||
def_bool y
|
def_bool y
|
||||||
prompt "Single-depth WCHAN output"
|
prompt "Single-depth WCHAN output"
|
||||||
depends on X86_32
|
depends on X86
|
||||||
help
|
help
|
||||||
Calculate simpler /proc/<PID>/wchan values. If this option
|
Calculate simpler /proc/<PID>/wchan values. If this option
|
||||||
is disabled then wchan values will recurse back to the
|
is disabled then wchan values will recurse back to the
|
||||||
|
@ -347,8 +347,8 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
|
|||||||
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
|
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
|
||||||
{
|
{
|
||||||
return sprintf(buffer, "%llu %llu %lu\n",
|
return sprintf(buffer, "%llu %llu %lu\n",
|
||||||
task->sched_info.cpu_time,
|
(unsigned long long)task->se.sum_exec_runtime,
|
||||||
task->sched_info.run_delay,
|
(unsigned long long)task->sched_info.run_delay,
|
||||||
task->sched_info.pcount);
|
task->sched_info.pcount);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(CONFIG_FRAME_POINTER) || \
|
#if defined(CONFIG_FRAME_POINTER) || \
|
||||||
!defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER)
|
!defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
|
||||||
#define M32R_PUSH_FP " push fp\n"
|
#define M32R_PUSH_FP " push fp\n"
|
||||||
#define M32R_POP_FP " pop fp\n"
|
#define M32R_POP_FP " pop fp\n"
|
||||||
#else
|
#else
|
||||||
|
@ -260,8 +260,6 @@ static inline int select_nohz_load_balancer(int cpu)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern unsigned long rt_needs_cpu(int cpu);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only dump TASK_* tasks. (0 for all tasks)
|
* Only dump TASK_* tasks. (0 for all tasks)
|
||||||
*/
|
*/
|
||||||
@ -669,8 +667,7 @@ struct reclaim_state;
|
|||||||
struct sched_info {
|
struct sched_info {
|
||||||
/* cumulative counters */
|
/* cumulative counters */
|
||||||
unsigned long pcount; /* # of times run on this cpu */
|
unsigned long pcount; /* # of times run on this cpu */
|
||||||
unsigned long long cpu_time, /* time spent on the cpu */
|
unsigned long long run_delay; /* time spent waiting on a runqueue */
|
||||||
run_delay; /* time spent waiting on a runqueue */
|
|
||||||
|
|
||||||
/* timestamps */
|
/* timestamps */
|
||||||
unsigned long long last_arrival,/* when we last ran on a cpu */
|
unsigned long long last_arrival,/* when we last ran on a cpu */
|
||||||
@ -2210,6 +2207,7 @@ extern void normalize_rt_tasks(void);
|
|||||||
extern struct task_group init_task_group;
|
extern struct task_group init_task_group;
|
||||||
#ifdef CONFIG_USER_SCHED
|
#ifdef CONFIG_USER_SCHED
|
||||||
extern struct task_group root_task_group;
|
extern struct task_group root_task_group;
|
||||||
|
extern void set_tg_uid(struct user_struct *user);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern struct task_group *sched_create_group(struct task_group *parent);
|
extern struct task_group *sched_create_group(struct task_group *parent);
|
||||||
|
@ -49,7 +49,7 @@
|
|||||||
for_each_online_node(node) \
|
for_each_online_node(node) \
|
||||||
if (nr_cpus_node(node))
|
if (nr_cpus_node(node))
|
||||||
|
|
||||||
void arch_update_cpu_topology(void);
|
int arch_update_cpu_topology(void);
|
||||||
|
|
||||||
/* Conform to ACPI 2.0 SLIT distance definitions */
|
/* Conform to ACPI 2.0 SLIT distance definitions */
|
||||||
#define LOCAL_DISTANCE 10
|
#define LOCAL_DISTANCE 10
|
||||||
|
@ -19,7 +19,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
|
|||||||
CFLAGS_REMOVE_rtmutex-debug.o = -pg
|
CFLAGS_REMOVE_rtmutex-debug.o = -pg
|
||||||
CFLAGS_REMOVE_cgroup-debug.o = -pg
|
CFLAGS_REMOVE_cgroup-debug.o = -pg
|
||||||
CFLAGS_REMOVE_sched_clock.o = -pg
|
CFLAGS_REMOVE_sched_clock.o = -pg
|
||||||
CFLAGS_REMOVE_sched.o = -pg
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
obj-$(CONFIG_FREEZER) += freezer.o
|
obj-$(CONFIG_FREEZER) += freezer.o
|
||||||
@ -90,7 +89,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
|
|||||||
obj-$(CONFIG_TRACING) += trace/
|
obj-$(CONFIG_TRACING) += trace/
|
||||||
obj-$(CONFIG_SMP) += sched_cpupri.o
|
obj-$(CONFIG_SMP) += sched_cpupri.o
|
||||||
|
|
||||||
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
|
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
|
||||||
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
||||||
# needed for x86 only. Why this used to be enabled for all architectures is beyond
|
# needed for x86 only. Why this used to be enabled for all architectures is beyond
|
||||||
# me. I suspect most platforms don't need this, but until we know that for sure
|
# me. I suspect most platforms don't need this, but until we know that for sure
|
||||||
|
@ -127,7 +127,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
|
|||||||
*/
|
*/
|
||||||
t1 = tsk->sched_info.pcount;
|
t1 = tsk->sched_info.pcount;
|
||||||
t2 = tsk->sched_info.run_delay;
|
t2 = tsk->sched_info.run_delay;
|
||||||
t3 = tsk->sched_info.cpu_time;
|
t3 = tsk->se.sum_exec_runtime;
|
||||||
|
|
||||||
d->cpu_count += t1;
|
d->cpu_count += t1;
|
||||||
|
|
||||||
|
367
kernel/sched.c
367
kernel/sched.c
@ -267,6 +267,10 @@ struct task_group {
|
|||||||
struct cgroup_subsys_state css;
|
struct cgroup_subsys_state css;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_USER_SCHED
|
||||||
|
uid_t uid;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
/* schedulable entities of this group on each cpu */
|
/* schedulable entities of this group on each cpu */
|
||||||
struct sched_entity **se;
|
struct sched_entity **se;
|
||||||
@ -292,6 +296,12 @@ struct task_group {
|
|||||||
|
|
||||||
#ifdef CONFIG_USER_SCHED
|
#ifdef CONFIG_USER_SCHED
|
||||||
|
|
||||||
|
/* Helper function to pass uid information to create_sched_user() */
|
||||||
|
void set_tg_uid(struct user_struct *user)
|
||||||
|
{
|
||||||
|
user->tg->uid = user->uid;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Root task group.
|
* Root task group.
|
||||||
* Every UID task group (including init_task_group aka UID-0) will
|
* Every UID task group (including init_task_group aka UID-0) will
|
||||||
@ -594,6 +604,8 @@ struct rq {
|
|||||||
#ifdef CONFIG_SCHEDSTATS
|
#ifdef CONFIG_SCHEDSTATS
|
||||||
/* latency stats */
|
/* latency stats */
|
||||||
struct sched_info rq_sched_info;
|
struct sched_info rq_sched_info;
|
||||||
|
unsigned long long rq_cpu_time;
|
||||||
|
/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
|
||||||
|
|
||||||
/* sys_sched_yield() stats */
|
/* sys_sched_yield() stats */
|
||||||
unsigned int yld_exp_empty;
|
unsigned int yld_exp_empty;
|
||||||
@ -711,45 +723,18 @@ static __read_mostly char *sched_feat_names[] = {
|
|||||||
|
|
||||||
#undef SCHED_FEAT
|
#undef SCHED_FEAT
|
||||||
|
|
||||||
static int sched_feat_open(struct inode *inode, struct file *filp)
|
static int sched_feat_show(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
filp->private_data = inode->i_private;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t
|
|
||||||
sched_feat_read(struct file *filp, char __user *ubuf,
|
|
||||||
size_t cnt, loff_t *ppos)
|
|
||||||
{
|
|
||||||
char *buf;
|
|
||||||
int r = 0;
|
|
||||||
int len = 0;
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; sched_feat_names[i]; i++) {
|
for (i = 0; sched_feat_names[i]; i++) {
|
||||||
len += strlen(sched_feat_names[i]);
|
if (!(sysctl_sched_features & (1UL << i)))
|
||||||
len += 4;
|
seq_puts(m, "NO_");
|
||||||
|
seq_printf(m, "%s ", sched_feat_names[i]);
|
||||||
}
|
}
|
||||||
|
seq_puts(m, "\n");
|
||||||
|
|
||||||
buf = kmalloc(len + 2, GFP_KERNEL);
|
return 0;
|
||||||
if (!buf)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
for (i = 0; sched_feat_names[i]; i++) {
|
|
||||||
if (sysctl_sched_features & (1UL << i))
|
|
||||||
r += sprintf(buf + r, "%s ", sched_feat_names[i]);
|
|
||||||
else
|
|
||||||
r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
r += sprintf(buf + r, "\n");
|
|
||||||
WARN_ON(r >= len + 2);
|
|
||||||
|
|
||||||
r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
|
|
||||||
|
|
||||||
kfree(buf);
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t
|
static ssize_t
|
||||||
@ -794,10 +779,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
|
|||||||
return cnt;
|
return cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int sched_feat_open(struct inode *inode, struct file *filp)
|
||||||
|
{
|
||||||
|
return single_open(filp, sched_feat_show, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
static struct file_operations sched_feat_fops = {
|
static struct file_operations sched_feat_fops = {
|
||||||
.open = sched_feat_open,
|
.open = sched_feat_open,
|
||||||
.read = sched_feat_read,
|
.write = sched_feat_write,
|
||||||
.write = sched_feat_write,
|
.read = seq_read,
|
||||||
|
.llseek = seq_lseek,
|
||||||
|
.release = single_release,
|
||||||
};
|
};
|
||||||
|
|
||||||
static __init int sched_init_debug(void)
|
static __init int sched_init_debug(void)
|
||||||
@ -1482,27 +1474,13 @@ static void
|
|||||||
update_group_shares_cpu(struct task_group *tg, int cpu,
|
update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||||
unsigned long sd_shares, unsigned long sd_rq_weight)
|
unsigned long sd_shares, unsigned long sd_rq_weight)
|
||||||
{
|
{
|
||||||
int boost = 0;
|
|
||||||
unsigned long shares;
|
unsigned long shares;
|
||||||
unsigned long rq_weight;
|
unsigned long rq_weight;
|
||||||
|
|
||||||
if (!tg->se[cpu])
|
if (!tg->se[cpu])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rq_weight = tg->cfs_rq[cpu]->load.weight;
|
rq_weight = tg->cfs_rq[cpu]->rq_weight;
|
||||||
|
|
||||||
/*
|
|
||||||
* If there are currently no tasks on the cpu pretend there is one of
|
|
||||||
* average load so that when a new task gets to run here it will not
|
|
||||||
* get delayed by group starvation.
|
|
||||||
*/
|
|
||||||
if (!rq_weight) {
|
|
||||||
boost = 1;
|
|
||||||
rq_weight = NICE_0_LOAD;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unlikely(rq_weight > sd_rq_weight))
|
|
||||||
rq_weight = sd_rq_weight;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* \Sum shares * rq_weight
|
* \Sum shares * rq_weight
|
||||||
@ -1510,7 +1488,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
|
|||||||
* \Sum rq_weight
|
* \Sum rq_weight
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
|
shares = (sd_shares * rq_weight) / sd_rq_weight;
|
||||||
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
|
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
|
||||||
|
|
||||||
if (abs(shares - tg->se[cpu]->load.weight) >
|
if (abs(shares - tg->se[cpu]->load.weight) >
|
||||||
@ -1519,11 +1497,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
|
|||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irqsave(&rq->lock, flags);
|
spin_lock_irqsave(&rq->lock, flags);
|
||||||
/*
|
tg->cfs_rq[cpu]->shares = shares;
|
||||||
* record the actual number of shares, not the boosted amount.
|
|
||||||
*/
|
|
||||||
tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
|
|
||||||
tg->cfs_rq[cpu]->rq_weight = rq_weight;
|
|
||||||
|
|
||||||
__set_se_shares(tg->se[cpu], shares);
|
__set_se_shares(tg->se[cpu], shares);
|
||||||
spin_unlock_irqrestore(&rq->lock, flags);
|
spin_unlock_irqrestore(&rq->lock, flags);
|
||||||
@ -1537,13 +1511,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
|
|||||||
*/
|
*/
|
||||||
static int tg_shares_up(struct task_group *tg, void *data)
|
static int tg_shares_up(struct task_group *tg, void *data)
|
||||||
{
|
{
|
||||||
unsigned long rq_weight = 0;
|
unsigned long weight, rq_weight = 0;
|
||||||
unsigned long shares = 0;
|
unsigned long shares = 0;
|
||||||
struct sched_domain *sd = data;
|
struct sched_domain *sd = data;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for_each_cpu_mask(i, sd->span) {
|
for_each_cpu_mask(i, sd->span) {
|
||||||
rq_weight += tg->cfs_rq[i]->load.weight;
|
/*
|
||||||
|
* If there are currently no tasks on the cpu pretend there
|
||||||
|
* is one of average load so that when a new task gets to
|
||||||
|
* run here it will not get delayed by group starvation.
|
||||||
|
*/
|
||||||
|
weight = tg->cfs_rq[i]->load.weight;
|
||||||
|
if (!weight)
|
||||||
|
weight = NICE_0_LOAD;
|
||||||
|
|
||||||
|
tg->cfs_rq[i]->rq_weight = weight;
|
||||||
|
rq_weight += weight;
|
||||||
shares += tg->cfs_rq[i]->shares;
|
shares += tg->cfs_rq[i]->shares;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1553,9 +1537,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
|
|||||||
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
|
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
|
||||||
shares = tg->shares;
|
shares = tg->shares;
|
||||||
|
|
||||||
if (!rq_weight)
|
|
||||||
rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
|
|
||||||
|
|
||||||
for_each_cpu_mask(i, sd->span)
|
for_each_cpu_mask(i, sd->span)
|
||||||
update_group_shares_cpu(tg, i, shares, rq_weight);
|
update_group_shares_cpu(tg, i, shares, rq_weight);
|
||||||
|
|
||||||
@ -1620,6 +1601,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
|
||||||
|
*/
|
||||||
|
static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
|
||||||
|
__releases(this_rq->lock)
|
||||||
|
__acquires(busiest->lock)
|
||||||
|
__acquires(this_rq->lock)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (unlikely(!irqs_disabled())) {
|
||||||
|
/* printk() doesn't work good under rq->lock */
|
||||||
|
spin_unlock(&this_rq->lock);
|
||||||
|
BUG_ON(1);
|
||||||
|
}
|
||||||
|
if (unlikely(!spin_trylock(&busiest->lock))) {
|
||||||
|
if (busiest < this_rq) {
|
||||||
|
spin_unlock(&this_rq->lock);
|
||||||
|
spin_lock(&busiest->lock);
|
||||||
|
spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
|
||||||
|
ret = 1;
|
||||||
|
} else
|
||||||
|
spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
|
||||||
|
__releases(busiest->lock)
|
||||||
|
{
|
||||||
|
spin_unlock(&busiest->lock);
|
||||||
|
lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
@ -2264,6 +2278,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
|
|||||||
|
|
||||||
smp_wmb();
|
smp_wmb();
|
||||||
rq = task_rq_lock(p, &flags);
|
rq = task_rq_lock(p, &flags);
|
||||||
|
update_rq_clock(rq);
|
||||||
old_state = p->state;
|
old_state = p->state;
|
||||||
if (!(old_state & state))
|
if (!(old_state & state))
|
||||||
goto out;
|
goto out;
|
||||||
@ -2321,7 +2336,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
|
|||||||
schedstat_inc(p, se.nr_wakeups_local);
|
schedstat_inc(p, se.nr_wakeups_local);
|
||||||
else
|
else
|
||||||
schedstat_inc(p, se.nr_wakeups_remote);
|
schedstat_inc(p, se.nr_wakeups_remote);
|
||||||
update_rq_clock(rq);
|
|
||||||
activate_task(rq, p, 1);
|
activate_task(rq, p, 1);
|
||||||
success = 1;
|
success = 1;
|
||||||
|
|
||||||
@ -2821,40 +2835,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
|
|||||||
__release(rq2->lock);
|
__release(rq2->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
|
|
||||||
*/
|
|
||||||
static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
|
|
||||||
__releases(this_rq->lock)
|
|
||||||
__acquires(busiest->lock)
|
|
||||||
__acquires(this_rq->lock)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (unlikely(!irqs_disabled())) {
|
|
||||||
/* printk() doesn't work good under rq->lock */
|
|
||||||
spin_unlock(&this_rq->lock);
|
|
||||||
BUG_ON(1);
|
|
||||||
}
|
|
||||||
if (unlikely(!spin_trylock(&busiest->lock))) {
|
|
||||||
if (busiest < this_rq) {
|
|
||||||
spin_unlock(&this_rq->lock);
|
|
||||||
spin_lock(&busiest->lock);
|
|
||||||
spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
|
|
||||||
ret = 1;
|
|
||||||
} else
|
|
||||||
spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
|
|
||||||
__releases(busiest->lock)
|
|
||||||
{
|
|
||||||
spin_unlock(&busiest->lock);
|
|
||||||
lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If dest_cpu is allowed for this process, migrate the task to it.
|
* If dest_cpu is allowed for this process, migrate the task to it.
|
||||||
* This is accomplished by forcing the cpu_allowed mask to only
|
* This is accomplished by forcing the cpu_allowed mask to only
|
||||||
@ -3716,7 +3696,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
|
|||||||
static void idle_balance(int this_cpu, struct rq *this_rq)
|
static void idle_balance(int this_cpu, struct rq *this_rq)
|
||||||
{
|
{
|
||||||
struct sched_domain *sd;
|
struct sched_domain *sd;
|
||||||
int pulled_task = -1;
|
int pulled_task = 0;
|
||||||
unsigned long next_balance = jiffies + HZ;
|
unsigned long next_balance = jiffies + HZ;
|
||||||
cpumask_t tmpmask;
|
cpumask_t tmpmask;
|
||||||
|
|
||||||
@ -6150,7 +6130,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Figure out where task on dead CPU should go, use force if necessary.
|
* Figure out where task on dead CPU should go, use force if necessary.
|
||||||
* NOTE: interrupts should be disabled by the caller
|
|
||||||
*/
|
*/
|
||||||
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
|
static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
|
||||||
{
|
{
|
||||||
@ -6662,28 +6641,6 @@ early_initcall(migration_init);
|
|||||||
|
|
||||||
#ifdef CONFIG_SCHED_DEBUG
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
|
|
||||||
static inline const char *sd_level_to_string(enum sched_domain_level lvl)
|
|
||||||
{
|
|
||||||
switch (lvl) {
|
|
||||||
case SD_LV_NONE:
|
|
||||||
return "NONE";
|
|
||||||
case SD_LV_SIBLING:
|
|
||||||
return "SIBLING";
|
|
||||||
case SD_LV_MC:
|
|
||||||
return "MC";
|
|
||||||
case SD_LV_CPU:
|
|
||||||
return "CPU";
|
|
||||||
case SD_LV_NODE:
|
|
||||||
return "NODE";
|
|
||||||
case SD_LV_ALLNODES:
|
|
||||||
return "ALLNODES";
|
|
||||||
case SD_LV_MAX:
|
|
||||||
return "MAX";
|
|
||||||
|
|
||||||
}
|
|
||||||
return "MAX";
|
|
||||||
}
|
|
||||||
|
|
||||||
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||||
cpumask_t *groupmask)
|
cpumask_t *groupmask)
|
||||||
{
|
{
|
||||||
@ -6703,8 +6660,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
printk(KERN_CONT "span %s level %s\n",
|
printk(KERN_CONT "span %s level %s\n", str, sd->name);
|
||||||
str, sd_level_to_string(sd->level));
|
|
||||||
|
|
||||||
if (!cpu_isset(cpu, sd->span)) {
|
if (!cpu_isset(cpu, sd->span)) {
|
||||||
printk(KERN_ERR "ERROR: domain->span does not contain "
|
printk(KERN_ERR "ERROR: domain->span does not contain "
|
||||||
@ -6840,6 +6796,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
|
|||||||
SD_BALANCE_EXEC |
|
SD_BALANCE_EXEC |
|
||||||
SD_SHARE_CPUPOWER |
|
SD_SHARE_CPUPOWER |
|
||||||
SD_SHARE_PKG_RESOURCES);
|
SD_SHARE_PKG_RESOURCES);
|
||||||
|
if (nr_node_ids == 1)
|
||||||
|
pflags &= ~SD_SERIALIZE;
|
||||||
}
|
}
|
||||||
if (~cflags & pflags)
|
if (~cflags & pflags)
|
||||||
return 0;
|
return 0;
|
||||||
@ -7360,13 +7318,21 @@ struct allmasks {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#if NR_CPUS > 128
|
#if NR_CPUS > 128
|
||||||
#define SCHED_CPUMASK_ALLOC 1
|
#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
|
||||||
#define SCHED_CPUMASK_FREE(v) kfree(v)
|
static inline void sched_cpumask_alloc(struct allmasks **masks)
|
||||||
#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
|
{
|
||||||
|
*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
|
||||||
|
}
|
||||||
|
static inline void sched_cpumask_free(struct allmasks *masks)
|
||||||
|
{
|
||||||
|
kfree(masks);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
#define SCHED_CPUMASK_ALLOC 0
|
#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
|
||||||
#define SCHED_CPUMASK_FREE(v)
|
static inline void sched_cpumask_alloc(struct allmasks **masks)
|
||||||
#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
|
{ }
|
||||||
|
static inline void sched_cpumask_free(struct allmasks *masks)
|
||||||
|
{ }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
|
#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
|
||||||
@ -7442,9 +7408,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
|
|||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SCHED_CPUMASK_ALLOC
|
|
||||||
/* get space for all scratch cpumask variables */
|
/* get space for all scratch cpumask variables */
|
||||||
allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
|
sched_cpumask_alloc(&allmasks);
|
||||||
if (!allmasks) {
|
if (!allmasks) {
|
||||||
printk(KERN_WARNING "Cannot alloc cpumask array\n");
|
printk(KERN_WARNING "Cannot alloc cpumask array\n");
|
||||||
kfree(rd);
|
kfree(rd);
|
||||||
@ -7453,7 +7418,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
|
|||||||
#endif
|
#endif
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
tmpmask = (cpumask_t *)allmasks;
|
tmpmask = (cpumask_t *)allmasks;
|
||||||
|
|
||||||
|
|
||||||
@ -7707,13 +7672,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
|
|||||||
cpu_attach_domain(sd, rd, i);
|
cpu_attach_domain(sd, rd, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
SCHED_CPUMASK_FREE((void *)allmasks);
|
sched_cpumask_free(allmasks);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
error:
|
error:
|
||||||
free_sched_groups(cpu_map, tmpmask);
|
free_sched_groups(cpu_map, tmpmask);
|
||||||
SCHED_CPUMASK_FREE((void *)allmasks);
|
sched_cpumask_free(allmasks);
|
||||||
kfree(rd);
|
kfree(rd);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
#endif
|
#endif
|
||||||
@ -7736,8 +7701,14 @@ static struct sched_domain_attr *dattr_cur;
|
|||||||
*/
|
*/
|
||||||
static cpumask_t fallback_doms;
|
static cpumask_t fallback_doms;
|
||||||
|
|
||||||
void __attribute__((weak)) arch_update_cpu_topology(void)
|
/*
|
||||||
|
* arch_update_cpu_topology lets virtualized architectures update the
|
||||||
|
* cpu core maps. It is supposed to return 1 if the topology changed
|
||||||
|
* or 0 if it stayed the same.
|
||||||
|
*/
|
||||||
|
int __attribute__((weak)) arch_update_cpu_topology(void)
|
||||||
{
|
{
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -7777,8 +7748,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
|
|||||||
cpumask_t tmpmask;
|
cpumask_t tmpmask;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
unregister_sched_domain_sysctl();
|
|
||||||
|
|
||||||
for_each_cpu_mask_nr(i, *cpu_map)
|
for_each_cpu_mask_nr(i, *cpu_map)
|
||||||
cpu_attach_domain(NULL, &def_root_domain, i);
|
cpu_attach_domain(NULL, &def_root_domain, i);
|
||||||
synchronize_sched();
|
synchronize_sched();
|
||||||
@ -7831,17 +7800,21 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
|
|||||||
struct sched_domain_attr *dattr_new)
|
struct sched_domain_attr *dattr_new)
|
||||||
{
|
{
|
||||||
int i, j, n;
|
int i, j, n;
|
||||||
|
int new_topology;
|
||||||
|
|
||||||
mutex_lock(&sched_domains_mutex);
|
mutex_lock(&sched_domains_mutex);
|
||||||
|
|
||||||
/* always unregister in case we don't destroy any domains */
|
/* always unregister in case we don't destroy any domains */
|
||||||
unregister_sched_domain_sysctl();
|
unregister_sched_domain_sysctl();
|
||||||
|
|
||||||
|
/* Let architecture update cpu core mappings. */
|
||||||
|
new_topology = arch_update_cpu_topology();
|
||||||
|
|
||||||
n = doms_new ? ndoms_new : 0;
|
n = doms_new ? ndoms_new : 0;
|
||||||
|
|
||||||
/* Destroy deleted domains */
|
/* Destroy deleted domains */
|
||||||
for (i = 0; i < ndoms_cur; i++) {
|
for (i = 0; i < ndoms_cur; i++) {
|
||||||
for (j = 0; j < n; j++) {
|
for (j = 0; j < n && !new_topology; j++) {
|
||||||
if (cpus_equal(doms_cur[i], doms_new[j])
|
if (cpus_equal(doms_cur[i], doms_new[j])
|
||||||
&& dattrs_equal(dattr_cur, i, dattr_new, j))
|
&& dattrs_equal(dattr_cur, i, dattr_new, j))
|
||||||
goto match1;
|
goto match1;
|
||||||
@ -7856,12 +7829,12 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
|
|||||||
ndoms_cur = 0;
|
ndoms_cur = 0;
|
||||||
doms_new = &fallback_doms;
|
doms_new = &fallback_doms;
|
||||||
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
|
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
|
||||||
dattr_new = NULL;
|
WARN_ON_ONCE(dattr_new);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Build new domains */
|
/* Build new domains */
|
||||||
for (i = 0; i < ndoms_new; i++) {
|
for (i = 0; i < ndoms_new; i++) {
|
||||||
for (j = 0; j < ndoms_cur; j++) {
|
for (j = 0; j < ndoms_cur && !new_topology; j++) {
|
||||||
if (cpus_equal(doms_new[i], doms_cur[j])
|
if (cpus_equal(doms_new[i], doms_cur[j])
|
||||||
&& dattrs_equal(dattr_new, i, dattr_cur, j))
|
&& dattrs_equal(dattr_new, i, dattr_cur, j))
|
||||||
goto match2;
|
goto match2;
|
||||||
@ -8516,7 +8489,7 @@ static
|
|||||||
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
||||||
{
|
{
|
||||||
struct cfs_rq *cfs_rq;
|
struct cfs_rq *cfs_rq;
|
||||||
struct sched_entity *se, *parent_se;
|
struct sched_entity *se;
|
||||||
struct rq *rq;
|
struct rq *rq;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -8532,18 +8505,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
|||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
rq = cpu_rq(i);
|
rq = cpu_rq(i);
|
||||||
|
|
||||||
cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
|
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
|
||||||
GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
|
GFP_KERNEL, cpu_to_node(i));
|
||||||
if (!cfs_rq)
|
if (!cfs_rq)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
se = kmalloc_node(sizeof(struct sched_entity),
|
se = kzalloc_node(sizeof(struct sched_entity),
|
||||||
GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
|
GFP_KERNEL, cpu_to_node(i));
|
||||||
if (!se)
|
if (!se)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
parent_se = parent ? parent->se[i] : NULL;
|
init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
|
||||||
init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -8604,7 +8576,7 @@ static
|
|||||||
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
|
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
|
||||||
{
|
{
|
||||||
struct rt_rq *rt_rq;
|
struct rt_rq *rt_rq;
|
||||||
struct sched_rt_entity *rt_se, *parent_se;
|
struct sched_rt_entity *rt_se;
|
||||||
struct rq *rq;
|
struct rq *rq;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -8621,18 +8593,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
|
|||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
rq = cpu_rq(i);
|
rq = cpu_rq(i);
|
||||||
|
|
||||||
rt_rq = kmalloc_node(sizeof(struct rt_rq),
|
rt_rq = kzalloc_node(sizeof(struct rt_rq),
|
||||||
GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
|
GFP_KERNEL, cpu_to_node(i));
|
||||||
if (!rt_rq)
|
if (!rt_rq)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
|
rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
|
||||||
GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
|
GFP_KERNEL, cpu_to_node(i));
|
||||||
if (!rt_se)
|
if (!rt_se)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
parent_se = parent ? parent->rt_se[i] : NULL;
|
init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
|
||||||
init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -9275,11 +9246,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
|
|||||||
* (balbir@in.ibm.com).
|
* (balbir@in.ibm.com).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* track cpu usage of a group of tasks */
|
/* track cpu usage of a group of tasks and its child groups */
|
||||||
struct cpuacct {
|
struct cpuacct {
|
||||||
struct cgroup_subsys_state css;
|
struct cgroup_subsys_state css;
|
||||||
/* cpuusage holds pointer to a u64-type object on every cpu */
|
/* cpuusage holds pointer to a u64-type object on every cpu */
|
||||||
u64 *cpuusage;
|
u64 *cpuusage;
|
||||||
|
struct cpuacct *parent;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct cgroup_subsys cpuacct_subsys;
|
struct cgroup_subsys cpuacct_subsys;
|
||||||
@ -9313,6 +9285,9 @@ static struct cgroup_subsys_state *cpuacct_create(
|
|||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cgrp->parent)
|
||||||
|
ca->parent = cgroup_ca(cgrp->parent);
|
||||||
|
|
||||||
return &ca->css;
|
return &ca->css;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -9326,6 +9301,41 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|||||||
kfree(ca);
|
kfree(ca);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
|
||||||
|
{
|
||||||
|
u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
|
||||||
|
u64 data;
|
||||||
|
|
||||||
|
#ifndef CONFIG_64BIT
|
||||||
|
/*
|
||||||
|
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
|
||||||
|
*/
|
||||||
|
spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||||
|
data = *cpuusage;
|
||||||
|
spin_unlock_irq(&cpu_rq(cpu)->lock);
|
||||||
|
#else
|
||||||
|
data = *cpuusage;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
|
||||||
|
{
|
||||||
|
u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
|
||||||
|
|
||||||
|
#ifndef CONFIG_64BIT
|
||||||
|
/*
|
||||||
|
* Take rq->lock to make 64-bit write safe on 32-bit platforms.
|
||||||
|
*/
|
||||||
|
spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||||
|
*cpuusage = val;
|
||||||
|
spin_unlock_irq(&cpu_rq(cpu)->lock);
|
||||||
|
#else
|
||||||
|
*cpuusage = val;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* return total cpu usage (in nanoseconds) of a group */
|
/* return total cpu usage (in nanoseconds) of a group */
|
||||||
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
|
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
|
||||||
{
|
{
|
||||||
@ -9333,17 +9343,8 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
|
|||||||
u64 totalcpuusage = 0;
|
u64 totalcpuusage = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_present_cpu(i)
|
||||||
u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
|
totalcpuusage += cpuacct_cpuusage_read(ca, i);
|
||||||
|
|
||||||
/*
|
|
||||||
* Take rq->lock to make 64-bit addition safe on 32-bit
|
|
||||||
* platforms.
|
|
||||||
*/
|
|
||||||
spin_lock_irq(&cpu_rq(i)->lock);
|
|
||||||
totalcpuusage += *cpuusage;
|
|
||||||
spin_unlock_irq(&cpu_rq(i)->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
return totalcpuusage;
|
return totalcpuusage;
|
||||||
}
|
}
|
||||||
@ -9360,23 +9361,39 @@ static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_present_cpu(i)
|
||||||
u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
|
cpuacct_cpuusage_write(ca, i, 0);
|
||||||
|
|
||||||
spin_lock_irq(&cpu_rq(i)->lock);
|
|
||||||
*cpuusage = 0;
|
|
||||||
spin_unlock_irq(&cpu_rq(i)->lock);
|
|
||||||
}
|
|
||||||
out:
|
out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
|
||||||
|
struct seq_file *m)
|
||||||
|
{
|
||||||
|
struct cpuacct *ca = cgroup_ca(cgroup);
|
||||||
|
u64 percpu;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for_each_present_cpu(i) {
|
||||||
|
percpu = cpuacct_cpuusage_read(ca, i);
|
||||||
|
seq_printf(m, "%llu ", (unsigned long long) percpu);
|
||||||
|
}
|
||||||
|
seq_printf(m, "\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static struct cftype files[] = {
|
static struct cftype files[] = {
|
||||||
{
|
{
|
||||||
.name = "usage",
|
.name = "usage",
|
||||||
.read_u64 = cpuusage_read,
|
.read_u64 = cpuusage_read,
|
||||||
.write_u64 = cpuusage_write,
|
.write_u64 = cpuusage_write,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.name = "usage_percpu",
|
||||||
|
.read_seq_string = cpuacct_percpu_seq_read,
|
||||||
|
},
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||||
@ -9392,14 +9409,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|||||||
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||||
{
|
{
|
||||||
struct cpuacct *ca;
|
struct cpuacct *ca;
|
||||||
|
int cpu;
|
||||||
|
|
||||||
if (!cpuacct_subsys.active)
|
if (!cpuacct_subsys.active)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
cpu = task_cpu(tsk);
|
||||||
ca = task_ca(tsk);
|
ca = task_ca(tsk);
|
||||||
if (ca) {
|
|
||||||
u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
|
|
||||||
|
|
||||||
|
for (; ca; ca = ca->parent) {
|
||||||
|
u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
|
||||||
*cpuusage += cputime;
|
*cpuusage += cputime;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec)
|
|||||||
|
|
||||||
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
|
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
|
||||||
|
|
||||||
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
|
static void print_cfs_group_stats(struct seq_file *m, int cpu,
|
||||||
|
struct task_group *tg)
|
||||||
|
{
|
||||||
|
struct sched_entity *se = tg->se[cpu];
|
||||||
|
if (!se)
|
||||||
|
return;
|
||||||
|
|
||||||
|
#define P(F) \
|
||||||
|
SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
|
||||||
|
#define PN(F) \
|
||||||
|
SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||||
|
|
||||||
|
PN(se->exec_start);
|
||||||
|
PN(se->vruntime);
|
||||||
|
PN(se->sum_exec_runtime);
|
||||||
|
#ifdef CONFIG_SCHEDSTATS
|
||||||
|
PN(se->wait_start);
|
||||||
|
PN(se->sleep_start);
|
||||||
|
PN(se->block_start);
|
||||||
|
PN(se->sleep_max);
|
||||||
|
PN(se->block_max);
|
||||||
|
PN(se->exec_max);
|
||||||
|
PN(se->slice_max);
|
||||||
|
PN(se->wait_max);
|
||||||
|
PN(se->wait_sum);
|
||||||
|
P(se->wait_count);
|
||||||
|
#endif
|
||||||
|
P(se->load.weight);
|
||||||
|
#undef PN
|
||||||
|
#undef P
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||||
{
|
{
|
||||||
@ -121,20 +155,19 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
|||||||
|
|
||||||
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
|
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
|
||||||
char path[128] = "";
|
char path[128] = "";
|
||||||
struct cgroup *cgroup = NULL;
|
|
||||||
struct task_group *tg = cfs_rq->tg;
|
struct task_group *tg = cfs_rq->tg;
|
||||||
|
|
||||||
if (tg)
|
cgroup_path(tg->css.cgroup, path, sizeof(path));
|
||||||
cgroup = tg->css.cgroup;
|
|
||||||
|
|
||||||
if (cgroup)
|
|
||||||
cgroup_path(cgroup, path, sizeof(path));
|
|
||||||
|
|
||||||
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
|
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
|
||||||
|
#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
|
||||||
|
{
|
||||||
|
uid_t uid = cfs_rq->tg->uid;
|
||||||
|
SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
|
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
|
||||||
SPLIT_NS(cfs_rq->exec_clock));
|
SPLIT_NS(cfs_rq->exec_clock));
|
||||||
|
|
||||||
@ -168,6 +201,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
|||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
|
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
|
||||||
#endif
|
#endif
|
||||||
|
print_cfs_group_stats(m, cpu, cfs_rq->tg);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,14 +209,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
|
|||||||
{
|
{
|
||||||
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
|
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
|
||||||
char path[128] = "";
|
char path[128] = "";
|
||||||
struct cgroup *cgroup = NULL;
|
|
||||||
struct task_group *tg = rt_rq->tg;
|
struct task_group *tg = rt_rq->tg;
|
||||||
|
|
||||||
if (tg)
|
cgroup_path(tg->css.cgroup, path, sizeof(path));
|
||||||
cgroup = tg->css.cgroup;
|
|
||||||
|
|
||||||
if (cgroup)
|
|
||||||
cgroup_path(cgroup, path, sizeof(path));
|
|
||||||
|
|
||||||
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
|
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
|
||||||
#else
|
#else
|
||||||
@ -272,7 +301,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
|
|||||||
u64 now = ktime_to_ns(ktime_get());
|
u64 now = ktime_to_ns(ktime_get());
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
|
SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
|
||||||
init_utsname()->release,
|
init_utsname()->release,
|
||||||
(int)strcspn(init_utsname()->version, " "),
|
(int)strcspn(init_utsname()->version, " "),
|
||||||
init_utsname()->version);
|
init_utsname()->version);
|
||||||
|
@ -492,6 +492,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
|
|||||||
* overflow on 32 bits):
|
* overflow on 32 bits):
|
||||||
*/
|
*/
|
||||||
delta_exec = (unsigned long)(now - curr->exec_start);
|
delta_exec = (unsigned long)(now - curr->exec_start);
|
||||||
|
if (!delta_exec)
|
||||||
|
return;
|
||||||
|
|
||||||
__update_curr(cfs_rq, curr, delta_exec);
|
__update_curr(cfs_rq, curr, delta_exec);
|
||||||
curr->exec_start = now;
|
curr->exec_start = now;
|
||||||
@ -1345,12 +1347,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
|
|||||||
{
|
{
|
||||||
struct task_struct *curr = rq->curr;
|
struct task_struct *curr = rq->curr;
|
||||||
struct sched_entity *se = &curr->se, *pse = &p->se;
|
struct sched_entity *se = &curr->se, *pse = &p->se;
|
||||||
|
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||||
|
|
||||||
|
update_curr(cfs_rq);
|
||||||
|
|
||||||
if (unlikely(rt_prio(p->prio))) {
|
if (unlikely(rt_prio(p->prio))) {
|
||||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
|
||||||
|
|
||||||
update_rq_clock(rq);
|
|
||||||
update_curr(cfs_rq);
|
|
||||||
resched_task(curr);
|
resched_task(curr);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -77,7 +77,7 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define for_each_leaf_rt_rq(rt_rq, rq) \
|
#define for_each_leaf_rt_rq(rt_rq, rq) \
|
||||||
list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
|
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
|
||||||
|
|
||||||
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
|
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
|
||||||
{
|
{
|
||||||
@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq)
|
|||||||
for_each_sched_rt_entity(rt_se) {
|
for_each_sched_rt_entity(rt_se) {
|
||||||
rt_rq = rt_rq_of_se(rt_se);
|
rt_rq = rt_rq_of_se(rt_se);
|
||||||
|
|
||||||
spin_lock(&rt_rq->rt_runtime_lock);
|
|
||||||
if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
|
if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
|
||||||
|
spin_lock(&rt_rq->rt_runtime_lock);
|
||||||
rt_rq->rt_time += delta_exec;
|
rt_rq->rt_time += delta_exec;
|
||||||
if (sched_rt_runtime_exceeded(rt_rq))
|
if (sched_rt_runtime_exceeded(rt_rq))
|
||||||
resched_task(curr);
|
resched_task(curr);
|
||||||
|
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||||
}
|
}
|
||||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -909,9 +909,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
|
|||||||
/* Only try algorithms three times */
|
/* Only try algorithms three times */
|
||||||
#define RT_MAX_TRIES 3
|
#define RT_MAX_TRIES 3
|
||||||
|
|
||||||
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
|
|
||||||
static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
|
|
||||||
|
|
||||||
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
|
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
|
||||||
|
|
||||||
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||||
|
@ -31,7 +31,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
|
|||||||
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
|
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
|
||||||
rq->sched_switch, rq->sched_count, rq->sched_goidle,
|
rq->sched_switch, rq->sched_count, rq->sched_goidle,
|
||||||
rq->ttwu_count, rq->ttwu_local,
|
rq->ttwu_count, rq->ttwu_local,
|
||||||
rq->rq_sched_info.cpu_time,
|
rq->rq_cpu_time,
|
||||||
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
|
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
|
||||||
|
|
||||||
seq_printf(seq, "\n");
|
seq_printf(seq, "\n");
|
||||||
@ -123,7 +123,7 @@ static inline void
|
|||||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||||
{
|
{
|
||||||
if (rq)
|
if (rq)
|
||||||
rq->rq_sched_info.cpu_time += delta;
|
rq->rq_cpu_time += delta;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
@ -236,7 +236,6 @@ static inline void sched_info_depart(struct task_struct *t)
|
|||||||
unsigned long long delta = task_rq(t)->clock -
|
unsigned long long delta = task_rq(t)->clock -
|
||||||
t->sched_info.last_arrival;
|
t->sched_info.last_arrival;
|
||||||
|
|
||||||
t->sched_info.cpu_time += delta;
|
|
||||||
rq_sched_info_depart(task_rq(t), delta);
|
rq_sched_info_depart(task_rq(t), delta);
|
||||||
|
|
||||||
if (t->state == TASK_RUNNING)
|
if (t->state == TASK_RUNNING)
|
||||||
|
@ -104,6 +104,8 @@ static int sched_create_user(struct user_struct *up)
|
|||||||
if (IS_ERR(up->tg))
|
if (IS_ERR(up->tg))
|
||||||
rc = -ENOMEM;
|
rc = -ENOMEM;
|
||||||
|
|
||||||
|
set_tg_uid(up);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user