linux-next/drivers/cpufreq/cpufreq_stats.c
Konrad Rzeszutek Wilk 46a310b80b [CPUFREQ] Don't set stat->last_index to -1 if the pol->cur has incorrect value.
If the driver submitted a non-existent pol->cur value (say it
used the default-initialized value of zero), then when cpufreq
stats tries to set up its initial values it incorrectly sets
stat->last_index to -1 (or 0xfffff...). cpufreq_stats_update then
tries to update at that index location and fails.

This can be caused by:

stat->last_index = freq_table_get_index(stat, policy->cur);

not finding the appropriate frequency in the table (because policy->cur
is wrong), and we end up crashing. The fix, however, is
concentrated in 'cpufreq_stats_update', as the last_index
(and old_index) are updated there. This means it can reset
the last_index to -1 again and cause a crash on the next iteration.

Without this patch, the following crash is observed:

powernow-k8: Found 1 AMD Athlon(tm) 64 Processor 3700+ (1 cpu cores) (version 2.20.00)
powernow-k8: fid 0x2 (1000 MHz), vid 0x12
powernow-k8: fid 0xa (1800 MHz), vid 0xa
powernow-k8: fid 0xc (2000 MHz), vid 0x8
powernow-k8: fid 0xe (2200 MHz), vid 0x8
Marking TSC unstable due to cpufreq changes
powernow-k8: fid trans failed, fid 0x2, curr 0x0
BUG: unable to handle kernel paging request at ffff880807e07b78
IP: [<ffffffff81479163>] cpufreq_stats_update+0x46/0x5b
.. snip..
Pid: 1, comm: swapper Not tainted 3.0.0-rc2 #45 MICRO-STAR INTERNATIONAL CO., LTD MS-7094/MS-7094
..snip..
Call Trace:
 [<ffffffff81479248>] cpufreq_stat_notifier_trans+0x48/0x7c
 [<ffffffff81095d68>] notifier_call_chain+0x32/0x5e
 [<ffffffff81095e6b>] __srcu_notifier_call_chain+0x47/0x63
 [<ffffffff81095e96>] srcu_notifier_call_chain+0xf/0x11
 [<ffffffff81477e7a>] cpufreq_notify_transition+0x111/0x134
 [<ffffffff8147b0d4>] powernowk8_target+0x53b/0x617
 [<ffffffff8147723a>] __cpufreq_driver_target+0x2e/0x30
 [<ffffffff8147a127>] cpufreq_governor_dbs+0x339/0x356
 [<ffffffff81477394>] __cpufreq_governor+0xa8/0xe9
 [<ffffffff81477525>] __cpufreq_set_policy+0x132/0x13e
 [<ffffffff8147848d>] cpufreq_add_dev_interface+0x272/0x28c

Reported-by: Tobias Diedrich <ranma+xen@tdiedrich.de>
Tested-by: Tobias Diedrich <ranma+xen@tdiedrich.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Dave Jones <davej@redhat.com>
2011-06-16 16:31:12 -04:00

403 lines
10 KiB
C

/*
* drivers/cpufreq/cpufreq_stats.c
*
* Copyright (C) 2003-2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
* (C) 2004 Zou Nan hai <nanhai.zou@intel.com>.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sysfs.h>
#include <linux/cpufreq.h>
#include <linux/jiffies.h>
#include <linux/percpu.h>
#include <linux/kobject.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <asm/cputime.h>
/*
 * Single lock shared by all CPUs. It is taken around updates to the
 * per-CPU stats records and is initialised in cpufreq_stats_init().
 */
static spinlock_t cpufreq_stats_lock;
/*
 * Define a static struct freq_attr called _attr_<_name> whose sysfs file
 * name is the stringified _name, with the given _mode and _show callback.
 * Only .attr and .show are initialised; no store callback is set.
 */
#define CPUFREQ_STATDEVICE_ATTR(_name, _mode, _show) \
static struct freq_attr _attr_##_name = {\
.attr = {.name = __stringify(_name), .mode = _mode, }, \
.show = _show,\
};
/*
 * Per-CPU frequency statistics record, allocated by
 * cpufreq_stats_create_table() and released by cpufreq_stats_free_table().
 */
struct cpufreq_stats {
/* CPU this record belongs to (copied from policy->cpu at creation). */
unsigned int cpu;
/* Number of recorded transitions between two different table indices. */
unsigned int total_trans;
/* get_jiffies_64() value at the last accounting update. */
unsigned long long last_time;
/*
 * Number of valid (non-INVALID) entries counted in the driver table,
 * duplicates included. Sizes the arrays below and is the row stride of
 * trans_table.
 */
unsigned int max_state;
/* Number of distinct frequencies actually stored in freq_table. */
unsigned int state_num;
/*
 * Index of the most recently recorded frequency. Assigned from
 * freq_table_get_index(), so it holds the unsigned representation of -1
 * when the frequency was not found in freq_table.
 */
unsigned int last_index;
/*
 * Accumulated time per state. Also the base of the single allocation
 * that holds freq_table and (when enabled) trans_table; it is the only
 * pointer passed to kfree().
 */
cputime64_t *time_in_state;
/* Distinct frequencies; points into the time_in_state allocation. */
unsigned int *freq_table;
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
/*
 * Transition counters indexed as [old_index * max_state + new_index];
 * points into the time_in_state allocation.
 */
unsigned int *trans_table;
#endif
};
/* Per-CPU pointer to that CPU's stats record; NULL when no record exists. */
static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table);
/*
 * Pairs a sysfs attribute with a show callback that receives a stats record.
 * NOTE(review): no user of this type is visible in this file; the sysfs
 * files here are built with struct freq_attr instead -- confirm whether this
 * definition is still needed.
 */
struct cpufreq_stats_attribute {
struct attribute attr;
ssize_t(*show) (struct cpufreq_stats *, char *);
};
/*
 * cpufreq_stats_update - charge the time elapsed since the previous update
 * to the state the CPU was last recorded in, then restart the interval.
 * @cpu: CPU whose per-CPU stats record is updated. The record must exist;
 *       every caller in this file checks it for NULL beforehand.
 *
 * Always returns 0.
 */
static int cpufreq_stats_update(unsigned int cpu)
{
	struct cpufreq_stats *stat;
	unsigned long long cur_time;

	cur_time = get_jiffies_64();
	spin_lock(&cpufreq_stats_lock);
	stat = per_cpu(cpufreq_stats_table, cpu);
	/*
	 * last_index is assigned from freq_table_get_index(), which returns
	 * -1 when the frequency is unknown; stored in the unsigned field that
	 * becomes a huge value. The sysfs show handlers call this function
	 * without looking at last_index, so reject any index outside the
	 * max_state entries of time_in_state instead of writing out of bounds.
	 * The elapsed interval is then simply dropped.
	 */
	if (stat->time_in_state && stat->last_index < stat->max_state)
		stat->time_in_state[stat->last_index] =
			cputime64_add(stat->time_in_state[stat->last_index],
				      cputime_sub(cur_time, stat->last_time));
	stat->last_time = cur_time;
	spin_unlock(&cpufreq_stats_lock);
	return 0;
}
/*
 * show_total_trans - sysfs show handler for "total_trans".
 * @policy: policy whose CPU's stats record is reported.
 * @buf: output buffer supplied by sysfs.
 *
 * Writes the transition counter followed by a newline. Returns the number of
 * bytes written, or 0 when the CPU has no stats record.
 */
static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
{
	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);

	if (!stat)
		return 0;
	/*
	 * total_trans is an unsigned int, so print it with %u. The record just
	 * fetched is used directly instead of looking it up a second time
	 * through stat->cpu.
	 */
	return sprintf(buf, "%u\n", stat->total_trans);
}
static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
{
ssize_t len = 0;
int i;
struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
cpufreq_stats_update(stat->cpu);
for (i = 0; i < stat->state_num; i++) {
len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i],
(unsigned long long)
cputime64_to_clock_t(stat->time_in_state[i]));
}
return len;
}
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
/*
 * show_trans_table - sysfs show handler for "trans_table".
 * @policy: policy whose CPU's stats record is reported.
 * @buf: output buffer supplied by sysfs, treated as PAGE_SIZE bytes.
 *
 * Prints a header row of frequencies followed by one row per state holding
 * the transition counters from that state to every other state. Output stops
 * as soon as the running length reaches PAGE_SIZE. Returns the number of
 * bytes produced, capped at PAGE_SIZE, or 0 when the CPU has no stats record.
 */
static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
{
	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
	ssize_t len = 0;
	int row, col;

	if (stat == NULL)
		return 0;

	/* Account the interval that is still running before reporting. */
	cpufreq_stats_update(stat->cpu);

	len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n");
	len += snprintf(buf + len, PAGE_SIZE - len, " : ");

	/* Header: one column per known frequency, while space remains. */
	for (col = 0; col < stat->state_num && len < PAGE_SIZE; col++)
		len += snprintf(buf + len, PAGE_SIZE - len, "%9u ",
				stat->freq_table[col]);
	if (len >= PAGE_SIZE)
		return PAGE_SIZE;

	len += snprintf(buf + len, PAGE_SIZE - len, "\n");

	for (row = 0; row < stat->state_num && len < PAGE_SIZE; row++) {
		len += snprintf(buf + len, PAGE_SIZE - len, "%9u: ",
				stat->freq_table[row]);

		/* Rows of trans_table are max_state entries apart. */
		for (col = 0; col < stat->state_num && len < PAGE_SIZE; col++)
			len += snprintf(buf + len, PAGE_SIZE - len, "%9u ",
					stat->trans_table[row * stat->max_state + col]);

		if (len >= PAGE_SIZE)
			break;
		len += snprintf(buf + len, PAGE_SIZE - len, "\n");
	}

	/* snprintf reports the untruncated length, so clamp the result. */
	if (len >= PAGE_SIZE)
		return PAGE_SIZE;
	return len;
}
CPUFREQ_STATDEVICE_ATTR(trans_table, 0444, show_trans_table);
#endif
/* Read-only (0444) attributes that are always built. */
CPUFREQ_STATDEVICE_ATTR(total_trans, 0444, show_total_trans);
CPUFREQ_STATDEVICE_ATTR(time_in_state, 0444, show_time_in_state);
/*
 * NULL-terminated list of the attributes in the group; trans_table is
 * included only when CONFIG_CPU_FREQ_STAT_DETAILS is set.
 */
static struct attribute *default_attrs[] = {
&_attr_total_trans.attr,
&_attr_time_in_state.attr,
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
&_attr_trans_table.attr,
#endif
NULL
};
/*
 * Attribute group named "stats". It is created on the policy kobject in
 * cpufreq_stats_create_table() and removed in cpufreq_stats_free_sysfs().
 */
static struct attribute_group stats_attr_group = {
.attrs = default_attrs,
.name = "stats"
};
/*
 * freq_table_get_index - look up a frequency in a stats record.
 * @stat: stats record whose freq_table is searched.
 * @freq: frequency value to find.
 *
 * Scans the first max_state entries of freq_table (not just the state_num
 * entries that have been filled in) and returns the index of the first match,
 * or -1 when the frequency is not present.
 */
static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
{
	int pos = 0;

	while (pos < stat->max_state) {
		if (stat->freq_table[pos] == freq)
			return pos;
		pos++;
	}
	return -1;
}
/*
 * cpufreq_stats_free_table - release the stats record of a CPU.
 * @cpu: CPU whose record is freed.
 *
 * Should be called late in the CPU removal sequence so that the stats
 * memory is still available in case someone tries to use it. Freeing
 * time_in_state also releases freq_table and trans_table, which live in the
 * same allocation. The per-CPU pointer is left NULL afterwards.
 */
static void cpufreq_stats_free_table(unsigned int cpu)
{
	struct cpufreq_stats **slot = &per_cpu(cpufreq_stats_table, cpu);
	struct cpufreq_stats *stat = *slot;

	if (stat != NULL) {
		kfree(stat->time_in_state);
		kfree(stat);
	}
	*slot = NULL;
}
/* must be called early in the CPU removal sequence (before
* cpufreq_remove_dev) so that policy is still valid.
*/
static void cpufreq_stats_free_sysfs(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
if (policy && policy->cpu == cpu)
sysfs_remove_group(&policy->kobj, &stats_attr_group);
if (policy)
cpufreq_cpu_put(policy);
}
/*
 * cpufreq_stats_create_table - allocate and publish the stats record of a CPU.
 * @policy: policy the record is created for; policy->cpu selects the CPU and
 *          policy->cur provides the initial frequency.
 * @table: driver frequency table, terminated by CPUFREQ_TABLE_END.
 *
 * Creates the "stats" sysfs group on the policy kobject, then builds the
 * record: every entry of @table other than CPUFREQ_ENTRY_INVALID is counted
 * to size the arrays, and the distinct frequencies are copied to freq_table.
 *
 * Returns 0 on success, -EBUSY when the CPU already has a record, -ENOMEM on
 * allocation failure, -EINVAL when no policy reference can be obtained, or
 * the error returned by sysfs_create_group().
 */
static int cpufreq_stats_create_table(struct cpufreq_policy *policy,
		struct cpufreq_frequency_table *table)
{
	unsigned int i, j, count = 0;
	int ret;	/* holds negative errno values, so it must be signed */
	struct cpufreq_stats *stat;
	struct cpufreq_policy *data;
	unsigned int alloc_size;
	unsigned int cpu = policy->cpu;

	if (per_cpu(cpufreq_stats_table, cpu))
		return -EBUSY;
	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
	if (!stat)
		return -ENOMEM;

	data = cpufreq_cpu_get(cpu);
	if (data == NULL) {
		ret = -EINVAL;
		goto error_get_fail;
	}

	ret = sysfs_create_group(&data->kobj, &stats_attr_group);
	if (ret)
		goto error_out;

	stat->cpu = cpu;
	per_cpu(cpufreq_stats_table, cpu) = stat;

	/* First pass: count the usable entries to size the allocation. */
	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
		unsigned int freq = table[i].frequency;

		if (freq == CPUFREQ_ENTRY_INVALID)
			continue;
		count++;
	}

	/*
	 * One allocation holds time_in_state, freq_table and, when enabled,
	 * the count x count transition matrix, in that order.
	 */
	alloc_size = count * sizeof(int) + count * sizeof(cputime64_t);
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
	alloc_size += count * count * sizeof(int);
#endif
	stat->max_state = count;
	stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
	if (!stat->time_in_state) {
		ret = -ENOMEM;
		/* The sysfs group already exists and has to be removed. */
		goto error_alloc;
	}
	stat->freq_table = (unsigned int *)(stat->time_in_state + count);
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
	stat->trans_table = stat->freq_table + count;
#endif

	/* Second pass: store each frequency once, skipping duplicates. */
	j = 0;
	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
		unsigned int freq = table[i].frequency;

		if (freq == CPUFREQ_ENTRY_INVALID)
			continue;
		if (freq_table_get_index(stat, freq) == -1)
			stat->freq_table[j++] = freq;
	}
	stat->state_num = j;

	spin_lock(&cpufreq_stats_lock);
	stat->last_time = get_jiffies_64();
	/* May be -1 (wrapped) when policy->cur is not in the table. */
	stat->last_index = freq_table_get_index(stat, policy->cur);
	spin_unlock(&cpufreq_stats_lock);
	cpufreq_cpu_put(data);
	return 0;

error_alloc:
	sysfs_remove_group(&data->kobj, &stats_attr_group);
error_out:
	cpufreq_cpu_put(data);
error_get_fail:
	/* Unpublish the record before freeing it. */
	per_cpu(cpufreq_stats_table, cpu) = NULL;
	kfree(stat);
	return ret;
}
static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
unsigned long val, void *data)
{
int ret;
struct cpufreq_policy *policy = data;
struct cpufreq_frequency_table *table;
unsigned int cpu = policy->cpu;
if (val != CPUFREQ_NOTIFY)
return 0;
table = cpufreq_frequency_get_table(cpu);
if (!table)
return 0;
ret = cpufreq_stats_create_table(policy, table);
if (ret)
return ret;
return 0;
}
/*
 * cpufreq_stat_notifier_trans - transition notifier callback.
 * @nb: notifier block (unused).
 * @val: transition event; only CPUFREQ_POSTCHANGE is acted upon.
 * @data: the struct cpufreq_freqs describing the transition.
 *
 * Charges the elapsed time to the previous state and, when the state really
 * changed, records the new state and bumps the transition counters.
 * Always returns 0.
 */
static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
		unsigned long val, void *data)
{
	struct cpufreq_freqs *freq = data;
	struct cpufreq_stats *stat;
	int old_index, new_index;

	if (val != CPUFREQ_POSTCHANGE)
		return 0;

	stat = per_cpu(cpufreq_stats_table, freq->cpu);
	if (!stat)
		return 0;

	old_index = stat->last_index;
	new_index = freq_table_get_index(stat, freq->new);

	/* Unknown target frequency: there is no state to record. */
	if (new_index == -1)
		return 0;

	/*
	 * We can't do stat->time_in_state[-1] = .., so when the previous
	 * state is unknown (e.g. policy->cur was not in the table when the
	 * record was created) nothing is charged and no transition is
	 * counted. Accounting does start from the now-known state, though;
	 * returning without updating last_index would leave it at -1 forever
	 * and no statistics would ever be collected for this CPU.
	 */
	if (old_index == -1) {
		spin_lock(&cpufreq_stats_lock);
		stat->last_index = new_index;
		stat->last_time = get_jiffies_64();
		spin_unlock(&cpufreq_stats_lock);
		return 0;
	}

	cpufreq_stats_update(freq->cpu);
	if (old_index == new_index)
		return 0;

	spin_lock(&cpufreq_stats_lock);
	stat->last_index = new_index;
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
	stat->trans_table[old_index * stat->max_state + new_index]++;
#endif
	stat->total_trans++;
	spin_unlock(&cpufreq_stats_lock);
	return 0;
}
/*
 * cpufreq_stat_cpu_callback - CPU hotplug notifier callback.
 * @nfb: notifier block (unused).
 * @action: hotplug event.
 * @hcpu: CPU number encoded as a pointer.
 *
 * Refreshes the policy when a CPU comes online, removes the sysfs group
 * before a CPU goes down and frees the stats record once it is dead.
 * Always returns NOTIFY_OK.
 */
static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cpufreq_update_policy(cpu);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/*
		 * The _FROZEN variant is handled like the other events here;
		 * without it the sysfs group would survive the record that
		 * CPU_DEAD_FROZEN frees.
		 */
		cpufreq_stats_free_sysfs(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cpufreq_stats_free_table(cpu);
		break;
	}
	return NOTIFY_OK;
}
/*
 * CPU hotplug notifier. priority=1 so this will get called before
 * cpufreq_remove_dev (ordering relative to the cpufreq core is stated by the
 * original author; it is not visible in this file).
 */
static struct notifier_block cpufreq_stat_cpu_notifier __refdata = {
.notifier_call = cpufreq_stat_cpu_callback,
.priority = 1,
};
/* Policy notifier: creates the stats record on CPUFREQ_NOTIFY. */
static struct notifier_block notifier_policy_block = {
.notifier_call = cpufreq_stat_notifier_policy
};
/* Transition notifier: accounts frequency changes on CPUFREQ_POSTCHANGE. */
static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_stat_notifier_trans
};
/*
 * cpufreq_stats_init - module entry point.
 *
 * Initialises the stats lock, registers the policy and transition notifiers
 * and the CPU hotplug notifier, then requests a policy update for every
 * online CPU. Returns 0 on success or the error from notifier registration;
 * if the transition notifier cannot be registered, the policy notifier is
 * unregistered again.
 */
static int __init cpufreq_stats_init(void)
{
	unsigned int cpu;
	int err;

	spin_lock_init(&cpufreq_stats_lock);

	err = cpufreq_register_notifier(&notifier_policy_block,
					CPUFREQ_POLICY_NOTIFIER);
	if (err)
		return err;

	err = cpufreq_register_notifier(&notifier_trans_block,
					CPUFREQ_TRANSITION_NOTIFIER);
	if (err)
		goto unregister_policy;

	register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);

	for_each_online_cpu(cpu)
		cpufreq_update_policy(cpu);

	return 0;

unregister_policy:
	cpufreq_unregister_notifier(&notifier_policy_block,
				    CPUFREQ_POLICY_NOTIFIER);
	return err;
}
/*
 * cpufreq_stats_exit - module exit point.
 *
 * Unregisters the policy, transition and CPU hotplug notifiers, then tears
 * down the sysfs group and the stats record of every online CPU.
 */
static void __exit cpufreq_stats_exit(void)
{
	unsigned int cpu;

	cpufreq_unregister_notifier(&notifier_policy_block,
			CPUFREQ_POLICY_NOTIFIER);
	cpufreq_unregister_notifier(&notifier_trans_block,
			CPUFREQ_TRANSITION_NOTIFIER);
	unregister_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
	for_each_online_cpu(cpu) {
		/*
		 * Remove the sysfs files first and free the record last, the
		 * same order the hotplug path uses (DOWN_PREPARE, then DEAD),
		 * so the show handlers are gone before the memory they read
		 * is released.
		 */
		cpufreq_stats_free_sysfs(cpu);
		cpufreq_stats_free_table(cpu);
	}
}
/* Module metadata and registration of the init/exit entry points. */
MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
"through sysfs filesystem");
MODULE_LICENSE("GPL");
module_init(cpufreq_stats_init);
module_exit(cpufreq_stats_exit);