2006-09-26 10:52:42 +02:00
|
|
|
/*
|
2006-09-26 10:52:42 +02:00
|
|
|
* Thermal throttle event support code (such as syslog messaging and rate
|
|
|
|
* limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
|
2009-04-08 12:31:19 +02:00
|
|
|
*
|
2006-09-26 10:52:42 +02:00
|
|
|
* This allows consistent reporting of CPU thermal throttle events.
|
|
|
|
*
|
|
|
|
* Maintains a counter in /sys that keeps track of the number of thermal
|
|
|
|
* events, such that the user knows how bad the thermal problem might be
|
|
|
|
* (since the logging to syslog and mcelog is rate limited).
|
2006-09-26 10:52:42 +02:00
|
|
|
*
|
|
|
|
* Author: Dmitriy Zavin (dmitriyz@google.com)
|
|
|
|
*
|
|
|
|
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
|
2006-09-26 10:52:42 +02:00
|
|
|
* Inspired by Ross Biro's and Al Borchers' counter code.
|
2006-09-26 10:52:42 +02:00
|
|
|
*/
|
2009-06-15 17:25:27 +09:00
|
|
|
#include <linux/interrupt.h>
|
2009-04-08 12:31:19 +02:00
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/jiffies.h>
|
2009-06-15 17:26:10 +09:00
|
|
|
#include <linux/kernel.h>
|
2006-09-26 10:52:42 +02:00
|
|
|
#include <linux/percpu.h>
|
2011-05-26 12:22:53 -04:00
|
|
|
#include <linux/export.h>
|
2009-06-15 17:26:10 +09:00
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/smp.h>
|
2006-09-26 10:52:42 +02:00
|
|
|
#include <linux/cpu.h>
|
2009-04-08 12:31:19 +02:00
|
|
|
|
2009-06-15 17:26:10 +09:00
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/apic.h>
|
2009-06-15 17:25:27 +09:00
|
|
|
#include <asm/idle.h>
|
|
|
|
#include <asm/mce.h>
|
2009-06-15 17:26:10 +09:00
|
|
|
#include <asm/msr.h>
|
2006-09-26 10:52:42 +02:00
|
|
|
|
|
|
|
/* How long to wait between reporting thermal events */
|
2009-04-08 12:31:19 +02:00
|
|
|
/* Added to get_jiffies_64() to compute a state's next_check deadline. */
#define CHECK_INTERVAL (300 * HZ)
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2010-07-29 17:13:45 -07:00
|
|
|
/* 'event' argument of therm_throt_process(): thermal throttling. */
#define THERMAL_THROTTLING_EVENT 0
|
|
|
|
/* 'event' argument of therm_throt_process(): power limit notification. */
#define POWER_LIMIT_EVENT 1
|
|
|
|
|
2009-09-22 15:50:24 +02:00
|
|
|
/*
|
2010-07-29 17:13:45 -07:00
|
|
|
* Current thermal event state:
|
2009-09-22 15:50:24 +02:00
|
|
|
*/
|
2010-07-29 17:13:44 -07:00
|
|
|
struct _thermal_state {
|
2010-07-29 17:13:45 -07:00
|
|
|
bool new_event;
|
|
|
|
int event;
|
2009-09-22 15:50:24 +02:00
|
|
|
u64 next_check;
|
2010-07-29 17:13:45 -07:00
|
|
|
unsigned long count;
|
|
|
|
unsigned long last_count;
|
2009-09-22 15:50:24 +02:00
|
|
|
};
|
2009-04-08 12:31:19 +02:00
|
|
|
|
2010-07-29 17:13:44 -07:00
|
|
|
struct thermal_state {
|
2010-07-29 17:13:45 -07:00
|
|
|
struct _thermal_state core_throttle;
|
|
|
|
struct _thermal_state core_power_limit;
|
|
|
|
struct _thermal_state package_throttle;
|
|
|
|
struct _thermal_state package_power_limit;
|
2011-01-03 17:22:04 +05:30
|
|
|
struct _thermal_state core_thresh0;
|
|
|
|
struct _thermal_state core_thresh1;
|
2010-07-29 17:13:44 -07:00
|
|
|
};
|
|
|
|
|
2011-01-03 17:22:04 +05:30
|
|
|
/* Callback to handle core threshold interrupts */
|
|
|
|
int (*platform_thermal_notify)(__u64 msr_val);
|
2011-01-20 20:12:40 -08:00
|
|
|
EXPORT_SYMBOL(platform_thermal_notify);
|
2011-01-03 17:22:04 +05:30
|
|
|
|
2009-09-22 15:50:24 +02:00
|
|
|
/* Per-CPU event bookkeeping, accessed through per_cpu(thermal_state, cpu). */
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
|
|
|
|
|
|
|
|
/*
 * While this reads as zero, thermal_throttle_init_device() skips the sysfs
 * setup. NOTE(review): the writer is outside the code shown here - confirm.
 */
static atomic_t therm_throt_en = ATOMIC_INIT(0);
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2009-11-10 09:38:24 +08:00
|
|
|
/* Thermal LVT value read from the APIC by mcheck_intel_therm_init(). */
static u32 lvtthmr_init __read_mostly;
|
|
|
|
|
2006-09-26 10:52:42 +02:00
|
|
|
#ifdef CONFIG_SYSFS
|
2011-12-21 14:29:42 -08:00
|
|
|
/*
 * Declare a read-only (0444) device attribute named <_name> whose show
 * callback is therm_throt_device_show_<_name>() and which has no store
 * callback.
 */
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444, therm_throt_device_show_##_name, NULL)
|
2009-04-08 12:31:19 +02:00
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
/*
 * Generate therm_throt_device_show_<event>_<name>(), a sysfs show callback
 * that prints the unsigned long field <name> of the per-CPU state <event>
 * for the CPU identified by dev->id. Nothing is written (0 is returned)
 * when that CPU is not online.
 */
#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu_id = dev->id;					\
	ssize_t len = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu_id))						\
		len = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu_id).event.name); \
	preempt_enable();						\
									\
	return len;							\
}
|
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
/* Show callbacks for the four per-CPU event counters. */
define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_show_func(package_power_limit, count);

/* Matching read-only attributes (dev_attr_<event>_count). */
define_therm_throt_device_one_ro(core_throttle_count);
define_therm_throt_device_one_ro(core_power_limit_count);
define_therm_throt_device_one_ro(package_throttle_count);
define_therm_throt_device_one_ro(package_power_limit_count);
|
2010-07-29 17:13:45 -07:00
|
|
|
|
2006-09-26 10:52:42 +02:00
|
|
|
static struct attribute *thermal_throttle_attrs[] = {
|
2011-12-21 14:29:42 -08:00
|
|
|
&dev_attr_core_throttle_count.attr,
|
2006-09-26 10:52:42 +02:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2010-07-29 17:13:45 -07:00
|
|
|
static struct attribute_group thermal_attr_group = {
|
2009-04-08 12:31:19 +02:00
|
|
|
.attrs = thermal_throttle_attrs,
|
|
|
|
.name = "thermal_throttle"
|
2006-09-26 10:52:42 +02:00
|
|
|
};
|
|
|
|
#endif /* CONFIG_SYSFS */
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2010-07-29 17:13:45 -07:00
|
|
|
/* 'level' argument of therm_throt_process(): per-core event. */
#define CORE_LEVEL 0
|
|
|
|
/* 'level' argument of therm_throt_process(): package-wide event. */
#define PACKAGE_LEVEL 1
|
|
|
|
|
2006-09-26 10:52:42 +02:00
|
|
|
/***
|
2006-09-26 10:52:42 +02:00
|
|
|
* therm_throt_process - Process thermal throttling event from interrupt
|
2006-09-26 10:52:42 +02:00
|
|
|
* @curr: Whether the condition is current or not (boolean), since the
|
|
|
|
* thermal interrupt normally gets called both when the thermal
|
|
|
|
* event begins and once the event has ended.
|
|
|
|
*
|
2006-09-26 10:52:42 +02:00
|
|
|
* This function is called by the thermal interrupt after the
|
2006-09-26 10:52:42 +02:00
|
|
|
* IRQ has been acknowledged.
|
|
|
|
*
|
|
|
|
* It will take care of rate limiting and printing messages to the syslog.
|
|
|
|
*
|
|
|
|
* Returns: 0 : Event should NOT be further logged, i.e. still in
|
|
|
|
* "timeout" from previous log message.
|
|
|
|
* 1 : Event should be logged further, and a message has been
|
|
|
|
* printed to the syslog.
|
|
|
|
*/
|
2010-07-29 17:13:45 -07:00
|
|
|
static int therm_throt_process(bool new_event, int event, int level)
|
2006-09-26 10:52:42 +02:00
|
|
|
{
|
2010-07-29 17:13:44 -07:00
|
|
|
struct _thermal_state *state;
|
2010-07-29 17:13:45 -07:00
|
|
|
unsigned int this_cpu = smp_processor_id();
|
|
|
|
bool old_event;
|
2009-09-22 15:50:24 +02:00
|
|
|
u64 now;
|
2010-07-29 17:13:45 -07:00
|
|
|
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
|
2009-09-22 15:50:24 +02:00
|
|
|
|
|
|
|
now = get_jiffies_64();
|
2010-07-29 17:13:45 -07:00
|
|
|
if (level == CORE_LEVEL) {
|
|
|
|
if (event == THERMAL_THROTTLING_EVENT)
|
|
|
|
state = &pstate->core_throttle;
|
|
|
|
else if (event == POWER_LIMIT_EVENT)
|
|
|
|
state = &pstate->core_power_limit;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
} else if (level == PACKAGE_LEVEL) {
|
|
|
|
if (event == THERMAL_THROTTLING_EVENT)
|
|
|
|
state = &pstate->package_throttle;
|
|
|
|
else if (event == POWER_LIMIT_EVENT)
|
|
|
|
state = &pstate->package_power_limit;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
} else
|
|
|
|
return 0;
|
2009-09-22 15:50:24 +02:00
|
|
|
|
2010-07-29 17:13:45 -07:00
|
|
|
old_event = state->new_event;
|
|
|
|
state->new_event = new_event;
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2010-07-29 17:13:45 -07:00
|
|
|
if (new_event)
|
|
|
|
state->count++;
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2009-09-22 15:50:24 +02:00
|
|
|
if (time_before64(now, state->next_check) &&
|
2010-07-29 17:13:45 -07:00
|
|
|
state->count != state->last_count)
|
2006-09-26 10:52:42 +02:00
|
|
|
return 0;
|
|
|
|
|
2009-09-22 15:50:24 +02:00
|
|
|
state->next_check = now + CHECK_INTERVAL;
|
2010-07-29 17:13:45 -07:00
|
|
|
state->last_count = state->count;
|
2006-09-26 10:52:42 +02:00
|
|
|
|
|
|
|
/* if we just entered the thermal event */
|
2010-07-29 17:13:45 -07:00
|
|
|
if (new_event) {
|
|
|
|
if (event == THERMAL_THROTTLING_EVENT)
|
|
|
|
printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
|
|
|
|
this_cpu,
|
|
|
|
level == CORE_LEVEL ? "Core" : "Package",
|
|
|
|
state->count);
|
|
|
|
else
|
|
|
|
printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
|
|
|
|
this_cpu,
|
|
|
|
level == CORE_LEVEL ? "Core" : "Package",
|
|
|
|
state->count);
|
2009-08-16 15:54:37 +01:00
|
|
|
return 1;
|
|
|
|
}
|
2010-07-29 17:13:45 -07:00
|
|
|
if (old_event) {
|
|
|
|
if (event == THERMAL_THROTTLING_EVENT)
|
|
|
|
printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
|
|
|
|
this_cpu,
|
|
|
|
level == CORE_LEVEL ? "Core" : "Package");
|
|
|
|
else
|
|
|
|
printk(KERN_INFO "CPU%d: %s power limit normal\n",
|
|
|
|
this_cpu,
|
|
|
|
level == CORE_LEVEL ? "Core" : "Package");
|
2009-08-16 15:54:37 +01:00
|
|
|
return 1;
|
2006-09-26 10:52:42 +02:00
|
|
|
}
|
|
|
|
|
2009-08-16 15:54:37 +01:00
|
|
|
return 0;
|
2006-09-26 10:52:42 +02:00
|
|
|
}
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2011-01-03 17:22:04 +05:30
|
|
|
static int thresh_event_valid(int event)
|
|
|
|
{
|
|
|
|
struct _thermal_state *state;
|
|
|
|
unsigned int this_cpu = smp_processor_id();
|
|
|
|
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
|
|
|
|
u64 now = get_jiffies_64();
|
|
|
|
|
|
|
|
state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
|
|
|
|
|
|
|
|
if (time_before64(now, state->next_check))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
state->next_check = now + CHECK_INTERVAL;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2006-09-26 10:52:42 +02:00
|
|
|
#ifdef CONFIG_SYSFS
|
2009-04-08 12:31:19 +02:00
|
|
|
/* Add/Remove thermal_throttle interface for CPU device: */
|
2011-12-21 14:29:42 -08:00
|
|
|
static __cpuinit int thermal_throttle_add_dev(struct device *dev,
|
2010-08-20 10:36:34 +03:00
|
|
|
unsigned int cpu)
|
2006-09-26 10:52:42 +02:00
|
|
|
{
|
2010-07-29 17:13:44 -07:00
|
|
|
int err;
|
2010-08-20 10:36:34 +03:00
|
|
|
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
2010-07-29 17:13:44 -07:00
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
|
2010-07-29 17:13:44 -07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2010-07-29 17:13:45 -07:00
|
|
|
if (cpu_has(c, X86_FEATURE_PLN))
|
2011-12-21 14:29:42 -08:00
|
|
|
err = sysfs_add_file_to_group(&dev->kobj,
|
|
|
|
&dev_attr_core_power_limit_count.attr,
|
2010-07-29 17:13:45 -07:00
|
|
|
thermal_attr_group.name);
|
2010-08-26 17:29:05 +09:00
|
|
|
if (cpu_has(c, X86_FEATURE_PTS)) {
|
2011-12-21 14:29:42 -08:00
|
|
|
err = sysfs_add_file_to_group(&dev->kobj,
|
|
|
|
&dev_attr_package_throttle_count.attr,
|
2010-07-29 17:13:45 -07:00
|
|
|
thermal_attr_group.name);
|
|
|
|
if (cpu_has(c, X86_FEATURE_PLN))
|
2011-12-21 14:29:42 -08:00
|
|
|
err = sysfs_add_file_to_group(&dev->kobj,
|
|
|
|
&dev_attr_package_power_limit_count.attr,
|
2010-07-29 17:13:45 -07:00
|
|
|
thermal_attr_group.name);
|
2010-08-26 17:29:05 +09:00
|
|
|
}
|
2010-07-29 17:13:44 -07:00
|
|
|
|
|
|
|
return err;
|
2006-09-26 10:52:42 +02:00
|
|
|
}
|
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
|
2006-09-26 10:52:42 +02:00
|
|
|
{
|
2011-12-21 14:29:42 -08:00
|
|
|
sysfs_remove_group(&dev->kobj, &thermal_attr_group);
|
2006-09-26 10:52:42 +02:00
|
|
|
}
|
|
|
|
|
2009-04-08 12:31:19 +02:00
|
|
|
/* Mutex protecting device creation against CPU hotplug: */
|
2006-09-26 10:52:42 +02:00
|
|
|
/*
 * Taken around thermal_throttle_add_dev()/thermal_throttle_remove_dev() in
 * the hotplug callback, and around the initial setup loop when
 * CONFIG_HOTPLUG_CPU is set.
 */
static DEFINE_MUTEX(therm_cpu_lock);
|
|
|
|
|
|
|
|
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
|
2009-04-08 12:31:19 +02:00
|
|
|
static __cpuinit int
|
|
|
|
thermal_throttle_cpu_callback(struct notifier_block *nfb,
|
|
|
|
unsigned long action,
|
|
|
|
void *hcpu)
|
2006-09-26 10:52:42 +02:00
|
|
|
{
|
|
|
|
unsigned int cpu = (unsigned long)hcpu;
|
2011-12-21 14:29:42 -08:00
|
|
|
struct device *dev;
|
2007-10-18 03:05:13 -07:00
|
|
|
int err = 0;
|
2006-09-26 10:52:42 +02:00
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
dev = get_cpu_device(cpu);
|
2009-04-08 12:31:19 +02:00
|
|
|
|
2006-09-26 10:52:42 +02:00
|
|
|
switch (action) {
|
2007-10-18 03:05:13 -07:00
|
|
|
case CPU_UP_PREPARE:
|
|
|
|
case CPU_UP_PREPARE_FROZEN:
|
2007-05-24 12:37:34 +03:00
|
|
|
mutex_lock(&therm_cpu_lock);
|
2011-12-21 14:29:42 -08:00
|
|
|
err = thermal_throttle_add_dev(dev, cpu);
|
2007-05-24 12:37:34 +03:00
|
|
|
mutex_unlock(&therm_cpu_lock);
|
2006-10-12 11:01:30 -07:00
|
|
|
WARN_ON(err);
|
2006-09-26 10:52:42 +02:00
|
|
|
break;
|
2007-10-18 03:05:13 -07:00
|
|
|
case CPU_UP_CANCELED:
|
|
|
|
case CPU_UP_CANCELED_FROZEN:
|
2006-09-26 10:52:42 +02:00
|
|
|
case CPU_DEAD:
|
2007-05-09 02:35:10 -07:00
|
|
|
case CPU_DEAD_FROZEN:
|
2007-05-24 12:37:34 +03:00
|
|
|
mutex_lock(&therm_cpu_lock);
|
2011-12-21 14:29:42 -08:00
|
|
|
thermal_throttle_remove_dev(dev);
|
2007-05-24 12:37:34 +03:00
|
|
|
mutex_unlock(&therm_cpu_lock);
|
2006-09-26 10:52:42 +02:00
|
|
|
break;
|
|
|
|
}
|
2010-05-26 14:43:30 -07:00
|
|
|
return notifier_from_errno(err);
|
2006-09-26 10:52:42 +02:00
|
|
|
}
|
|
|
|
|
2007-10-17 18:04:33 +02:00
|
|
|
/* Notifier block registered by thermal_throttle_init_device(). */
static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = {
	.notifier_call	= thermal_throttle_cpu_callback,
};
|
|
|
|
|
|
|
|
static __init int thermal_throttle_init_device(void)
|
|
|
|
{
|
|
|
|
unsigned int cpu = 0;
|
2006-10-12 11:01:30 -07:00
|
|
|
int err;
|
2006-09-26 10:52:42 +02:00
|
|
|
|
|
|
|
if (!atomic_read(&therm_throt_en))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
|
|
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
mutex_lock(&therm_cpu_lock);
|
|
|
|
#endif
|
|
|
|
/* connect live CPUs to sysfs */
|
2006-10-12 11:01:30 -07:00
|
|
|
for_each_online_cpu(cpu) {
|
2011-12-21 14:29:42 -08:00
|
|
|
err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
|
2006-10-12 11:01:30 -07:00
|
|
|
WARN_ON(err);
|
|
|
|
}
|
2006-09-26 10:52:42 +02:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
mutex_unlock(&therm_cpu_lock);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* Register thermal_throttle_init_device() as a device initcall. */
device_initcall(thermal_throttle_init_device);
|
2009-06-15 17:25:27 +09:00
|
|
|
|
2006-09-26 10:52:42 +02:00
|
|
|
#endif /* CONFIG_SYSFS */
|
2009-06-15 17:25:27 +09:00
|
|
|
|
2011-01-03 17:22:04 +05:30
|
|
|
/*
 * Forward core threshold events to the platform handler, if one is set.
 *
 * @msr_val: thermal status MSR value, passed unchanged to the handler.
 *
 * Each threshold is rate limited separately through thresh_event_valid(),
 * so the handler may be called up to twice for one interrupt.
 */
static void notify_thresholds(__u64 msr_val)
{
	/* Nothing to do unless an interrupt handler has been defined. */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if (msr_val & THERM_LOG_THRESHOLD0) {
		if (thresh_event_valid(0))
			platform_thermal_notify(msr_val);
	}

	/* higher threshold reached */
	if (msr_val & THERM_LOG_THRESHOLD1) {
		if (thresh_event_valid(1))
			platform_thermal_notify(msr_val);
	}
}
|
|
|
|
|
2009-06-15 17:25:27 +09:00
|
|
|
/* Thermal transition interrupt handler */
|
2009-06-15 17:26:36 +09:00
|
|
|
static void intel_thermal_interrupt(void)
|
2009-06-15 17:25:27 +09:00
|
|
|
{
|
|
|
|
__u64 msr_val;
|
|
|
|
|
|
|
|
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
2010-07-29 17:13:45 -07:00
|
|
|
|
2011-01-03 17:22:04 +05:30
|
|
|
/* Check for violation of core thermal thresholds*/
|
|
|
|
notify_thresholds(msr_val);
|
|
|
|
|
2010-07-29 17:13:44 -07:00
|
|
|
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
|
2010-07-29 17:13:45 -07:00
|
|
|
THERMAL_THROTTLING_EVENT,
|
2010-07-29 17:13:44 -07:00
|
|
|
CORE_LEVEL) != 0)
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
mce_log_therm_throt_event(msr_val);
|
2010-07-29 17:13:45 -07:00
|
|
|
|
2011-03-12 12:50:46 +01:00
|
|
|
if (this_cpu_has(X86_FEATURE_PLN))
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
|
2010-07-29 17:13:45 -07:00
|
|
|
POWER_LIMIT_EVENT,
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
CORE_LEVEL);
|
2010-07-29 17:13:44 -07:00
|
|
|
|
2011-03-12 12:50:46 +01:00
|
|
|
if (this_cpu_has(X86_FEATURE_PTS)) {
|
2010-07-29 17:13:44 -07:00
|
|
|
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
|
2010-07-29 17:13:45 -07:00
|
|
|
THERMAL_THROTTLING_EVENT,
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
PACKAGE_LEVEL);
|
2011-03-12 12:50:46 +01:00
|
|
|
if (this_cpu_has(X86_FEATURE_PLN))
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
therm_throt_process(msr_val &
|
2010-07-29 17:13:45 -07:00
|
|
|
PACKAGE_THERM_STATUS_POWER_LIMIT,
|
|
|
|
POWER_LIMIT_EVENT,
|
x86, mce, therm_throt: Don't report power limit and package level thermal throttle events in mcelog
Thermal throttle and power limit events are not defined as MCE errors in x86
architecture and should not generate MCE errors in mcelog.
Current kernel generates fake software defined MCE errors for these events.
This may confuse users because they may think the machine has real MCE errors
while actually only thermal throttle or power limit events happen.
To make it worse, buggy firmware on some platforms may falsely generate
the events. Therefore, kernel reports MCE errors which users think as real
hardware errors. Although the firmware bugs should be fixed, on the other hand,
kernel should not report MCE errors either.
So mcelog is not a good mechanism to report these events. To report the events, we count them in respective counters (core_power_limit_count,
package_power_limit_count, core_throttle_count, and package_throttle_count) in
/sys/devices/system/cpu/cpu#/thermal_throttle/. Users can check the counters
for each event on each CPU. Please note that all CPU's on one package report
duplicate counters. It's user application's responsibity to retrieve a package
level counter for one package.
This patch doesn't report package level power limit, core level power limit, and
package level thermal throttle events in mcelog. When the events happen, only
report them in respective counters in sysfs.
Since core level thermal throttle has been legacy code in kernel for a while and
users accepted it as MCE error in mcelog, core level thermal throttle is still
reported in mcelog. In the mean time, the event is counted in a counter in sysfs
as well.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Borislav Petkov <bp@amd64.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20111215001945.GA21009@linux-os.sc.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2011-11-04 13:31:23 -07:00
|
|
|
PACKAGE_LEVEL);
|
2010-07-29 17:13:44 -07:00
|
|
|
}
|
2009-06-15 17:25:27 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
static void unexpected_thermal_interrupt(void)
|
|
|
|
{
|
2010-08-31 09:13:33 +09:00
|
|
|
printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
|
2009-06-15 17:25:27 +09:00
|
|
|
smp_processor_id());
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Handler invoked by smp_thermal_interrupt(); initially the "unexpected
 * interrupt" reporter. NOTE(review): where it is replaced by a real handler
 * is outside the code shown here - confirm.
 */
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
|
|
|
|
|
|
|
|
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
irq_enter();
|
2011-10-07 18:22:09 +02:00
|
|
|
exit_idle();
|
2009-06-15 17:25:27 +09:00
|
|
|
inc_irq_stat(irq_thermal_count);
|
|
|
|
smp_thermal_vector();
|
|
|
|
irq_exit();
|
|
|
|
/* Ack only at the end to avoid potential reentry */
|
|
|
|
ack_APIC_irq();
|
|
|
|
}
|
|
|
|
|
2009-12-14 17:57:00 +09:00
|
|
|
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
|
|
|
|
static int intel_thermal_supported(struct cpuinfo_x86 *c)
|
|
|
|
{
|
|
|
|
if (!cpu_has_apic)
|
|
|
|
return 0;
|
|
|
|
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2009-11-11 15:51:25 +08:00
|
|
|
/*
 * Runs on the boot CPU only. Records the thermal LVT value found on the BSP
 * in lvtthmr_init so that the BIOS-programmed entry can later be restored on
 * the APs.
 */
void __init mcheck_intel_therm_init(void)
{
	if (!intel_thermal_supported(&boot_cpu_data))
		return;

	lvtthmr_init = apic_read(APIC_LVTTHMR);
}
|
|
|
|
|
2009-11-12 15:52:40 +09:00
|
|
|
void intel_init_thermal(struct cpuinfo_x86 *c)
|
2009-06-15 17:26:10 +09:00
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
int tm2 = 0;
|
|
|
|
u32 l, h;
|
|
|
|
|
2009-12-14 17:57:00 +09:00
|
|
|
if (!intel_thermal_supported(c))
|
2009-06-15 17:26:10 +09:00
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First check if its enabled already, in which case there might
|
|
|
|
* be some SMM goo which handles it, so we can't even put a handler
|
|
|
|
* since it might be delivered via SMI already:
|
|
|
|
*/
|
|
|
|
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
2009-11-10 09:38:24 +08:00
|
|
|
|
2011-04-22 00:22:43 +08:00
|
|
|
h = lvtthmr_init;
|
2009-11-10 09:38:24 +08:00
|
|
|
/*
|
|
|
|
* The initial value of thermal LVT entries on all APs always reads
|
|
|
|
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
|
|
|
|
* sequence to them and LVT registers are reset to 0s except for
|
|
|
|
* the mask bits which are set to 1s when APs receive INIT IPI.
|
2011-04-22 00:22:43 +08:00
|
|
|
* If BIOS takes over the thermal interrupt and sets its interrupt
|
|
|
|
* delivery mode to SMI (not fixed), it restores the value that the
|
|
|
|
* BIOS has programmed on AP based on BSP's info we saved since BIOS
|
|
|
|
* is always setting the same value for all threads/cores.
|
2009-11-10 09:38:24 +08:00
|
|
|
*/
|
2011-04-22 00:22:43 +08:00
|
|
|
if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
|
|
|
|
apic_write(APIC_LVTTHMR, lvtthmr_init);
|
2009-11-10 09:38:24 +08:00
|
|
|
|
|
|
|
|
2009-06-15 17:26:10 +09:00
|
|
|
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
|
|
|
printk(KERN_DEBUG
|
|
|
|
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check whether a vector already exists */
|
|
|
|
if (h & APIC_VECTOR_MASK) {
|
|
|
|
printk(KERN_DEBUG
|
|
|
|
"CPU%d: Thermal LVT vector (%#x) already installed\n",
|
|
|
|
cpu, (h & APIC_VECTOR_MASK));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-07-29 00:04:59 +02:00
|
|
|
/* early Pentium M models use different method for enabling TM2 */
|
|
|
|
if (cpu_has(c, X86_FEATURE_TM2)) {
|
|
|
|
if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
|
|
|
|
rdmsr(MSR_THERM2_CTL, l, h);
|
|
|
|
if (l & MSR_THERM2_CTL_TM_SELECT)
|
|
|
|
tm2 = 1;
|
|
|
|
} else if (l & MSR_IA32_MISC_ENABLE_TM2)
|
|
|
|
tm2 = 1;
|
|
|
|
}
|
|
|
|
|
2009-06-15 17:26:10 +09:00
|
|
|
/* We'll mask the thermal vector in the lapic till we're ready: */
|
|
|
|
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
|
|
|
|
apic_write(APIC_LVTTHMR, h);
|
|
|
|
|
|
|
|
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
2010-07-29 17:13:45 -07:00
|
|
|
if (cpu_has(c, X86_FEATURE_PLN))
|
|
|
|
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
|
|
|
l | (THERM_INT_LOW_ENABLE
|
|
|
|
| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
|
|
|
|
else
|
|
|
|
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
|
|
|
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
|
2009-06-15 17:26:10 +09:00
|
|
|
|
2010-07-29 17:13:44 -07:00
|
|
|
if (cpu_has(c, X86_FEATURE_PTS)) {
|
|
|
|
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
|
2010-07-29 17:13:45 -07:00
|
|
|
if (cpu_has(c, X86_FEATURE_PLN))
|
|
|
|
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
|
|
|
|
l | (PACKAGE_THERM_INT_LOW_ENABLE
|
|
|
|
| PACKAGE_THERM_INT_HIGH_ENABLE
|
|
|
|
| PACKAGE_THERM_INT_PLN_ENABLE), h);
|
|
|
|
else
|
|
|
|
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
|
|
|
|
l | (PACKAGE_THERM_INT_LOW_ENABLE
|
|
|
|
| PACKAGE_THERM_INT_HIGH_ENABLE), h);
|
2010-07-29 17:13:44 -07:00
|
|
|
}
|
|
|
|
|
2009-06-15 17:26:36 +09:00
|
|
|
smp_thermal_vector = intel_thermal_interrupt;
|
2009-06-15 17:26:10 +09:00
|
|
|
|
|
|
|
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
|
|
|
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
|
|
|
|
|
|
|
/* Unmask the thermal vector: */
|
|
|
|
l = apic_read(APIC_LVTTHMR);
|
|
|
|
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
|
|
|
|
2009-12-10 17:19:36 -08:00
|
|
|
printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
|
|
|
|
tm2 ? "TM2" : "TM1");
|
2009-06-15 17:26:10 +09:00
|
|
|
|
|
|
|
/* enable thermal throttle processing */
|
|
|
|
atomic_set(&therm_throt_en, 1);
|
|
|
|
}
|