mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
Additional thermal control fix for 6.11-rc1
Prevent the thermal core from flooding the kernel log with useless messages if thermal zone temperature can never be determined (or its sensor has failed permanently) and make it finally give up and disable defective thermal zones (Rafael Wysocki).

-----BEGIN PGP SIGNATURE-----

iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAmajgZUSHHJqd0Byand5
c29ja2kubmV0AAoJEILEb/54YlRxiacP/269B//oY03dsPV2Y7KHourWHmcZL0xf
wVNVNecRdhZ3mGxlJiQ682Sm4HF3dsgW368OgdedKXxpCdSGoi+nOyhxk8Cw1SVh
INgq2PILfkqkT3GeUUCxI/i9fRLEl3MQMGMbrTPHlt/YjSE1BTYD8jx5+nx9uMvO
+AiBf+n2BmxWRdZKPXgLp9VzDYIr3eG9+6wNEvBfnXN0BpfhOdI40YCsGIQGT0uf
ISmNWOhrgNZZQageWgywwMytkyiJqWAB+mGNGtkUqZsD9K9Q8qWh/WvWRVWd7N57
GNCkr2KiqoJ/kI2he2gQCgL27W73EFbMtt3XIdFy4pSur6UBvhs06mcUYsw3yhp6
mXtYmf69/w9TH6Hs8Fo6teA2L3C9yasUYBr8/Sf2CUM1NtKLDi/nPBG6uTqcOj4F
qZf25OeTkpN5ejTtZdrdUgdjkt2bAVM+zDx3JQjmfrsWHenEpCSQ4xQi4zdh8cEp
4SocP2t6w8g3sQK/i5dDgFBW4JUbx+WgkPD/L5NrEJwPvthHHw68CuNdNgeateaH
6WkedCm3JMLuNr4sEzLU7TArEhpFLgRnuAa3/eOQmJdWBVfY6eqLl9V2oeKsraIA
cGSvaahumTEUcCZ/bWeMZp7jntv4txS67GVchMpMoC9oRJw9rnilGDld6l1+HWdF
oSMv3cGaCNwH
=DymR
-----END PGP SIGNATURE-----

Merge tag 'thermal-6.11-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull thermal control fix from Rafael Wysocki:

"Prevent the thermal core from flooding the kernel log with useless messages if thermal zone temperature can never be determined (or its sensor has failed permanently) and make it finally give up and disable defective thermal zones (Rafael Wysocki)"

* tag 'thermal-6.11-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  thermal: core: Back off when polling thermal zones on errors
  thermal: trip: Split thermal_zone_device_set_mode()
This commit is contained in:
commit
1fcaa5db40
@ -272,6 +272,44 @@ static int __init thermal_register_governors(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * __thermal_zone_device_set_mode - record a new mode for a thermal zone
 * @tz:   thermal zone to update
 * @mode: mode to switch to
 *
 * If the zone provides a change_mode() callback it is called first; a nonzero
 * result is returned as-is and tz->mode is left untouched. Otherwise (or when
 * the callback returns 0) @mode is stored in tz->mode.
 *
 * This helper takes no lock and does not compare @mode with the current mode.
 * NOTE(review): thermal_zone_device_set_mode() calls it with tz->lock held;
 * presumably every caller is expected to do the same - confirm.
 *
 * Return: 0 on success, or the error code returned by change_mode().
 */
static int __thermal_zone_device_set_mode(struct thermal_zone_device *tz,
|
||||
enum thermal_device_mode mode)
|
||||
{
|
||||
if (tz->ops.change_mode) {
|
||||
int ret;
|
||||
|
||||
ret = tz->ops.change_mode(tz, mode);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
tz->mode = mode;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * thermal_zone_broken_disable - give up on a zone whose temperature is unreadable
 * @tz: thermal zone to disable
 *
 * Logs an error, asks __thermal_zone_device_set_mode() to switch the zone to
 * THERMAL_DEVICE_DISABLED and calls thermal_notify_tz_disable(). It then walks
 * the zone's trips and, on the first trip of type THERMAL_TRIP_CRITICAL whose
 * temperature is above THERMAL_TEMP_INVALID, logs one critical-level message
 * and stops.
 *
 * NOTE(review): the return value of __thermal_zone_device_set_mode() is
 * ignored, so the disable notification is sent even if the zone's
 * change_mode() callback failed and tz->mode was not updated - confirm this
 * is intended.
 */
static void thermal_zone_broken_disable(struct thermal_zone_device *tz)
|
||||
{
|
||||
struct thermal_trip_desc *td;
|
||||
|
||||
dev_err(&tz->device, "Unable to get temperature, disabling!\n");
|
||||
/*
|
||||
* This function only runs for enabled thermal zones, so no need to
|
||||
* check for the current mode.
|
||||
*/
|
||||
__thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED);
|
||||
thermal_notify_tz_disable(tz);
|
||||
|
||||
for_each_trip_desc(tz, td) {
|
||||
if (td->trip.type == THERMAL_TRIP_CRITICAL &&
|
||||
td->trip.temperature > THERMAL_TEMP_INVALID) {
|
||||
dev_crit(&tz->device,
|
||||
"Disabled thermal zone with critical trip point\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Zone update section: main control loop applied to each zone while monitoring
|
||||
* in polling mode. The monitoring is done using a workqueue.
|
||||
@ -292,6 +330,34 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
|
||||
cancel_delayed_work(&tz->poll_queue);
|
||||
}
|
||||
|
||||
/*
 * thermal_zone_recheck - arrange a retry after a failed temperature check
 * @tz:    thermal zone whose temperature could not be determined
 * @error: error code produced by the failed check
 *
 * For -EAGAIN the zone is simply polled again after THERMAL_RECHECK_DELAY,
 * with no logging and no change to the back-off state.
 *
 * For any other error the sequence is:
 *  1. log an informational message, but only while the delay still equals
 *     THERMAL_RECHECK_DELAY (i.e. it has not been grown yet);
 *  2. call thermal_zone_device_set_polling() with the current
 *     tz->recheck_delay_jiffies;
 *  3. grow tz->recheck_delay_jiffies by half of its value, and by at least 1;
 *  4. if the grown delay exceeds THERMAL_MAX_RECHECK_DELAY, disable the zone
 *     via thermal_zone_broken_disable() and reset the delay to
 *     THERMAL_RECHECK_DELAY.
 *
 * Step 2 runs before step 4, so the polling request made with the previous
 * delay has already been issued when the zone is disabled.
 */
static void thermal_zone_recheck(struct thermal_zone_device *tz, int error)
|
||||
{
|
||||
if (error == -EAGAIN) {
|
||||
thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the message once to reduce log noise. It will be followed by
|
||||
* another one if the temperature cannot be determined after multiple
|
||||
* attempts.
|
||||
*/
|
||||
if (tz->recheck_delay_jiffies == THERMAL_RECHECK_DELAY)
|
||||
dev_info(&tz->device, "Temperature check failed (%d)\n", error);
|
||||
|
||||
thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies);
|
||||
|
||||
tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL);
|
||||
if (tz->recheck_delay_jiffies > THERMAL_MAX_RECHECK_DELAY) {
|
||||
thermal_zone_broken_disable(tz);
|
||||
/*
|
||||
* Restore the original recheck delay value to allow the thermal
|
||||
* zone to try to recover when it is reenabled by user space.
|
||||
*/
|
||||
tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
|
||||
}
|
||||
}
|
||||
|
||||
static void monitor_thermal_zone(struct thermal_zone_device *tz)
|
||||
{
|
||||
if (tz->mode != THERMAL_DEVICE_ENABLED)
|
||||
@ -491,10 +557,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
|
||||
|
||||
ret = __thermal_zone_get_temp(tz, &temp);
|
||||
if (ret) {
|
||||
if (ret != -EAGAIN)
|
||||
dev_info(&tz->device, "Temperature check failed (%d)\n", ret);
|
||||
|
||||
thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS));
|
||||
thermal_zone_recheck(tz, ret);
|
||||
return;
|
||||
} else if (temp <= THERMAL_TEMP_INVALID) {
|
||||
/*
|
||||
@ -506,6 +569,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
|
||||
goto monitor;
|
||||
}
|
||||
|
||||
tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
|
||||
|
||||
tz->last_temperature = tz->temperature;
|
||||
tz->temperature = temp;
|
||||
|
||||
@ -540,7 +605,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
|
||||
static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
|
||||
enum thermal_device_mode mode)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&tz->lock);
|
||||
|
||||
@ -548,14 +613,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
|
||||
if (mode == tz->mode) {
|
||||
mutex_unlock(&tz->lock);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (tz->ops.change_mode)
|
||||
ret = tz->ops.change_mode(tz, mode);
|
||||
ret = __thermal_zone_device_set_mode(tz, mode);
|
||||
if (ret) {
|
||||
mutex_unlock(&tz->lock);
|
||||
|
||||
if (!ret)
|
||||
tz->mode = mode;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
|
||||
|
||||
@ -566,7 +632,7 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
|
||||
else
|
||||
thermal_notify_tz_disable(tz);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int thermal_zone_device_enable(struct thermal_zone_device *tz)
|
||||
@ -1445,6 +1511,7 @@ thermal_zone_device_register_with_trips(const char *type,
|
||||
|
||||
thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
|
||||
thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);
|
||||
tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
|
||||
|
||||
/* sys I/F */
|
||||
/* Add nodes that are always present via .groups */
|
||||
|
@ -67,6 +67,8 @@ struct thermal_governor {
|
||||
* @polling_delay_jiffies: number of jiffies to wait between polls when
|
||||
* checking whether trip points have been crossed (0 for
|
||||
* interrupt driven systems)
|
||||
* @recheck_delay_jiffies: delay after a failed attempt to determine the zone
|
||||
* temperature before trying again
|
||||
* @temperature: current temperature. This is only for core code,
|
||||
* drivers should use thermal_zone_get_temp() to get the
|
||||
* current temperature
|
||||
@ -108,6 +110,7 @@ struct thermal_zone_device {
|
||||
int num_trips;
|
||||
unsigned long passive_delay_jiffies;
|
||||
unsigned long polling_delay_jiffies;
|
||||
unsigned long recheck_delay_jiffies;
|
||||
int temperature;
|
||||
int last_temperature;
|
||||
int emul_temperature;
|
||||
@ -137,10 +140,11 @@ struct thermal_zone_device {
|
||||
#define THERMAL_TEMP_INIT INT_MIN
|
||||
|
||||
/*
|
||||
* Default delay after a failing thermal zone temperature check before
|
||||
* attempting to check it again.
|
||||
* Default and maximum delay after a failed thermal zone temperature check
|
||||
* before attempting to check it again (in jiffies).
|
||||
*/
|
||||
#define THERMAL_RECHECK_DELAY_MS 250
|
||||
#define THERMAL_RECHECK_DELAY msecs_to_jiffies(250)
|
||||
#define THERMAL_MAX_RECHECK_DELAY (120 * HZ)
|
||||
|
||||
/* Default Thermal Governor */
|
||||
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
|
||||
|
Loading…
Reference in New Issue
Block a user