From 919bfa9b2dbf3bc0c478afd4e44445836381dacb Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Tue, 10 Dec 2024 03:25:57 +0000 Subject: [PATCH 1/5] cpufreq/amd-pstate: Detect preferred core support before driver registration Booting with amd-pstate on 3rd Generation EPYC system incorrectly enabled ITMT support despite the system not supporting Preferred Core ranking. amd_pstate_init_prefcore() called during amd_pstate*_cpu_init() requires "amd_pstate_prefcore" to be set correctly however the preferred core support is detected only after driver registration which is too late. Swap the function calls around to detect preferred core support before registring the driver via amd_pstate_register_driver(). This ensures amd_pstate*_cpu_init() sees the correct value of "amd_pstate_prefcore" considering the platform support. Fixes: 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") Fixes: ff2653ded4d9 ("cpufreq/amd-pstate: Move registration after static function call update") Signed-off-by: K Prateek Nayak Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/20241210032557.754-1-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d7630bab2516..8b36450bbdf6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1869,18 +1869,18 @@ static int __init amd_pstate_init(void) static_call_update(amd_pstate_update_perf, shmem_update_perf); } - ret = amd_pstate_register_driver(cppc_state); - if (ret) { - pr_err("failed to register with return %d\n", ret); - return ret; - } - if (amd_pstate_prefcore) { ret = amd_detect_prefcore(&amd_pstate_prefcore); if (ret) return ret; } + ret = amd_pstate_register_driver(cppc_state); + if (ret) { + pr_err("failed to register with return %d\n", ret); + return ret; + } + dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group); From 50a062a7620051c09adacd6d140ebd56881a333b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:34 -0600 Subject: [PATCH 2/5] cpufreq/amd-pstate: Store the boost numerator as highest perf again commit ad4caad58d91d ("cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator()") changed the semantics for highest perf and commit 18d9b52271213 ("cpufreq/amd-pstate: Use nominal perf for limits when boost is disabled") worked around those semantic changes. This however is a confusing result and furthermore makes it awkward to change frequency limits and boost due to the scaling differences. Restore the boost numerator to highest perf again. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Fixes: ad4caad58d91 ("cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator()") Link: https://lore.kernel.org/r/20241209185248.16301-2-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- Documentation/admin-guide/pm/amd-pstate.rst | 4 +--- drivers/cpufreq/amd-pstate.c | 25 ++++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 210a808b74ec..412423c54f25 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability `_). In some ASICs, the highest CPPC performance is not the one in the ``_CPC`` table, so we need to expose it to sysfs. If boost is not active, but still supported, this maximum frequency will be larger than the one in -``cpuinfo``. On systems that support preferred core, the driver will have -different values for some cores than others and this will reflect the values -advertised by the platform at bootup. +``cpuinfo``. This attribute is read-only. ``amd_pstate_lowest_nonlinear_freq`` diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8b36450bbdf6..ab6fe9c2150c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -374,15 +374,19 @@ static inline int amd_pstate_cppc_enable(bool enable) static int msr_init_perf(struct amd_cpudata *cpudata) { - u64 cap1; + u64 cap1, numerator; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); - WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + + WRITE_ONCE(cpudata->highest_perf, numerator); + WRITE_ONCE(cpudata->max_limit_perf, numerator); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); @@ -394,13 +398,18 @@ static int msr_init_perf(struct amd_cpudata *cpudata) static int shmem_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; + u64 numerator; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + + WRITE_ONCE(cpudata->highest_perf, numerator); + WRITE_ONCE(cpudata->max_limit_perf, numerator); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); @@ -889,7 +898,6 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { int ret; u32 min_freq, max_freq; - u64 numerator; u32 nominal_perf, nominal_freq; u32 lowest_nonlinear_perf, lowest_nonlinear_freq; u32 boost_ratio, lowest_nonlinear_ratio; @@ -911,10 +919,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) nominal_perf = READ_ONCE(cpudata->nominal_perf); - ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); - if (ret) - return ret; - boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf); + boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); From 2993b29b2a98f2bc9d55dfd37ef39f56a2908748 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:35 -0600 Subject: [PATCH 3/5] cpufreq/amd-pstate: Use boost numerator for upper bound of frequencies commit 18d9b5227121 ("cpufreq/amd-pstate: Use nominal perf for limits when boost is disabled") introduced different semantics for min/max limits based upon whether the user turned off boost from sysfs. This however is not necessary when the highest perf value is the boost numerator. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Fixes: 18d9b5227121 ("cpufreq/amd-pstate: Use nominal perf for limits when boost is disabled") Link: https://lore.kernel.org/r/20241209185248.16301-3-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ab6fe9c2150c..66e5dfc711c0 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -570,16 +570,13 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf; + u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; - if (cpudata->boost_supported && !policy->boost_enabled) - max_perf = READ_ONCE(cpudata->nominal_perf); - else - max_perf = READ_ONCE(cpudata->highest_perf); - - max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq); - min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq); + max_perf = READ_ONCE(cpudata->highest_perf); + max_freq = READ_ONCE(cpudata->max_freq); + max_limit_perf = div_u64(policy->max * max_perf, max_freq); + min_limit_perf = div_u64(policy->min * max_perf, max_freq); lowest_perf = READ_ONCE(cpudata->lowest_perf); if (min_limit_perf < lowest_perf) From 65c8c78cc74d5bcbc43f1f785a004796a2d78360 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 12 Dec 2024 21:13:10 +0100 Subject: [PATCH 4/5] thermal/thresholds: Fix uapi header macros leading to a compilation error The macros giving the direction of the crossing thresholds use the BIT macro which is not exported to the userspace. Consequently when an userspace program includes the header, it fails to compile. Replace the macros by their litteral to allow the compilation of userspace program using this header. Fixes: 445936f9e258 ("thermal: core: Add user thresholds support") Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20241212201311.4143196-1-daniel.lezcano@linaro.org [ rjw: Add Fixes: ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index ba8604bdf206..349718c271eb 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -3,8 +3,8 @@ #define _UAPI_LINUX_THERMAL_H #define THERMAL_NAME_LENGTH 20 -#define THERMAL_THRESHOLD_WAY_UP BIT(0) -#define THERMAL_THRESHOLD_WAY_DOWN BIT(1) +#define THERMAL_THRESHOLD_WAY_UP 0x1 +#define THERMAL_THRESHOLD_WAY_DOWN 0x2 enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, From 4feaedf7d243f1a9af36dfb2711a5641fe3559dc Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 16 Dec 2024 22:26:44 +0100 Subject: [PATCH 5/5] thermal/thresholds: Fix boundaries and detection routine The current implementation does not work if the thermal zone is interrupt driven only. The boundaries are not correctly checked and computed as it happens only when the temperature is increasing or decreasing. The problem arises because the routine to detect when we cross a threshold is correlated with the computation of the boundaries. We assume we have to recompute the boundaries when a threshold is crossed but actually we should do that even if the it is not the case. Mixing the boundaries computation and the threshold detection for the sake of optimizing the routine is much more complex as it appears intuitively and prone to errors. This fix separates the boundaries computation and the threshold crossing detection into different routines. The result is a code much more simple to understand, thus easier to maintain. The drawback is we browse the thresholds list several time but we can consider that as neglictible because that happens when the temperature is updated. There are certainly some aeras to improve in the temperature update routine but it would be not adequate as this change aims to fix the thresholds for v6.13. Fixes: 445936f9e258 ("thermal: core: Add user thresholds support") Tested-by: Daniel Lezcano # rock5b, Lenovo x13s Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20241216212644.1145122-1-daniel.lezcano@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_thresholds.c | 76 +++++++++++++++------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/drivers/thermal/thermal_thresholds.c b/drivers/thermal/thermal_thresholds.c index d9b2a0bb44fc..38f5fd0e8930 100644 --- a/drivers/thermal/thermal_thresholds.c +++ b/drivers/thermal/thermal_thresholds.c @@ -69,40 +69,18 @@ static struct user_threshold *__thermal_thresholds_find(const struct list_head * return NULL; } -static bool __thermal_threshold_is_crossed(struct user_threshold *threshold, int temperature, - int last_temperature, int direction, - int *low, int *high) -{ - - if (temperature >= threshold->temperature) { - if (threshold->temperature > *low && - THERMAL_THRESHOLD_WAY_DOWN & threshold->direction) - *low = threshold->temperature; - - if (last_temperature < threshold->temperature && - threshold->direction & direction) - return true; - } else { - if (threshold->temperature < *high && THERMAL_THRESHOLD_WAY_UP - & threshold->direction) - *high = threshold->temperature; - - if (last_temperature >= threshold->temperature && - threshold->direction & direction) - return true; - } - - return false; -} - static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature, - int last_temperature, int *low, int *high) + int last_temperature) { struct user_threshold *t; list_for_each_entry(t, thresholds, list_node) { - if (__thermal_threshold_is_crossed(t, temperature, last_temperature, - THERMAL_THRESHOLD_WAY_UP, low, high)) + + if (!(t->direction & THERMAL_THRESHOLD_WAY_UP)) + continue; + + if (temperature >= t->temperature && + last_temperature < t->temperature) return true; } @@ -110,19 +88,43 @@ static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int } static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature, - int last_temperature, int *low, int *high) + int last_temperature) { struct user_threshold *t; list_for_each_entry_reverse(t, thresholds, list_node) { - if (__thermal_threshold_is_crossed(t, temperature, last_temperature, - THERMAL_THRESHOLD_WAY_DOWN, low, high)) + + if (!(t->direction & THERMAL_THRESHOLD_WAY_DOWN)) + continue; + + if (temperature <= t->temperature && + last_temperature > t->temperature) return true; } return false; } +static void thermal_threshold_find_boundaries(struct list_head *thresholds, int temperature, + int *low, int *high) +{ + struct user_threshold *t; + + list_for_each_entry(t, thresholds, list_node) { + if (temperature < t->temperature && + (t->direction & THERMAL_THRESHOLD_WAY_UP) && + *high > t->temperature) + *high = t->temperature; + } + + list_for_each_entry_reverse(t, thresholds, list_node) { + if (temperature > t->temperature && + (t->direction & THERMAL_THRESHOLD_WAY_DOWN) && + *low < t->temperature) + *low = t->temperature; + } +} + void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high) { struct list_head *thresholds = &tz->user_thresholds; @@ -132,6 +134,8 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi lockdep_assert_held(&tz->lock); + thermal_threshold_find_boundaries(thresholds, temperature, low, high); + /* * We need a second update in order to detect a threshold being crossed */ @@ -151,12 +155,12 @@ void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *hi * - decreased : thresholds are crossed the way down */ if (temperature > last_temperature) { - if (thermal_thresholds_handle_raising(thresholds, temperature, - last_temperature, low, high)) + if (thermal_thresholds_handle_raising(thresholds, + temperature, last_temperature)) thermal_notify_threshold_up(tz); } else { - if (thermal_thresholds_handle_dropping(thresholds, temperature, - last_temperature, low, high)) + if (thermal_thresholds_handle_dropping(thresholds, + temperature, last_temperature)) thermal_notify_threshold_down(tz); } }