From 9b18d536b124357fee56d82b1462c02f78d219e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Dec 2024 12:39:05 +0100 Subject: [PATCH 01/24] cpufreq: intel_pstate: Use CPPC to get scaling factors The perf-to-frequency scaling factors are used by intel_pstate on hybrid platforms to cast performance levels to frequency on different types of CPUs which is needed because the generic cpufreq sysfs interface works in the frequency domain. For some hybrid platforms already in the field, the scaling factors are known, but for others (including some upcoming ones) they most likely will be different and the only way to get them that scales is to use information provided by the platform firmware. In this particular case, the requisite information can be obtained via CPPC. If the P-core hybrid scaling factor for the given processor model is not known, use CPPC to compute hybrid scaling factors for all CPUs. Since the current default hybrid scaling factor is only suitable for a few early hybrid platforms, add intel_hybrid_scaling_factor[] entries for them and initialize the scaling factor to zero ("unknown") by default. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/8476313.T7Z3S40VBb@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b8e2396a708a..e16b27c35cfb 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -302,11 +303,11 @@ static bool hwp_is_hybrid; static struct cpufreq_driver *intel_pstate_driver __read_mostly; -#define HYBRID_SCALING_FACTOR 78741 +#define HYBRID_SCALING_FACTOR_ADL 78741 #define HYBRID_SCALING_FACTOR_MTL 80000 #define HYBRID_SCALING_FACTOR_LNL 86957 -static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR; +static int hybrid_scaling_factor; static inline int core_get_scaling(void) { @@ -414,18 +415,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu) static int intel_pstate_cppc_get_scaling(int cpu) { struct cppc_perf_caps cppc_perf; - int ret; - - ret = cppc_get_perf_caps(cpu, &cppc_perf); /* - * If the nominal frequency and the nominal performance are not - * zero and the ratio between them is not 100, return the hybrid - * scaling factor. + * Compute the perf-to-frequency scaling factor for the given CPU if + * possible, unless it would be 0. */ - if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq && - cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq) - return hybrid_scaling_factor; + if (!cppc_get_perf_caps(cpu, &cppc_perf) && + cppc_perf.nominal_perf && cppc_perf.nominal_freq) + return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ, + cppc_perf.nominal_perf); return core_get_scaling(); } @@ -2211,24 +2209,30 @@ static void hybrid_get_type(void *data) static int hwp_get_cpu_scaling(int cpu) { - u8 cpu_type = 0; + if (hybrid_scaling_factor) { + u8 cpu_type = 0; - smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); - /* P-cores have a smaller perf level-to-freqency scaling factor. */ - if (cpu_type == 0x40) - return hybrid_scaling_factor; + smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); - /* Use default core scaling for E-cores */ - if (cpu_type == 0x20) + /* + * Return the hybrid scaling factor for P-cores and use the + * default core scaling for E-cores. 
+ */ + if (cpu_type == 0x40) + return hybrid_scaling_factor; + + if (cpu_type == 0x20) + return core_get_scaling(); + } + + /* Use core scaling on non-hybrid systems. */ + if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) return core_get_scaling(); /* - * If reached here, this system is either non-hybrid (like Tiger - * Lake) or hybrid-capable (like Alder Lake or Raptor Lake) with - * no E cores (in which case CPUID for hybrid support is 0). - * - * The CPPC nominal_frequency field is 0 for non-hybrid systems, - * so the default core scaling will be used for them. + * The system is hybrid, but the hybrid scaling factor is not known or + * the CPU type is not one of the above, so use CPPC to compute the + * scaling factor for this CPU. */ return intel_pstate_cppc_get_scaling(cpu); } @@ -3665,6 +3669,11 @@ static const struct x86_cpu_id intel_epp_default[] = { }; static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { + X86_MATCH_VFM(INTEL_ALDERLAKE, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL), From 20e20f83dd88a25c1e4ab0a3838e9a4ce583c30d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Dec 2024 12:40:19 +0100 Subject: [PATCH 02/24] cpufreq: intel_pstate: Drop Arrow Lake from "scaling factor" list Since HYBRID_SCALING_FACTOR_MTL is not going to be suitable for Arrow Lake in general, drop it from the "known hybrid scaling factors" list of platforms, so the scaling factor for it will be determined with the help of information provided by the platform firmware via CPPC. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2307515.iZASKD2KPV@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e16b27c35cfb..9e14374498d6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3675,7 +3675,6 @@ static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), - X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL), {} }; From 16c977f8177f9c2ecb88319c944722107c952731 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:38 +0000 Subject: [PATCH 03/24] cpufreq/amd-pstate: Convert the amd_pstate_get/set_epp() to static calls MSR and shared memory based systems have different mechanisms to get and set the epp value. Split those mechanisms into different functions and assign them appropriately to the static calls at boot time. This eliminates the need for the "if(cpu_feature_enabled(X86_FEATURE_CPPC))" checks at runtime. Also, propagate the error code from rdmsrl_on_cpu() and cppc_get_epp_perf() to *_get_epp()'s caller, instead of returning -EIO unconditionally. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-2-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 94 +++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 66e5dfc711c0..bc42d96984f4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -180,26 +180,40 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) { u64 epp; int ret; - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - if (!cppc_req_cached) { - epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - &cppc_req_cached); - if (epp) - return epp; - } - epp = (cppc_req_cached >> 24) & 0xFF; - } else { - ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (!cppc_req_cached) { + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); if (ret < 0) { pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return -EIO; + return ret; } } + epp = (cppc_req_cached >> 24) & 0xFF; + + return (s16)epp; +} + +DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); + +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +{ + return static_call(amd_pstate_get_epp)(cpudata, cppc_req_cached); +} + +static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) +{ + u64 epp; + int ret; + + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; + } return (s16)(epp & 0xff); } @@ -253,33 +267,45 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, max_perf, fast_switch); } -static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + int ret; + + u64 value = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (!ret) + cpudata->epp_cached = epp; + + return ret; +} + +DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); + +static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + return static_call(amd_pstate_set_epp)(cpudata, epp); +} + +static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) { int ret; struct cppc_perf_ctrls perf_ctrls; - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, false); - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - WRITE_ONCE(cpudata->cppc_req_cached, value); - - ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; - } else { - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - - perf_ctrls.energy_perf = epp; - ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - if (ret) { - pr_debug("failed to set energy perf value (%d)\n", ret); - return ret; - } - cpudata->epp_cached = epp; + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value 
(%d)\n", ret); + return ret; } + cpudata->epp_cached = epp; return ret; } @@ -1869,6 +1895,8 @@ static int __init amd_pstate_init(void) static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); static_call_update(amd_pstate_init_perf, shmem_init_perf); static_call_update(amd_pstate_update_perf, shmem_update_perf); + static_call_update(amd_pstate_get_epp, shmem_get_epp); + static_call_update(amd_pstate_set_epp, shmem_set_epp); } if (amd_pstate_prefcore) { From 57a2b25e45cd40eaa2e505452384fa1b7248895a Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:39 +0000 Subject: [PATCH 04/24] cpufreq/amd-pstate: Move the invocation of amd_pstate_update_perf() amd_pstate_update_perf() should not be a part of shmem_set_epp() function, so move it to the amd_pstate_epp_update_limit() function, where it is needed. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-3-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bc42d96984f4..bd3e0f113a88 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -296,9 +296,6 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - perf_ctrls.energy_perf = epp; ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); if (ret) { @@ -1600,6 +1597,10 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = 0; WRITE_ONCE(cpudata->cppc_req_cached, value); + + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, false); + return amd_pstate_set_epp(cpudata, epp); } From b1089e0c8817fda93d474eaa82ad86386887aefe Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:40 +0000 Subject: [PATCH 05/24] cpufreq/amd-pstate: Refactor amd_pstate_epp_reenable() and amd_pstate_epp_offline() Replace similar code chunks with amd_pstate_update_perf() and amd_pstate_set_epp() function calls. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-4-Dhananjay.Ugwekar@amd.com [ML: Fix LKP reported error about unused variable] Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bd3e0f113a88..23c5840dc406 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1632,25 +1632,17 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) { - struct cppc_perf_ctrls perf_ctrls; - u64 value, max_perf; + u64 max_perf; int ret; ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - value = READ_ONCE(cpudata->cppc_req_cached); max_perf = READ_ONCE(cpudata->highest_perf); - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); - cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } + amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); + amd_pstate_set_epp(cpudata, cpudata->epp_cached); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) @@ -1670,31 +1662,15 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static void amd_pstate_epp_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; int min_perf; - u64 value; min_perf = READ_ONCE(cpudata->lowest_perf); - value = READ_ONCE(cpudata->cppc_req_cached); mutex_lock(&amd_pstate_limits_lock); - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; - /* Set max perf same as min perf */ - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(min_perf); - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.desired_perf = 0; - perf_ctrls.min_perf = min_perf; - perf_ctrls.max_perf = min_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); - cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } + amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); + amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); + mutex_unlock(&amd_pstate_limits_lock); } From b78f8c87ec3e7499bb049986838636d3afbc7ece Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:41 +0000 Subject: [PATCH 06/24] cpufreq/amd-pstate: Remove the cppc_state check in offline/online functions Only amd_pstate_epp driver (i.e. cppc_state = ACTIVE) enters the amd_pstate_epp_offline() and amd_pstate_epp_cpu_online() functions, so remove the unnecessary if condition checking if cppc_state is equal to AMD_PSTATE_ACTIVE. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. 
Shenoy
Link: https://lore.kernel.org/r/20241204144842.164178-5-Dhananjay.Ugwekar@amd.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 23c5840dc406..9b8d7f299fca 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1651,10 +1651,8 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 
 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
 
-	if (cppc_state == AMD_PSTATE_ACTIVE) {
-		amd_pstate_epp_reenable(cpudata);
-		cpudata->suspended = false;
-	}
+	amd_pstate_epp_reenable(cpudata);
+	cpudata->suspended = false;
 
 	return 0;
 }
@@ -1683,8 +1681,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
 	if (cpudata->suspended)
 		return 0;
 
-	if (cppc_state == AMD_PSTATE_ACTIVE)
-		amd_pstate_epp_offline(policy);
+	amd_pstate_epp_offline(policy);
 
 	return 0;
 }

From 53ec2101dfede8fecdd240662281a12e537c3411 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar
Date: Wed, 4 Dec 2024 14:48:42 +0000
Subject: [PATCH 07/24] cpufreq/amd-pstate: Merge amd_pstate_epp_cpu_offline() and amd_pstate_epp_offline()

amd_pstate_epp_offline() is only called from within
amd_pstate_epp_cpu_offline(), so it doesn't make much sense to keep it
as a separate function. Hence, remove it. Also remove the unnecessary
debug print in the offline path while at it.

Signed-off-by: Dhananjay Ugwekar
Reviewed-by: Gautham R. Shenoy
Reviewed-by: Mario Limonciello
Link: https://lore.kernel.org/r/20241204144842.164178-6-Dhananjay.Ugwekar@amd.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9b8d7f299fca..8ce754ead328 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1657,11 +1657,14 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
+static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	int min_perf;
 
+	if (cpudata->suspended)
+		return 0;
+
 	min_perf = READ_ONCE(cpudata->lowest_perf);
 
 	mutex_lock(&amd_pstate_limits_lock);
@@ -1670,18 +1673,6 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
 	amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false);
 	amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE);
 
 	mutex_unlock(&amd_pstate_limits_lock);
-}
-
-static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
-{
-	struct amd_cpudata *cpudata = policy->driver_data;
-
-	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
-
-	if (cpudata->suspended)
-		return 0;
-
-	amd_pstate_epp_offline(policy);
 
 	return 0;
 }

From b78f8c87ec3e7499bb049986838636d3afbc7ece Mon Sep 17 00:00:00 2001
From: Mario Limonciello
Date: Mon, 9 Dec 2024 12:52:36 -0600
Subject: [PATCH 08/24] cpufreq/amd-pstate: Add trace event for EPP perf updates

In "active" mode the most important thing for debugging whether an
issue is hardware or software based is to look at what was the last
thing written to the CPPC request MSR or shared memory region.

The 'amd_pstate_epp_perf' trace event shows the values being written
for all CPUs.

Reviewed-by: Perry Yuan
Reviewed-by: Gautham R.
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-4-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 45 ++++++++++++++++++++++++++++++ drivers/cpufreq/amd-pstate.c | 28 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index 35f38ae67fb1..e2221a4b6901 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -88,6 +88,51 @@ TRACE_EVENT(amd_pstate_perf, ) ); +TRACE_EVENT(amd_pstate_epp_perf, + + TP_PROTO(unsigned int cpu_id, + unsigned int highest_perf, + unsigned int epp, + unsigned int min_perf, + unsigned int max_perf, + bool boost + ), + + TP_ARGS(cpu_id, + highest_perf, + epp, + min_perf, + max_perf, + boost), + + TP_STRUCT__entry( + __field(unsigned int, cpu_id) + __field(unsigned int, highest_perf) + __field(unsigned int, epp) + __field(unsigned int, min_perf) + __field(unsigned int, max_perf) + __field(bool, boost) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->highest_perf = highest_perf; + __entry->epp = epp; + __entry->min_perf = min_perf; + __entry->max_perf = max_perf; + __entry->boost = boost; + ), + + TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u", + (unsigned int)__entry->cpu_id, + (unsigned int)__entry->min_perf, + (unsigned int)__entry->max_perf, + (unsigned int)__entry->highest_perf, + (unsigned int)__entry->epp, + (bool)__entry->boost + ) +); + #endif /* _AMD_PSTATE_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8ce754ead328..20d52bce1882 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -324,6 +324,14 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, return -EBUSY; } + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + epp, + AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached), + cpudata->boost_state); + } + ret = amd_pstate_set_epp(cpudata, epp); return ret; @@ -1598,6 +1606,13 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) WRITE_ONCE(cpudata->cppc_req_cached, value); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, + cpudata->min_limit_perf, + cpudata->max_limit_perf, + policy->boost_enabled); + } + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, cpudata->max_limit_perf, false); @@ -1641,6 +1656,13 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) max_perf = READ_ONCE(cpudata->highest_perf); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + cpudata->epp_cached, + AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + max_perf, cpudata->boost_state); + } + amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); amd_pstate_set_epp(cpudata, cpudata->epp_cached); } @@ -1669,6 +1691,12 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) mutex_lock(&amd_pstate_limits_lock); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, + min_perf, min_perf, policy->boost_enabled); + } + amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); From 6c093d5a5b73ec1caf1e706510ae6031af2f9d43 Mon Sep 17 
00:00:00 2001
From: Mario Limonciello
Date: Mon, 9 Dec 2024 12:52:37 -0600
Subject: [PATCH 09/24] cpufreq/amd-pstate: convert mutex use to guard()

Using a scoped guard() declaration unlocks mutexes automatically when
they go out of scope.

Reviewed-by: Gautham R. Shenoy
Link: https://lore.kernel.org/r/20241209185248.16301-5-mario.limonciello@amd.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 20d52bce1882..bcb9367aa9ca 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -758,12 +758,12 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
 		pr_err("Boost mode is not supported by this processor or SBIOS\n");
 		return -EOPNOTSUPP;
 	}
-	mutex_lock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
+
 	ret = amd_pstate_cpu_boost_update(policy, state);
 	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
 	policy->boost_enabled = !ret ? state : false;
 	refresh_frequency_limits(policy);
-	mutex_unlock(&amd_pstate_driver_lock);
 
 	return ret;
 }
@@ -854,7 +854,8 @@ static void amd_pstate_update_limits(unsigned int cpu)
 	if (!amd_pstate_prefcore)
 		return;
 
-	mutex_lock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
+
 	ret = amd_get_highest_perf(cpu, &cur_high);
 	if (ret)
 		goto free_cpufreq_put;
@@ -874,7 +875,6 @@ free_cpufreq_put:
 
 	if (!highest_perf_changed)
 		cpufreq_update_policy(cpu);
-	mutex_unlock(&amd_pstate_driver_lock);
 }
 
 /*
@@ -1203,11 +1203,11 @@ static ssize_t store_energy_performance_preference(
 	if (ret < 0)
 		return -EINVAL;
 
-	mutex_lock(&amd_pstate_limits_lock);
-	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
-	mutex_unlock(&amd_pstate_limits_lock);
+	guard(mutex)(&amd_pstate_limits_lock);
 
-	return ret ?: count;
+	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+
+	return ret ? ret : count;
 }
 
 static ssize_t show_energy_performance_preference(
@@ -1371,13 +1371,10 @@
 static ssize_t status_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	ssize_t ret;
-
-	mutex_lock(&amd_pstate_driver_lock);
-	ret = amd_pstate_show_status(buf);
-	mutex_unlock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
 
-	return ret;
+	return amd_pstate_show_status(buf);
 }
 
 static ssize_t status_store(struct device *a, struct device_attribute *b,
@@ -1386,9 +1383,8 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
 	char *p = memchr(buf, '\n', count);
 	int ret;
 
-	mutex_lock(&amd_pstate_driver_lock);
+	guard(mutex)(&amd_pstate_driver_lock);
 	ret = amd_pstate_update_status(buf, p ? p - buf : count);
-	mutex_unlock(&amd_pstate_driver_lock);
 
 	return ret < 0 ?
ret : count; } @@ -1689,7 +1685,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) min_perf = READ_ONCE(cpudata->lowest_perf); - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, @@ -1700,8 +1696,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); - mutex_unlock(&amd_pstate_limits_lock); - return 0; } @@ -1730,13 +1724,11 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); /* enable amd pstate from suspend state*/ amd_pstate_epp_reenable(cpudata); - mutex_unlock(&amd_pstate_limits_lock); - cpudata->suspended = false; } From 3b43739824a6b617d8213dd2bce6fb1b2747c377 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:38 -0600 Subject: [PATCH 10/24] cpufreq/amd-pstate: Drop cached epp_policy variable epp_policy is not used by any of the current code and there is no need to cache it. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-6-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 3 --- drivers/cpufreq/amd-pstate.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bcb9367aa9ca..0e77584dfade 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1478,7 +1478,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; cpudata->cpu = policy->cpu; - cpudata->epp_policy = 0; ret = amd_pstate_init_perf(cpudata); if (ret) @@ -1585,8 +1584,6 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(0); - cpudata->epp_policy = cpudata->policy; - /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); if (epp < 0) { diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index cd573bc6b6db..7765c82f975c 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -57,7 +57,6 @@ struct amd_aperf_mperf { * @hw_prefcore: check whether HW supports preferred core featue. * Only when hw_prefcore and early prefcore param are true, * AMD P-State driver supports preferred core featue. - * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value @@ -94,7 +93,6 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - s16 epp_policy; s16 epp_cached; u32 policy; u64 cppc_cap1_cached; From 88a95ba066a962d4d39c6a36b18bf665f51d3767 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:39 -0600 Subject: [PATCH 11/24] cpufreq/amd-pstate: Use FIELD_PREP and FIELD_GET macros The FIELD_PREP and FIELD_GET macros improve readability and help to avoid shifting bugs. Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-7-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 51 ++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 0e77584dfade..fbd1b36846c5 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -88,6 +89,11 @@ static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) + /* * AMD Energy Preference Performance (EPP) * The EPP is used in the CCLK DPM controller to drive @@ -182,7 +188,6 @@ static DEFINE_MUTEX(amd_pstate_driver_lock); static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) { - u64 epp; int ret; if (!cppc_req_cached) { @@ -192,9 +197,8 @@ static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) return ret; } } - epp = (cppc_req_cached >> 24) & 0xFF; - return (s16)epp; + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached); } DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); @@ -269,12 +273,11 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); int ret; - u64 value = READ_ONCE(cpudata->cppc_req_cached); - - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); @@ -327,8 +330,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, - AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), - AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), cpudata->boost_state); } @@ -542,18 +545,15 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, des_perf = 0; } - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(des_perf); - /* limit the max perf when core performance boost feature is disabled */ if (!cpudata->boost_supported) max_perf = min_t(unsigned long, nominal_perf, max_perf); - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, @@ -1573,16 +1573,11 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) min_perf = min(cpudata->nominal_perf, max_perf); - /* Initial min/max values for CPPC Performance Controls Register */ - value &= 
~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); - - /* CPPC EPP feature require to set zero to the desire perf bit */ - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(0); + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); @@ -1652,7 +1647,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, cpudata->epp_cached, - AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), max_perf, cpudata->boost_state); } From 474e7218e81e7932ed18f91969b72169005ff038 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:40 -0600 Subject: [PATCH 12/24] cpufreq/amd-pstate: Only update the cached value in msr_set_epp() on success If writing the MSR MSR_AMD_CPPC_REQ fails then the cached value in the amd_cpudata structure should not be updated. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-8-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index fbd1b36846c5..ebfc9e20b6cb 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -278,11 +278,15 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - WRITE_ONCE(cpudata->cppc_req_cached, value); ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; + if (ret) { + pr_err("failed to set energy perf value (%d)\n", ret); + return ret; + } + + cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; } From 68cb0e77b6439fea64c6907c563b7bd27f2ee57f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:41 -0600 Subject: [PATCH 13/24] cpufreq/amd-pstate: store all values in cpudata struct in khz Storing values in the cpudata structure in different units leads to confusion and hardcoded conversions elsewhere. After ratios are calculated store everything in khz for any future use. Adjust all relevant consumers for this change as well. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-9-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 12 +++++------- drivers/cpufreq/amd-pstate.c | 28 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index a261d7300951..3a0a380c3590 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -207,7 +207,6 @@ static void amd_pstate_ut_check_freq(u32 index) int cpu = 0; struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; - u32 nominal_freq_khz; for_each_possible_cpu(cpu) { policy = cpufreq_cpu_get(cpu); @@ -215,14 +214,13 @@ static void amd_pstate_ut_check_freq(u32 index) break; cpudata = policy->driver_data; - nominal_freq_khz = cpudata->nominal_freq*1000; - if (!((cpudata->max_freq >= nominal_freq_khz) && - (nominal_freq_khz > cpudata->lowest_nonlinear_freq) && + if (!((cpudata->max_freq >= cpudata->nominal_freq) && + (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && (cpudata->min_freq > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, cpudata->max_freq, nominal_freq_khz, + __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, cpudata->min_freq); goto skip_test; } @@ -236,13 +234,13 @@ static void amd_pstate_ut_check_freq(u32 index) if (cpudata->boost_supported) { if ((policy->max == cpudata->max_freq) || - (policy->max == nominal_freq_khz)) + (policy->max == cpudata->nominal_freq)) amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, cpudata->max_freq, - nominal_freq_khz); + cpudata->nominal_freq); goto skip_test; } } else { diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ebfc9e20b6cb..bf2512e1f117 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -739,8 +739,8 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) if (on) policy->cpuinfo.max_freq = max_freq; - else if (policy->cpuinfo.max_freq > nominal_freq * 1000) - policy->cpuinfo.max_freq = nominal_freq * 1000; + else if (policy->cpuinfo.max_freq > nominal_freq) + policy->cpuinfo.max_freq = nominal_freq; policy->max = policy->cpuinfo.max_freq; @@ -940,29 +940,29 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) return ret; if (quirks && quirks->lowest_freq) - min_freq = quirks->lowest_freq * 1000; + min_freq = quirks->lowest_freq; else - min_freq = cppc_perf.lowest_freq * 1000; + min_freq = cppc_perf.lowest_freq; if (quirks && quirks->nominal_freq) - nominal_freq = quirks->nominal_freq ; + nominal_freq = quirks->nominal_freq; else nominal_freq = cppc_perf.nominal_freq; nominal_perf = READ_ONCE(cpudata->nominal_perf); boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); - max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, nominal_perf); 
- lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT); - WRITE_ONCE(cpudata->min_freq, min_freq); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - WRITE_ONCE(cpudata->max_freq, max_freq); + WRITE_ONCE(cpudata->min_freq, min_freq * 1000); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); + WRITE_ONCE(cpudata->max_freq, max_freq * 1000); /** * Below values need to be initialized correctly, otherwise driver will fail to load @@ -972,13 +972,13 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) */ if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", - min_freq, max_freq, nominal_freq * 1000); + min_freq, max_freq, nominal_freq); return -EINVAL; } - if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", - lowest_nonlinear_freq, min_freq, nominal_freq * 1000); + lowest_nonlinear_freq, min_freq, nominal_freq); return -EINVAL; } From 942718f2a236cb3b27d2dbb5942538681b6e0e88 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:42 -0600 Subject: [PATCH 14/24] cpufreq/amd-pstate: Change amd_pstate_update_perf() to return an int As msr_update_perf() calls an MSR it's possible that it fails. Pass this return code up to the caller. Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-10-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bf2512e1f117..d279ace500d7 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -251,24 +251,26 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) return index; } -static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, +static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { - if (fast_switch) + if (fast_switch) { wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); - else - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); + return 0; + } + + return wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + READ_ONCE(cpudata->cppc_req_cached)); } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); -static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, +static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { - static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + max_perf, fast_switch); } static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) @@ -480,7 +482,7 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static void shmem_update_perf(struct amd_cpudata *cpudata, +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -490,7 +492,7 @@ static void shmem_update_perf(struct amd_cpudata *cpudata, perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); + return cppc_set_perf(cpudata->cpu, &perf_ctrls); } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) From 3f7b835fa4d0d06f82249a3aca989fdf9bdf4656 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:43 -0600 Subject: [PATCH 15/24] cpufreq/amd-pstate: Move limit updating code The limit updating code in amd_pstate_epp_update_limit() should not only apply to EPP updates. Move it to amd_pstate_update_min_max_limit() so other callers can benefit as well. With this move it's not necessary to have clamp_t calls anymore because the verify callback is called when setting limits. Reviewed-by: Gautham R. 
Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-11-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d279ace500d7..55529e32d325 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); u64 value = prev; - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); max_freq = READ_ONCE(cpudata->max_limit_freq); @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; + u32 max_limit_perf, min_limit_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; max_perf = READ_ONCE(cpudata->highest_perf); @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * max_perf, max_freq); min_limit_perf = div_u64(policy->min * max_perf, max_freq); - lowest_perf = READ_ONCE(cpudata->lowest_perf); - if (min_limit_perf < lowest_perf) - min_limit_perf = lowest_perf; - - if (max_limit_perf < min_limit_perf) - max_limit_perf = min_limit_perf; + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf; u64 value; s16 epp; - max_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); amd_pstate_update_min_max_limit(policy); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); value = READ_ONCE(cpudata->cppc_req_cached); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_perf = min(cpudata->nominal_perf, max_perf); - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | AMD_CPPC_DES_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); From b3781f30bfcfd7db12de2595adb01779e565e1c6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:44 -0600 Subject: [PATCH 16/24] cpufreq/amd-pstate: Cache EPP value and use that everywhere Cache the value in cpudata->epp_cached, and use that for all callers. As all callers use cached value merge amd_pstate_get_energy_pref_index() into show_energy_performance_preference(). 
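As a rough sketch of the resulting pattern (illustrative only, with a
hypothetical write_epp_to_hw() helper standing in for the MSR or CPPC
shared-memory write; this is not the literal driver code):

	static int set_epp(struct amd_cpudata *cpudata, u32 epp)
	{
		int ret;

		/* Skip the hardware access when nothing would change. */
		if (epp == READ_ONCE(cpudata->epp_cached))
			return 0;

		ret = write_epp_to_hw(cpudata, epp);	/* hypothetical helper */
		if (ret)
			return ret;	/* keep the old cached value on failure */

		/* Refresh the cache only after a successful write. */
		WRITE_ONCE(cpudata->epp_cached, epp);
		return 0;
	}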
Check if the EPP value is changed before writing it to MSR or shared memory region. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-12-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 105 ++++++++++++++--------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 55529e32d325..d1f82e4ca9b1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -186,29 +186,28 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata) { + u64 value; int ret; - if (!cppc_req_cached) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); - if (ret < 0) { - pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return ret; - } + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; } - return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached); + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); } DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); -static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) { - return static_call(amd_pstate_get_epp)(cpudata, cppc_req_cached); + return static_call(amd_pstate_get_epp)(cpudata); } -static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) +static s16 shmem_get_epp(struct amd_cpudata *cpudata) { u64 epp; int ret; @@ -222,35 +221,6 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) return (s16)(epp & 0xff); } -static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) -{ - s16 epp; - int index = -EINVAL; - - epp = amd_pstate_get_epp(cpudata, 0); - if (epp < 0) - return epp; - - switch (epp) { - case AMD_CPPC_EPP_PERFORMANCE: - index = EPP_INDEX_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_PERFORMANCE: - index = EPP_INDEX_BALANCE_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_POWERSAVE: - index = EPP_INDEX_BALANCE_POWERSAVE; - break; - case AMD_CPPC_EPP_POWERSAVE: - index = EPP_INDEX_POWERSAVE; - break; - default: - break; - } - - return index; -} - static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -275,19 +245,23 @@ static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + u64 value, prev; int ret; + value = prev = READ_ONCE(cpudata->cppc_req_cached); value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (value == prev) + return 0; + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); if (ret) { pr_err("failed to set energy perf value (%d)\n", ret); return ret; } - cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; @@ -305,13 +279,16 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; + if (epp == cpudata->epp_cached) + return 0; + perf_ctrls.energy_perf = epp; ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); 
 	if (ret) {
 		pr_debug("failed to set energy perf value (%d)\n", ret);
 		return ret;
 	}
-	cpudata->epp_cached = epp;
+	WRITE_ONCE(cpudata->epp_cached, epp);
 
 	return ret;
 }
@@ -1214,9 +1191,22 @@ static ssize_t show_energy_performance_preference(
 	struct amd_cpudata *cpudata = policy->driver_data;
 	int preference;
 
-	preference = amd_pstate_get_energy_pref_index(cpudata);
-	if (preference < 0)
-		return preference;
+	switch (cpudata->epp_cached) {
+	case AMD_CPPC_EPP_PERFORMANCE:
+		preference = EPP_INDEX_PERFORMANCE;
+		break;
+	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
+		preference = EPP_INDEX_BALANCE_PERFORMANCE;
+		break;
+	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
+		preference = EPP_INDEX_BALANCE_POWERSAVE;
+		break;
+	case AMD_CPPC_EPP_POWERSAVE:
+		preference = EPP_INDEX_POWERSAVE;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
 }
@@ -1501,7 +1491,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 
 	policy->driver_data = cpudata;
 
-	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
+	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata);
 
 	policy->min = policy->cpuinfo.min_freq;
 	policy->max = policy->cpuinfo.max_freq;
@@ -1555,35 +1545,26 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	u64 value;
-	s16 epp;
 
 	amd_pstate_update_min_max_limit(policy);
 
 	value = READ_ONCE(cpudata->cppc_req_cached);
 
 	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
-		   AMD_CPPC_DES_PERF_MASK);
+		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
 	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
 	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
 	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);
 
-	/* Get BIOS pre-defined epp value */
-	epp = amd_pstate_get_epp(cpudata, value);
-	if (epp < 0) {
-		/**
-		 * This return value can only be negative for shared_memory
-		 * systems where EPP register read/write not supported.
-		 */
-		return epp;
-	}
-
 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		epp = 0;
+		WRITE_ONCE(cpudata->epp_cached, 0);
+	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, cpudata->epp_cached);
 
 	WRITE_ONCE(cpudata->cppc_req_cached, value);
 
 	if (trace_amd_pstate_epp_perf_enabled()) {
-		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+					  cpudata->epp_cached,
 					  cpudata->min_limit_perf,
 					  cpudata->max_limit_perf,
 					  policy->boost_enabled);
@@ -1592,7 +1573,7 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 	amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
 			       cpudata->max_limit_perf, false);
 
-	return amd_pstate_set_epp(cpudata, epp);
+	return amd_pstate_set_epp(cpudata, READ_ONCE(cpudata->epp_cached));
 }
 
 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)

From fff395796917ac3fe3b4c4607cb74a8dbdc17593 Mon Sep 17 00:00:00 2001
From: Mario Limonciello
Date: Mon, 9 Dec 2024 12:52:45 -0600
Subject: [PATCH 17/24] cpufreq/amd-pstate: Always write EPP value when updating perf

For MSR systems the EPP value is in the same register as perf targets
and so dividing them into two separate MSR writes is wasteful.

In msr_update_perf(), update both EPP and perf values in one write to
MSR_AMD_CPPC_REQ, and cache them if successful.

To accomplish this, plumb the EPP value into the update_perf call and
modify all its callers to check the return value.
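Condensed from the msr_update_perf() hunk below, the single-write path
roughly becomes:

	/* Compose min/des/max perf and EPP into one MSR_AMD_CPPC_REQ value. */
	value = prev = READ_ONCE(cpudata->cppc_req_cached);
	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	/* Only touch the MSR when the request actually changed. */
	if (value != prev)
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);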
As this unifies calls, ensure that the MSR write is necessary before flushing a write out. Also drop the comparison from the passive flow tracing. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-13-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 7 +- drivers/cpufreq/amd-pstate.c | 110 +++++++++++++++-------------- 2 files changed, 57 insertions(+), 60 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index e2221a4b6901..8d692415d905 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -32,7 +32,6 @@ TRACE_EVENT(amd_pstate_perf, u64 aperf, u64 tsc, unsigned int cpu_id, - bool changed, bool fast_switch ), @@ -44,7 +43,6 @@ TRACE_EVENT(amd_pstate_perf, aperf, tsc, cpu_id, - changed, fast_switch ), @@ -57,7 +55,6 @@ TRACE_EVENT(amd_pstate_perf, __field(unsigned long long, aperf) __field(unsigned long long, tsc) __field(unsigned int, cpu_id) - __field(bool, changed) __field(bool, fast_switch) ), @@ -70,11 +67,10 @@ TRACE_EVENT(amd_pstate_perf, __entry->aperf = aperf; __entry->tsc = tsc; __entry->cpu_id = cpu_id; - __entry->changed = changed; __entry->fast_switch = fast_switch; ), - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s", (unsigned long)__entry->min_perf, (unsigned long)__entry->target_perf, (unsigned long)__entry->capacity, @@ -83,7 +79,6 @@ TRACE_EVENT(amd_pstate_perf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, (unsigned int)__entry->cpu_id, - (__entry->changed) ? "true" : "false", (__entry->fast_switch) ? 
"true" : "false" ) ); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d1f82e4ca9b1..419790e52d91 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -222,25 +222,47 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata) } static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { - if (fast_switch) { - wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); + u64 value, prev; + + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (value == prev) return 0; + + if (fast_switch) { + wrmsrl(MSR_AMD_CPPC_REQ, value); + return 0; + } else { + int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + + if (ret) + return ret; } - return wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); + WRITE_ONCE(cpudata->cppc_req_cached, value); + WRITE_ONCE(cpudata->epp_cached, epp); + + return 0; } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) + u32 max_perf, u32 epp, + bool fast_switch) { return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + max_perf, epp, fast_switch); } static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) @@ -261,6 +283,7 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) return ret; } + /* update both so that msr_update_perf() can effectively check */ WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); @@ -459,12 +482,18 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; + if (cppc_state == AMD_PSTATE_ACTIVE) { + int ret = shmem_set_epp(cpudata, epp); + + if (ret) + return ret; + } + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; @@ -510,9 +539,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, { unsigned long max_freq; struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u64 prev = READ_ONCE(cpudata->cppc_req_cached); u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); - u64 value = prev; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); @@ -528,27 +555,14 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, if (!cpudata->boost_supported) max_perf = min_t(unsigned long, nominal_perf, max_perf); - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | - AMD_CPPC_DES_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); - value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); - if (trace_amd_pstate_perf_enabled() && 
 		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
 			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
-			cpudata->cpu, (value != prev), fast_switch);
+			cpudata->cpu, fast_switch);
 	}
 
-	if (value == prev)
-		goto cpufreq_policy_put;
+	amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
 
-	WRITE_ONCE(cpudata->cppc_req_cached, value);
-
-	amd_pstate_update_perf(cpudata, min_perf, des_perf,
-			       max_perf, fast_switch);
-
-cpufreq_policy_put:
 	cpufreq_cpu_put(policy);
 }
 
@@ -1544,36 +1558,24 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
 static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
-	u64 value;
+	u32 epp;
 
 	amd_pstate_update_min_max_limit(policy);
 
-	value = READ_ONCE(cpudata->cppc_req_cached);
-
-	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
-		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
-	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
-	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
-	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);
-
 	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		WRITE_ONCE(cpudata->epp_cached, 0);
-	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, cpudata->epp_cached);
-
-	WRITE_ONCE(cpudata->cppc_req_cached, value);
+		epp = 0;
+	else
+		epp = READ_ONCE(cpudata->epp_cached);
 
 	if (trace_amd_pstate_epp_perf_enabled()) {
-		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
-					  cpudata->epp_cached,
+		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
 					  cpudata->min_limit_perf,
 					  cpudata->max_limit_perf,
 					  policy->boost_enabled);
 	}
 
-	amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
-			       cpudata->max_limit_perf, false);
-
-	return amd_pstate_set_epp(cpudata, READ_ONCE(cpudata->epp_cached));
+	return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
+				      cpudata->max_limit_perf, epp, false);
 }
 
 static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1602,7 +1604,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
 {
 	u64 max_perf;
 	int ret;
@@ -1620,17 +1622,19 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
 					  max_perf, cpudata->boost_state);
 	}
 
-	amd_pstate_update_perf(cpudata, 0, 0, max_perf, false);
-	amd_pstate_set_epp(cpudata, cpudata->epp_cached);
+	return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
 }
 
 static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
+	int ret;
 
 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
 
-	amd_pstate_epp_reenable(cpudata);
+	ret = amd_pstate_epp_reenable(cpudata);
+	if (ret)
+		return ret;
 	cpudata->suspended = false;
 
 	return 0;
@@ -1654,10 +1658,8 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
 					  min_perf, min_perf, policy->boost_enabled);
 	}
 
-	amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false);
-	amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE);
-
-	return 0;
+	return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
+				      AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
 }
 
 static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
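The redundant-write check above works because every field of the request
word, EPP included, now flows through the same cached value. A stand-alone
sketch of that packing and skip-if-unchanged logic follows; it is an
illustration only, with FIELD_PREP() re-implemented locally, the field
masks mirroring the MSR_AMD_CPPC_REQ layout used by the driver, and the
MSR write stubbed out with a printf():

  /* Sketch of the msr_update_perf() caching scheme: pack min/des/max/epp
   * into one CPPC request word, skip the (slow) MSR write if unchanged. */
  #include <stdint.h>
  #include <stdio.h>

  #define MSR_AMD_CPPC_REQ	0xc00102b3	/* per msr-index.h */
  #define AMD_CPPC_MAX_PERF_MASK	0x000000ffULL
  #define AMD_CPPC_MIN_PERF_MASK	0x0000ff00ULL
  #define AMD_CPPC_DES_PERF_MASK	0x00ff0000ULL
  #define AMD_CPPC_EPP_PERF_MASK	0xff000000ULL

  /* minimal FIELD_PREP(): shift the value into the mask's position */
  #define FIELD_PREP(mask, val) \
  	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))

  static uint64_t cppc_req_cached;

  static void wrmsrl(uint32_t msr, uint64_t val)	/* stub */
  {
  	printf("MSR %#x write: %#llx\n", msr, (unsigned long long)val);
  }

  static int update_perf(uint32_t min, uint32_t des, uint32_t max, uint32_t epp)
  {
  	uint64_t value = cppc_req_cached, prev = value;

  	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
  		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
  	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max);
  	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des);
  	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min);
  	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

  	if (value == prev)		/* nothing changed: skip the write */
  		return 0;

  	wrmsrl(MSR_AMD_CPPC_REQ, value);
  	cppc_req_cached = value;
  	return 0;
  }

  int main(void)
  {
  	update_perf(10, 0, 208, 128);	/* first call: writes */
  	update_perf(10, 0, 208, 128);	/* identical: silently skipped */
  	return 0;
  }

Folding EPP into the same word is what lets one equality test replace the
separate (value != prev) bookkeeping that this patch removes from
amd_pstate_update().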
From f8fde687c911a366a6132aed85f4ee6b647b9160 Mon Sep 17 00:00:00 2001
From: Mario Limonciello
Date: Mon, 9 Dec 2024 12:52:46 -0600
Subject: [PATCH 18/24] cpufreq/amd-pstate: Drop ret variable from
 amd_pstate_set_energy_pref_index()

The ret variable is not necessary.

Reviewed-and-tested-by: Dhananjay Ugwekar
Reviewed-by: Gautham R. Shenoy
Link: https://lore.kernel.org/r/20241209185248.16301-14-mario.limonciello@amd.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 419790e52d91..4d665d9c76d3 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -319,13 +319,11 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
 static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
 					    int pref_index)
 {
-	int epp = -EINVAL;
-	int ret;
+	int epp;
 
 	if (!pref_index)
 		epp = cpudata->epp_default;
-
-	if (epp == -EINVAL)
+	else
 		epp = epp_values[pref_index];
 
 	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
@@ -341,9 +339,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
 			cpudata->boost_state);
 	}
 
-	ret = amd_pstate_set_epp(cpudata, epp);
-
-	return ret;
+	return amd_pstate_set_epp(cpudata, epp);
 }
 
 static inline int msr_cppc_enable(bool enable)

From f9a378ff6443cdcd4387e5dbb76fa5fa549a83ec Mon Sep 17 00:00:00 2001
From: Mario Limonciello
Date: Mon, 9 Dec 2024 12:52:47 -0600
Subject: [PATCH 19/24] cpufreq/amd-pstate: Set different default EPP policy
 for Epyc and Ryzen

For Ryzen systems the EPP policy set by the BIOS is generally configured
to performance, as this is the default register value for the CPPC
request MSR.

If a user doesn't use additional software to configure EPP, the system
will default to a performance bias and consume extra battery. Instead,
configure the default to "balanced_performance" for this case.

Suggested-by: Artem S. Tashkinov
Reviewed-by: Dhananjay Ugwekar
Tested-by: Dhananjay Ugwekar
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219526
Reviewed-by: Gautham R. Shenoy
Link: https://lore.kernel.org/r/20241209185248.16301-15-mario.limonciello@amd.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 4d665d9c76d3..97aee213821d 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1501,8 +1501,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 
 	policy->driver_data = cpudata;
 
-	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata);
-
 	policy->min = policy->cpuinfo.min_freq;
 	policy->max = policy->cpuinfo.max_freq;
 
@@ -1513,10 +1511,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 	 * the default cpufreq governor is neither powersave nor performance.
 	 */
 	if (amd_pstate_acpi_pm_profile_server() ||
-	    amd_pstate_acpi_pm_profile_undefined())
+	    amd_pstate_acpi_pm_profile_undefined()) {
 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
-	else
+		cpudata->epp_default = amd_pstate_get_epp(cpudata);
+	} else {
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
+	}
 
 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
 		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
@@ -1529,6 +1530,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 			return ret;
 		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
 	}
+	ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
+	if (ret)
+		return ret;
 
 	current_pstate_driver->adjust_perf = NULL;
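The resulting default selection can be condensed into a small sketch. This
is a toy model rather than driver code; the 0x80 energy-performance value
follows the kernel's AMD_CPPC_EPP_BALANCE_PERFORMANCE definition, and the
profile test stands in for amd_pstate_acpi_pm_profile_server() and
amd_pstate_acpi_pm_profile_undefined():

  #include <stdio.h>

  enum policy { POLICY_PERFORMANCE, POLICY_POWERSAVE };
  #define EPP_BALANCE_PERFORMANCE	0x80	/* assumed kernel value */

  static int firmware_epp(void) { return 0; }	/* BIOS default, often 0 */

  static void pick_defaults(int server_or_undefined_profile,
  			  enum policy *pol, int *epp_default)
  {
  	if (server_or_undefined_profile) {
  		/* servers keep the firmware EPP and run "performance" */
  		*pol = POLICY_PERFORMANCE;
  		*epp_default = firmware_epp();
  	} else {
  		/* e.g. a Ryzen laptop: bias away from pure performance */
  		*pol = POLICY_POWERSAVE;
  		*epp_default = EPP_BALANCE_PERFORMANCE;
  	}
  }

  int main(void)
  {
  	enum policy pol;
  	int epp;

  	pick_defaults(0, &pol, &epp);
  	printf("policy=%s epp=%#x\n",
  	       pol == POLICY_POWERSAVE ? "powersave" : "performance", epp);
  	return 0;
  }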
From 95fad7fb58cfaa2a295aa54a1f001a16b9324963 Mon Sep 17 00:00:00 2001
From: Mario Limonciello
Date: Mon, 9 Dec 2024 12:52:48 -0600
Subject: [PATCH 20/24] cpufreq/amd-pstate: Drop boost_state variable

Currently boost_state is cached for every processor in the cpudata
structure and the driver boost state is set for every processor.

Neither is necessary: the driver only needs to set its boost state once,
and the policy already stores whether boost is enabled.

Move the driver boost setting to registration and adjust all references
to the cached value to pull from the policy instead.

Reviewed-by: Gautham R. Shenoy
Link: https://lore.kernel.org/r/20241209185248.16301-16-mario.limonciello@amd.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 26 +++++++++++++-------------
 drivers/cpufreq/amd-pstate.h |  1 -
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 97aee213821d..d7b1de97727a 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -316,9 +316,10 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
 	return ret;
 }
 
-static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
-					    int pref_index)
+static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
+					    int pref_index)
 {
+	struct amd_cpudata *cpudata = policy->driver_data;
 	int epp;
 
 	if (!pref_index)
@@ -336,7 +337,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
 			epp,
 			FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
 			FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
-			cpudata->boost_state);
+			policy->boost_enabled);
 	}
 
 	return amd_pstate_set_epp(cpudata, epp);
@@ -746,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
 	guard(mutex)(&amd_pstate_driver_lock);
 
 	ret = amd_pstate_cpu_boost_update(policy, state);
-	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
 	policy->boost_enabled = !ret ? state : false;
 
 	refresh_frequency_limits(policy);
 
@@ -768,9 +768,6 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
 		goto exit_err;
 	}
 
-	/* at least one CPU supports CPB, even if others fail later on to set up */
-	current_pstate_driver->boost_enabled = true;
-
 	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
 	if (ret) {
 		pr_err_once("failed to read initial CPU boost state!\n");
@@ -1176,7 +1173,6 @@ static ssize_t show_energy_performance_available_preferences(
 static ssize_t store_energy_performance_preference(
 		struct cpufreq_policy *policy, const char *buf, size_t count)
 {
-	struct amd_cpudata *cpudata = policy->driver_data;
 	char str_preference[21];
 	ssize_t ret;
 
@@ -1190,7 +1186,7 @@ static ssize_t store_energy_performance_preference(
 
 	guard(mutex)(&amd_pstate_limits_lock);
 
-	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+	ret = amd_pstate_set_energy_pref_index(policy, ret);
 
 	return ret ? ret : count;
 }
@@ -1265,6 +1261,9 @@ static int amd_pstate_register_driver(int mode)
 		return ret;
 	}
 
+	/* at least one CPU supports CPB */
+	current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
+
 	ret = cpufreq_register_driver(current_pstate_driver);
 	if (ret) {
 		amd_pstate_driver_cleanup();
@@ -1604,8 +1603,9 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
 {
+	struct amd_cpudata *cpudata = policy->driver_data;
 	u64 max_perf;
 	int ret;
 
@@ -1619,7 +1619,7 @@ static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
 		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
 					  cpudata->epp_cached,
 					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
-					  max_perf, cpudata->boost_state);
+					  max_perf, policy->boost_enabled);
 	}
 
 	return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
@@ -1632,7 +1632,7 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
 
 	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
 
-	ret = amd_pstate_epp_reenable(cpudata);
+	ret = amd_pstate_epp_reenable(policy);
 	if (ret)
 		return ret;
 	cpudata->suspended = false;
@@ -1690,7 +1690,7 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
 		guard(mutex)(&amd_pstate_limits_lock);
 
 		/* enable amd pstate from suspend state*/
-		amd_pstate_epp_reenable(cpudata);
+		amd_pstate_epp_reenable(policy);
 
 		cpudata->suspended = false;
 	}
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index 7765c82f975c..9747e3be6cee 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -98,7 +98,6 @@ struct amd_cpudata {
 	u64	cppc_cap1_cached;
 	bool	suspended;
 	s16	epp_default;
-	bool	boost_state;
 };
 
 /*
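After this patch there is a single driver-wide capability flag, set once
at registration time, plus the per-policy runtime flag. A toy model of
the split (assumed structure and function names, not the driver's actual
code):

  #include <stdbool.h>
  #include <stdio.h>

  struct driver { bool boost_enabled; };	/* capability, set once */
  struct policy { bool boost_enabled; };	/* runtime state */

  static bool cpu_has_cpb(void) { return true; }	/* assume CPB present */

  static void register_driver(struct driver *drv)
  {
  	/* at least one CPU supports CPB */
  	drv->boost_enabled = cpu_has_cpb();
  }

  static int set_boost(struct policy *pol, bool state)
  {
  	/* ... the real driver programs MSR_K7_HWCR here; assume success ... */
  	pol->boost_enabled = state;
  	return 0;
  }

  int main(void)
  {
  	struct driver drv;
  	struct policy pol = { 0 };

  	register_driver(&drv);
  	set_boost(&pol, true);
  	printf("capability=%d enabled=%d\n", drv.boost_enabled, pol.boost_enabled);
  	return 0;
  }

Keeping only these two flags removes the per-CPU copy that could drift out
of sync with the policy when a boost update partially failed.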
From de51589f9bd98efddf4ab776d3a490e81905ef7c Mon Sep 17 00:00:00 2001
From: Christian Loehle
Date: Wed, 11 Dec 2024 12:26:05 +0000
Subject: [PATCH 21/24] cpufreq: intel_pstate: Use CPUFREQ_POLICY_UNKNOWN

epp_policy uses the same values as cpufreq_policy.policy and resets to
CPUFREQ_POLICY_UNKNOWN during offlining. Be consistent about it and
initialize to CPUFREQ_POLICY_UNKNOWN instead of 0, too.

No functional change intended.

Signed-off-by: Christian Loehle
Link: https://patch.msgid.link/20241211122605.3048503-3-christian.loehle@arm.com
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/intel_pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9e14374498d6..9c4cc01fd51a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2713,7 +2713,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	}
 
 	cpu->epp_powersave = -EINVAL;
-	cpu->epp_policy = 0;
+	cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
 
 	intel_pstate_get_cpu_pstates(cpu);

From 8e461a1cb43d69d2fc8a97e61916dce571e6bb31 Mon Sep 17 00:00:00 2001
From: "Sultan Alsawaf (unemployed)"
Date: Wed, 11 Dec 2024 17:57:32 -0800
Subject: [PATCH 22/24] cpufreq: schedutil: Fix superfluous updates caused by
 need_freq_update

A redundant frequency update is only truly needed when there is a policy
limits change with a driver that specifies CPUFREQ_NEED_UPDATE_LIMITS.

In spite of that, drivers specifying CPUFREQ_NEED_UPDATE_LIMITS receive a
frequency update _all the time_, not just for a policy limits change,
because need_freq_update is never cleared.

Furthermore, ignore_dl_rate_limit()'s usage of need_freq_update also leads
to a redundant frequency update, regardless of whether or not the driver
specifies CPUFREQ_NEED_UPDATE_LIMITS, when the next chosen frequency is
the same as the current one.

Fix the superfluous updates by only honoring CPUFREQ_NEED_UPDATE_LIMITS
when there's a policy limits change, and clearing need_freq_update when a
requisite redundant update occurs.

This is neatly achieved by moving up the CPUFREQ_NEED_UPDATE_LIMITS test
and instead setting need_freq_update to false in sugov_update_next_freq().

Fixes: 600f5badb78c ("cpufreq: schedutil: Don't skip freq update when limits change")
Signed-off-by: Sultan Alsawaf (unemployed)
Reviewed-by: Christian Loehle
Link: https://patch.msgid.link/20241212015734.41241-2-sultan@kerneltoast.com
Signed-off-by: Rafael J. Wysocki
---
 kernel/sched/cpufreq_schedutil.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 28c77904ea74..e51d5ce730be 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -83,7 +83,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 
 	if (unlikely(sg_policy->limits_changed)) {
 		sg_policy->limits_changed = false;
-		sg_policy->need_freq_update = true;
+		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
 		return true;
 	}
 
@@ -96,7 +96,7 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
 				   unsigned int next_freq)
 {
 	if (sg_policy->need_freq_update)
-		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+		sg_policy->need_freq_update = false;
 	else if (sg_policy->next_freq == next_freq)
 		return false;
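The fixed life cycle of need_freq_update is easiest to see in a reduced
model of the two functions. The sketch below is plain C with kernel types,
locking, and rate-limit checks elided; names mirror the hunk above: the
flag is armed on a limits change only for CPUFREQ_NEED_UPDATE_LIMITS
drivers and cleared after one forced update, so a repeated identical
frequency is then dropped:

  #include <stdbool.h>
  #include <stdio.h>

  static bool driver_needs_update_limits;	/* CPUFREQ_NEED_UPDATE_LIMITS */
  static bool limits_changed;
  static bool need_freq_update;
  static unsigned int cur_freq;

  static bool should_update_freq(void)
  {
  	if (limits_changed) {
  		limits_changed = false;
  		need_freq_update = driver_needs_update_limits;
  		return true;
  	}
  	return false;	/* rate-limit checks elided */
  }

  static bool update_next_freq(unsigned int next_freq)
  {
  	if (need_freq_update)
  		need_freq_update = false;	/* honor one forced update */
  	else if (cur_freq == next_freq)
  		return false;			/* drop redundant updates */

  	cur_freq = next_freq;
  	return true;
  }

  int main(void)
  {
  	driver_needs_update_limits = true;
  	limits_changed = true;

  	if (should_update_freq())
  		printf("forced update: %d\n", update_next_freq(2000));
  	/* a second call at the same frequency is now correctly dropped */
  	printf("redundant update: %d\n", update_next_freq(2000));
  	return 0;
  }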
From fd604ae6c261c5a56bb977ae99f875bbd7264a3f Mon Sep 17 00:00:00 2001
From: Mario Limonciello
Date: Thu, 2 Jan 2025 08:12:04 -0600
Subject: [PATCH 23/24] cpufreq/amd-pstate: Fix prefcore rankings

commit 50a062a76200 ("cpufreq/amd-pstate: Store the boost numerator as
highest perf again") updated the value stored for highest perf to no
longer store the highest perf value but instead the boost numerator.
This is a fixed value for systems with preferred cores and is not
appropriate for use in ITMT rankings.

Update the value used for ITMT rankings to be the preferred core ranking.

Reported-and-tested-by: Sebastian
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219640
Fixes: 50a062a76200 ("cpufreq/amd-pstate: Store the boost numerator as highest perf again")
Reviewed-by: Dhananjay Ugwekar
Link: https://lore.kernel.org/r/20250102141204.3413202-1-superm1@kernel.org
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index d7b1de97727a..2330903a8b45 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -815,7 +815,7 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
 	 * sched_set_itmt_support(true) has been called and it is valid to
 	 * update them at any time after it has been called.
 	 */
-	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);
+	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
 
 	schedule_work(&sched_prefcore_work);
 }
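To illustrate why ranking by highest_perf stopped working (the numbers
here are hypothetical): after commit 50a062a76200, highest_perf holds the
boost numerator, which is identical on every core of a preferred-core
system, while prefcore_ranking still differs per core and therefore gives
the scheduler a usable ordering:

  #include <stdio.h>

  /* hypothetical 4-core preferred-core system */
  struct cpu { int highest_perf; int prefcore_ranking; };

  int main(void)
  {
  	struct cpu cpus[] = { {166, 231}, {166, 211}, {166, 166}, {166, 166} };

  	for (int i = 0; i < 4; i++)
  		printf("cpu%d: old prio=%d (flat), new prio=%d\n", i,
  		       cpus[i].highest_perf,	/* same everywhere */
  		       cpus[i].prefcore_ranking); /* distinguishes cores */
  	return 0;
  }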
From 857a61c2ce74e30fc3b10bc89d68ddd8d05b188c Mon Sep 17 00:00:00 2001
From: Naresh Solanki
Date: Fri, 20 Dec 2024 01:48:32 +0530
Subject: [PATCH 24/24] cpufreq/amd-pstate: Refactor max frequency calculation

The previous approach introduced roundoff errors during division when
calculating the boost ratio. This, in turn, affected the maximum
frequency calculation, often resulting in reporting lower frequency
values.

For example, on the Glinda SoC based board with the following parameters:

  max_perf = 208
  nominal_perf = 100
  nominal_freq = 2600 MHz

The Linux kernel previously calculated the frequency as:

  freq = ((max_perf * 1024 / nominal_perf) * nominal_freq) / 1024
  freq = 5405 MHz  // Integer arithmetic.

With the updated formula:

  freq = (max_perf * nominal_freq) / nominal_perf
  freq = 5408 MHz

This change ensures more accurate frequency calculations by eliminating
unnecessary shifts and divisions, thereby improving precision.

Signed-off-by: Naresh Solanki
[ML: trim the changelog from commit message]
Reviewed-by: Mario Limonciello
Link: https://lore.kernel.org/r/20241219201833.2750998-1-naresh.solanki@9elements.com
Signed-off-by: Mario Limonciello
---
 drivers/cpufreq/amd-pstate.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 2330903a8b45..dd9b8d6993d6 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -908,9 +908,8 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
 {
 	int ret;
 	u32 min_freq, max_freq;
-	u32 nominal_perf, nominal_freq;
+	u32 highest_perf, nominal_perf, nominal_freq;
 	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
-	u32 boost_ratio, lowest_nonlinear_ratio;
 	struct cppc_perf_caps cppc_perf;
 
 	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
@@ -927,16 +926,12 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
 	else
 		nominal_freq = cppc_perf.nominal_freq;
 
+	highest_perf = READ_ONCE(cpudata->highest_perf);
 	nominal_perf = READ_ONCE(cpudata->nominal_perf);
-
-	boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
-	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT);
+	max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);
 
 	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
-	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
-					 nominal_perf);
-	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT);
-
+	lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
 	WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
 	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
 	WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
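The changelog arithmetic is easy to reproduce. A minimal user-space check
of both formulas, using the example values quoted above (illustration
only; the kernel uses div_u64() rather than plain division, and the 1024
factor is SCHED_CAPACITY_SHIFT):

  /* Old vs. new max_freq math from amd_pstate_init_freq(), with the
   * Glinda example values; prints old=5405 MHz new=5408 MHz. */
  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
  	uint64_t max_perf = 208, nominal_perf = 100, nominal_freq = 2600;

  	/* old: go through a 1024-based ratio, losing bits in the division */
  	uint64_t old = ((max_perf << 10) / nominal_perf) * nominal_freq >> 10;

  	/* new: a single multiply-then-divide keeps full precision */
  	uint64_t new = max_perf * nominal_freq / nominal_perf;

  	printf("old=%llu MHz new=%llu MHz\n",
  	       (unsigned long long)old, (unsigned long long)new);
  	return 0;
  }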