From 022e6f5184ff73eaaf6aa7a89b7fafc7c2bd8c9c Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 5 Jan 2024 18:11:04 +0000 Subject: [PATCH 1/4] PM: QoS: Use kcalloc() instead of kzalloc() Use 2-factor multiplication argument form kcalloc() instead of kzalloc(). Link: https://github.com/KSPP/linux/issues/162 Signed-off-by: Erick Archer Reviewed-by: Gustavo A. R. Silva Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 8e93167f1783..bd77f6734f14 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -201,7 +201,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) if (!qos) return -ENOMEM; - n = kzalloc(3 * sizeof(*n), GFP_KERNEL); + n = kcalloc(3, sizeof(*n), GFP_KERNEL); if (!n) { kfree(qos); return -ENOMEM; From bde4f5ff8295601554601f78a523d4d97e42433e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 9 Jan 2024 09:48:48 -0800 Subject: [PATCH 2/4] cpufreq: intel_pstate: Update hybrid scaling factor for Meteor Lake On some Meteor Lake platforms, maximum one core turbo frequency is not observed. During hybrid performance to frequency conversion, the maximum frequency is 100 MHz less. This results in requesting maximum frequency 100 MHz less. For example when the max one core turbo is 4.9 GHz: MSR HWP_CAPABILITIES shows highest performance ratio for P-core is 0x3E. With the current scaling factor of 78741 (1.27x for converting frequency to performance) results in max frequency of 4.8 GHz. This results in capping the max scaling frequency as 4.8 GHz, which is 100 MHz less than the desired. Add capability to define per CPU model specific scaling factor and define scaling factor of 80000 (1.25x for converting frequency to performance for P-cores) for Meteor Lake. Signed-off-by: Srinivas Pandruvada [ rjw: Debug message adjustment, subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3c69040920b8..2ca70b0b5fdc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -302,7 +302,10 @@ static bool hwp_forced __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; -#define HYBRID_SCALING_FACTOR 78741 +#define HYBRID_SCALING_FACTOR 78741 +#define HYBRID_SCALING_FACTOR_MTL 80000 + +static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR; static inline int core_get_scaling(void) { @@ -422,7 +425,7 @@ static int intel_pstate_cppc_get_scaling(int cpu) */ if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq && cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq) - return HYBRID_SCALING_FACTOR; + return hybrid_scaling_factor; return core_get_scaling(); } @@ -1968,7 +1971,7 @@ static int hwp_get_cpu_scaling(int cpu) smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); /* P-cores have a smaller perf level-to-freqency scaling factor. */ if (cpu_type == 0x40) - return HYBRID_SCALING_FACTOR; + return hybrid_scaling_factor; /* Use default core scaling for E-cores */ if (cpu_type == 0x20) @@ -3399,6 +3402,11 @@ static const struct x86_cpu_id intel_epp_balance_perf[] = { {} }; +static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), + {} +}; + static int __init intel_pstate_init(void) { static struct cpudata **_all_cpu_data; @@ -3489,9 +3497,16 @@ hwp_cpu_matched: if (hwp_active) { const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor); if (id) epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + + if (hybrid_id) { + hybrid_scaling_factor = hybrid_id->driver_data; + pr_debug("hybrid scaling factor: %d\n", hybrid_scaling_factor); + } + } mutex_lock(&intel_pstate_driver_lock); From 03c305861c70d6db898dd2379b882e7772a5c5d0 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 9 Jan 2024 18:57:53 +0100 Subject: [PATCH 3/4] Documentation: admin-guide: PM: Fix two typos Fix two typos in the admin-guide: - a missing e in "reference_perf" in cppc_sysfs.rst. - the amd_pstate sysfs path uses a dash instead of an underscore. Signed-off-by: Erwan Velu [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/acpi/cppc_sysfs.rst | 2 +- Documentation/admin-guide/pm/amd-pstate.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/acpi/cppc_sysfs.rst b/Documentation/admin-guide/acpi/cppc_sysfs.rst index e53d76365aa7..36981c667823 100644 --- a/Documentation/admin-guide/acpi/cppc_sysfs.rst +++ b/Documentation/admin-guide/acpi/cppc_sysfs.rst @@ -75,4 +75,4 @@ taking two different snapshots of feedback counters at time T1 and T2. delivered_counter_delta = fbc_t2[del] - fbc_t1[del] reference_counter_delta = fbc_t2[ref] - fbc_t1[ref] - delivered_perf = (refernce_perf x delivered_counter_delta) / reference_counter_delta + delivered_perf = (reference_perf x delivered_counter_delta) / reference_counter_delta diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 1cf40f69278c..9eb26014d34b 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -361,7 +361,7 @@ Global Attributes ``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to control its functionality at the system level. They are located in the -``/sys/devices/system/cpu/amd-pstate/`` directory and affect all CPUs. +``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs. ``status`` Operation mode of the driver: "active", "passive" or "disable". From 3e999770ac1c7c31a70685dd5b88e89473509e9c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 9 Jan 2024 17:59:22 +0100 Subject: [PATCH 4/4] PM: sleep: Restore asynchronous device resume optimization Before commit 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code"), the resume of devices that were allowed to resume asynchronously was scheduled before starting the resume of the other devices, so the former did not have to wait for the latter unless functional dependencies were present. Commit 7839d0078e0d removed that optimization in order to address a correctness issue, but it can be restored with the help of a new device power management flag, so do that now. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka --- drivers/base/power/main.c | 117 +++++++++++++++++++++----------------- include/linux/pm.h | 1 + 2 files changed, 65 insertions(+), 53 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 9c5a5f4dba5a..fadcd0379dc2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *dev) } /** - * __device_resume_noirq - Execute a "noirq resume" callback for given device. + * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -674,16 +674,22 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (!is_async(dev)) - return false; + if (is_async(dev)) { + dev->power.async_in_progress = true; - get_device(dev); + get_device(dev); - if (async_schedule_dev_nocall(func, dev)) - return true; - - put_device(dev); + if (async_schedule_dev_nocall(func, dev)) + return true; + put_device(dev); + } + /* + * Because async_schedule_dev_nocall() above has returned false or it + * has not been called at all, func() is not running and it is safe to + * update the async_in_progress flag without extra synchronization. + */ + dev->power.async_in_progress = false; return false; } @@ -691,18 +697,10 @@ static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - __device_resume_noirq(dev, pm_transition, true); + device_resume_noirq(dev, pm_transition, true); put_device(dev); } -static void device_resume_noirq(struct device *dev) -{ - if (dpm_async_fn(dev, async_resume_noirq)) - return; - - __device_resume_noirq(dev, pm_transition, false); -} - static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -712,18 +710,28 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; + /* + * Trigger the resume of "async" devices upfront so they don't have to + * wait for the "non-async" ones they don't depend on. + */ + list_for_each_entry(dev, &dpm_noirq_list, power.entry) + dpm_async_fn(dev, async_resume_noirq); + while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); - get_device(dev); list_move_tail(&dev->power.entry, &dpm_late_early_list); - mutex_unlock(&dpm_list_mtx); + if (!dev->power.async_in_progress) { + get_device(dev); - device_resume_noirq(dev); + mutex_unlock(&dpm_list_mtx); - put_device(dev); + device_resume_noirq(dev, state, false); - mutex_lock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); + } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); @@ -747,14 +755,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * __device_resume_early - Execute an "early resume" callback for given device. + * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static void __device_resume_early(struct device *dev, pm_message_t state, bool async) +static void device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -820,18 +828,10 @@ static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; - __device_resume_early(dev, pm_transition, true); + device_resume_early(dev, pm_transition, true); put_device(dev); } -static void device_resume_early(struct device *dev) -{ - if (dpm_async_fn(dev, async_resume_early)) - return; - - __device_resume_early(dev, pm_transition, false); -} - /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -845,18 +845,28 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; + /* + * Trigger the resume of "async" devices upfront so they don't have to + * wait for the "non-async" ones they don't depend on. + */ + list_for_each_entry(dev, &dpm_late_early_list, power.entry) + dpm_async_fn(dev, async_resume_early); + while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); - get_device(dev); list_move_tail(&dev->power.entry, &dpm_suspended_list); - mutex_unlock(&dpm_list_mtx); + if (!dev->power.async_in_progress) { + get_device(dev); - device_resume_early(dev); + mutex_unlock(&dpm_list_mtx); - put_device(dev); + device_resume_early(dev, state, false); - mutex_lock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); + } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); @@ -876,12 +886,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * __device_resume - Execute "resume" callbacks for given device. + * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static void __device_resume(struct device *dev, pm_message_t state, bool async) +static void device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -975,18 +985,10 @@ static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; - __device_resume(dev, pm_transition, true); + device_resume(dev, pm_transition, true); put_device(dev); } -static void device_resume(struct device *dev) -{ - if (dpm_async_fn(dev, async_resume)) - return; - - __device_resume(dev, pm_transition, false); -} - /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1006,16 +1008,25 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; + /* + * Trigger the resume of "async" devices upfront so they don't have to + * wait for the "non-async" ones they don't depend on. + */ + list_for_each_entry(dev, &dpm_suspended_list, power.entry) + dpm_async_fn(dev, async_resume); + while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); get_device(dev); - mutex_unlock(&dpm_list_mtx); + if (!dev->power.async_in_progress) { + mutex_unlock(&dpm_list_mtx); - device_resume(dev); + device_resume(dev, state, false); - mutex_lock(&dpm_list_mtx); + mutex_lock(&dpm_list_mtx); + } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); diff --git a/include/linux/pm.h b/include/linux/pm.h index 92a4f69de0e8..a2f3e53a8196 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -681,6 +681,7 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ + bool async_in_progress:1; /* Owned by the PM core */ unsigned int must_resume:1; /* Owned by the PM core */ unsigned int may_skip_resume:1; /* Set by subsystems */ #else