From de51589f9bd98efddf4ab776d3a490e81905ef7c Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Wed, 11 Dec 2024 12:26:05 +0000 Subject: [PATCH 1/4] cpufreq: intel_pstate: Use CPUFREQ_POLICY_UNKNOWN epp_policy uses the same values as cpufreq_policy.policy and resets to CPUFREQ_POLICY_UNKNOWN during offlining. Be consistent about it and initialize to CPUFREQ_POLICY_UNKNOWN instead of 0, too. No functional change intended. Signed-off-by: Christian Loehle Link: https://patch.msgid.link/20241211122605.3048503-3-christian.loehle@arm.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9e14374498d6..9c4cc01fd51a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2713,7 +2713,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) } cpu->epp_powersave = -EINVAL; - cpu->epp_policy = 0; + cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN; intel_pstate_get_cpu_pstates(cpu); From 8e461a1cb43d69d2fc8a97e61916dce571e6bb31 Mon Sep 17 00:00:00 2001 From: "Sultan Alsawaf (unemployed)" Date: Wed, 11 Dec 2024 17:57:32 -0800 Subject: [PATCH 2/4] cpufreq: schedutil: Fix superfluous updates caused by need_freq_update A redundant frequency update is only truly needed when there is a policy limits change with a driver that specifies CPUFREQ_NEED_UPDATE_LIMITS. In spite of that, drivers specifying CPUFREQ_NEED_UPDATE_LIMITS receive a frequency update _all the time_, not just for a policy limits change, because need_freq_update is never cleared. Furthermore, ignore_dl_rate_limit()'s usage of need_freq_update also leads to a redundant frequency update, regardless of whether or not the driver specifies CPUFREQ_NEED_UPDATE_LIMITS, when the next chosen frequency is the same as the current one. Fix the superfluous updates by only honoring CPUFREQ_NEED_UPDATE_LIMITS when there's a policy limits change, and clearing need_freq_update when a requisite redundant update occurs. This is neatly achieved by moving up the CPUFREQ_NEED_UPDATE_LIMITS test and instead setting need_freq_update to false in sugov_update_next_freq(). Fixes: 600f5badb78c ("cpufreq: schedutil: Don't skip freq update when limits change") Signed-off-by: Sultan Alsawaf (unemployed) Reviewed-by: Christian Loehle Link: https://patch.msgid.link/20241212015734.41241-2-sultan@kerneltoast.com Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28c77904ea74..e51d5ce730be 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -83,7 +83,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) if (unlikely(sg_policy->limits_changed)) { sg_policy->limits_changed = false; - sg_policy->need_freq_update = true; + sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); return true; } @@ -96,7 +96,7 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { if (sg_policy->need_freq_update) - sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + sg_policy->need_freq_update = false; else if (sg_policy->next_freq == next_freq) return false; From ebeeee390b6a341770789a50d81e677da9a103d9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 12 Dec 2024 13:01:02 +0100 Subject: [PATCH 3/4] PM: EM: Move sched domains rebuild function from schedutil to EM Function sugov_eas_rebuild_sd() defined in the schedutil cpufreq governor implements generic functionality that may be useful in other places. In particular, there is a plan to use it in the intel_pstate driver in the future. For this reason, move it from schedutil to the energy model code and rename it to em_rebuild_sched_domains(). This also helps to get rid of some #ifdeffery in schedutil which is a plus. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle --- drivers/cpufreq/cpufreq.c | 2 +- include/linux/energy_model.h | 2 ++ kernel/power/energy_model.c | 17 ++++++++++++++++ kernel/sched/cpufreq_schedutil.c | 33 ++++++-------------------------- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1a4cae54a01b..418236fef172 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1538,7 +1538,7 @@ static int cpufreq_online(unsigned int cpu) /* * Register with the energy model before - * sugov_eas_rebuild_sd() is called, which will result + * em_rebuild_sched_domains() is called, which will result * in rebuilding of the sched domains, which should only be done * once the energy model is properly initialized for the policy * first. diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 752e0b297582..78318d49276d 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int em_dev_update_chip_binning(struct device *dev); int em_update_performance_limits(struct em_perf_domain *pd, unsigned long freq_min_khz, unsigned long freq_max_khz); +void em_rebuild_sched_domains(void); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM @@ -404,6 +405,7 @@ int em_update_performance_limits(struct em_perf_domain *pd, { return -EINVAL; } +static inline void em_rebuild_sched_domains(void) {} #endif #endif diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index d07faf42eace..3874f0e97651 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -908,3 +908,20 @@ int em_update_performance_limits(struct em_perf_domain *pd, return 0; } EXPORT_SYMBOL_GPL(em_update_performance_limits); + +static void rebuild_sd_workfn(struct work_struct *work) +{ + rebuild_sched_domains_energy(); +} + +void em_rebuild_sched_domains(void) +{ + static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); + + /* + * When called from the cpufreq_register_driver() path, the + * cpu_hotplug_lock is already held, so use a work item to + * avoid nested locking in rebuild_sched_domains(). + */ + schedule_work(&rebuild_sd_work); +} diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28c77904ea74..1c07a2f8c8b5 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -604,31 +604,6 @@ static const struct kobj_type sugov_tunables_ktype = { /********************** cpufreq governor interface *********************/ -#ifdef CONFIG_ENERGY_MODEL -static void rebuild_sd_workfn(struct work_struct *work) -{ - rebuild_sched_domains_energy(); -} - -static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); - -/* - * EAS shouldn't be attempted without sugov, so rebuild the sched_domains - * on governor changes to make sure the scheduler knows about it. - */ -static void sugov_eas_rebuild_sd(void) -{ - /* - * When called from the cpufreq_register_driver() path, the - * cpu_hotplug_lock is already held, so use a work item to - * avoid nested locking in rebuild_sched_domains(). - */ - schedule_work(&rebuild_sd_work); -} -#else -static inline void sugov_eas_rebuild_sd(void) { }; -#endif - struct cpufreq_governor schedutil_gov; static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) @@ -784,7 +759,11 @@ static int sugov_init(struct cpufreq_policy *policy) goto fail; out: - sugov_eas_rebuild_sd(); + /* + * Schedutil is the preferred governor for EAS, so rebuild sched domains + * on governor changes to make sure the scheduler knows about them. + */ + em_rebuild_sched_domains(); mutex_unlock(&global_tunables_lock); return 0; @@ -826,7 +805,7 @@ static void sugov_exit(struct cpufreq_policy *policy) sugov_policy_free(sg_policy); cpufreq_disable_fast_switch(policy); - sugov_eas_rebuild_sd(); + em_rebuild_sched_domains(); } static int sugov_start(struct cpufreq_policy *policy) From b317268368546d6401af788648668f82e3ba1bd3 Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Wed, 18 Dec 2024 13:09:35 +0900 Subject: [PATCH 4/4] PM: wakeup: implement devm_device_init_wakeup() helper Some drivers that enable device wakeup fail to properly disable it during their cleanup, which results in a memory leak. To address this, introduce devm_device_init_wakeup(), a managed variant of device_init_wakeup(dev, true). With this managed helper, wakeup functionality will be automatically disabled when the device is released, ensuring a more reliable cleanup process. This need for this addition arose during a previous discussion [1]. Link: https://lore.kernel.org/linux-rtc/20241212100403.3799667-1-joe@pf.is.s.u-tokyo.ac.jp/ [1] Suggested-by: Alexandre Belloni Signed-off-by: Joe Hattori Link: https://patch.msgid.link/20241218040935.1921416-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 222f7530806c..d501c09c60cd 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -240,4 +240,21 @@ static inline int device_init_wakeup(struct device *dev, bool enable) return 0; } +static void device_disable_wakeup(void *dev) +{ + device_init_wakeup(dev, false); +} + +/** + * devm_device_init_wakeup - Resource managed device wakeup initialization. + * @dev: Device to handle. + * + * This function is the devm managed version of device_init_wakeup(dev, true). + */ +static inline int devm_device_init_wakeup(struct device *dev) +{ + device_init_wakeup(dev, true); + return devm_add_action_or_reset(dev, device_disable_wakeup, dev); +} + #endif /* _LINUX_PM_WAKEUP_H */