From 10aa694ea0d0adfbd97400fb39ea237a273c335f Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 20 Jan 2021 20:03:12 +0530 Subject: [PATCH 01/12] PM: runtime: Fix resposible -> responsible in runtime.c s/resposible/responsible/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bfda153b1a41..a46a7e30881b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1100,7 +1100,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume); * suspending the device when both its runtime PM status is %RPM_ACTIVE and its * runtime PM usage counter is not zero. * - * The caller is resposible for decrementing the runtime PM usage counter of + * The caller is responsible for decrementing the runtime PM usage counter of * @dev after this function has returned a positive value for it. */ int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) From 67e3242ee28052daa90e1fa193efc601939fde58 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Wed, 20 Jan 2021 08:50:41 -0700 Subject: [PATCH 02/12] PM: domains: inform PM domain of a device's next wakeup Some devices may have a predictable interrupt pattern while executing usecases. An example would be the VSYNC interrupt associated with display devices. A 60 Hz display could cause a interrupt every 16 ms. If the device were in a PM domain, the domain would need to be powered up for device to resume and handle the interrupt. Entering a domain idle state saves power, only if the residency of the idle state is met. Without knowing the idle duration of the domain, the governor would just choose the deepest idle state that matches the QoS requirements. The domain might be powered off just as the device is expecting to wake up. If devices could inform PM frameworks of their next event, the parent PM domain's idle duration can be determined. So let's add the dev_pm_genpd_set_next_wakeup() API for the device to inform PM domains of the impending wakeup. This information will be the domain governor to determine the best idle state given the wakeup. Signed-off-by: Lina Iyer Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 30 ++++++++++++++++++++++++++++++ include/linux/pm_domain.h | 6 ++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9a14eedacb92..014033c7c287 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -423,6 +423,35 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) } EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state); +/** + * dev_pm_genpd_set_next_wakeup - Notify PM framework of an impending wakeup. + * + * @dev: Device to handle + * @next: impending interrupt/wakeup for the device + * + * + * Allow devices to inform of the next wakeup. It's assumed that the users + * guarantee that the genpd wouldn't be detached while this routine is getting + * called. Additionally, it's also assumed that @dev isn't runtime suspended + * (RPM_SUSPENDED)." + * Although devices are expected to update the next_wakeup after the end of + * their usecase as well, it is possible the devices themselves may not know + * about that, so stale @next will be ignored when powering off the domain. + */ +void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) +{ + struct generic_pm_domain_data *gpd_data; + struct generic_pm_domain *genpd; + + genpd = dev_to_genpd_safe(dev); + if (!genpd) + return; + + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + gpd_data->next_wakeup = next; +} +EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup); + static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed) { unsigned int state_idx = genpd->state_idx; @@ -1465,6 +1494,7 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; + gpd_data->next_wakeup = KTIME_MAX; spin_lock_irq(&dev->power.lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 2ca919ae8d36..735583c0bc6d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -9,6 +9,7 @@ #define _LINUX_PM_DOMAIN_H #include +#include #include #include #include @@ -191,6 +192,7 @@ struct generic_pm_domain_data { struct notifier_block *power_nb; int cpu; unsigned int performance_state; + ktime_t next_wakeup; void *data; }; @@ -217,6 +219,7 @@ int pm_genpd_remove(struct generic_pm_domain *genpd); int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_genpd_remove_notifier(struct device *dev); +void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -275,6 +278,9 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev) return -EOPNOTSUPP; } +static inline void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) +{ } + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif From c79aa080fb0f60a0e24c87014dc9c2f373e1379b Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Wed, 20 Jan 2021 08:50:42 -0700 Subject: [PATCH 03/12] PM: domains: use device's next wakeup to determine domain idle state Currently, a PM domain's idle state is determined based on whether the QoS requirements are met. However, even entering an idle state may waste power if the minimum residency requirements aren't fulfilled. CPU PM domains use the next timer wakeup for the CPUs in the domain to determine the sleep duration of the domain. This is compared with the idle state residencies to determine the optimal idle state. For other PM domains, determining the sleep length is not that straight forward. But if the device's next_event is available, we can use that to determine the sleep duration of the PM domain. Let's update the domain governor logic to check for idle state residency based on the next wakeup of devices as well as QoS constraints. But since, not all domains may contain devices capable of specifying the next wakeup, let's enable this additional check only if specified by the domain's flags when initializing the domain. Signed-off-by: Lina Iyer Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 102 ++++++++++++++++++++++++--- include/linux/pm_domain.h | 6 ++ 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 490ed7deb99a..c6c218758f0b 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -117,6 +117,55 @@ static bool default_suspend_ok(struct device *dev) return td->cached_suspend_ok; } +static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t now) +{ + ktime_t domain_wakeup = KTIME_MAX; + ktime_t next_wakeup; + struct pm_domain_data *pdd; + struct gpd_link *link; + + if (!(genpd->flags & GENPD_FLAG_MIN_RESIDENCY)) + return; + + /* + * Devices that have a predictable wakeup pattern, may specify + * their next wakeup. Let's find the next wakeup from all the + * devices attached to this domain and from all the sub-domains. + * It is possible that component's a next wakeup may have become + * stale when we read that here. We will ignore to ensure the domain + * is able to enter its optimal idle state. + */ + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + next_wakeup = to_gpd_data(pdd)->next_wakeup; + if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) + if (ktime_before(next_wakeup, domain_wakeup)) + domain_wakeup = next_wakeup; + } + + list_for_each_entry(link, &genpd->parent_links, parent_node) { + next_wakeup = link->child->next_wakeup; + if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) + if (ktime_before(next_wakeup, domain_wakeup)) + domain_wakeup = next_wakeup; + } + + genpd->next_wakeup = domain_wakeup; +} + +static bool next_wakeup_allows_state(struct generic_pm_domain *genpd, + unsigned int state, ktime_t now) +{ + ktime_t domain_wakeup = genpd->next_wakeup; + s64 idle_time_ns, min_sleep_ns; + + min_sleep_ns = genpd->states[state].power_off_latency_ns + + genpd->states[state].residency_ns; + + idle_time_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); + + return idle_time_ns >= min_sleep_ns; +} + static bool __default_power_down_ok(struct dev_pm_domain *pd, unsigned int state) { @@ -201,16 +250,41 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, } /** - * default_power_down_ok - Default generic PM domain power off governor routine. + * _default_power_down_ok - Default generic PM domain power off governor routine. * @pd: PM domain to check. * * This routine must be executed under the PM domain's lock. */ -static bool default_power_down_ok(struct dev_pm_domain *pd) +static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) { struct generic_pm_domain *genpd = pd_to_genpd(pd); + int state_idx = genpd->state_count - 1; struct gpd_link *link; + /* + * Find the next wakeup from devices that can determine their own wakeup + * to find when the domain would wakeup and do it for every device down + * the hierarchy. It is not worth while to sleep if the state's residency + * cannot be met. + */ + update_domain_next_wakeup(genpd, now); + if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) { + /* Let's find out the deepest domain idle state, the devices prefer */ + while (state_idx >= 0) { + if (next_wakeup_allows_state(genpd, state_idx, now)) { + genpd->max_off_time_changed = true; + break; + } + state_idx--; + } + + if (state_idx < 0) { + state_idx = 0; + genpd->cached_power_down_ok = false; + goto done; + } + } + if (!genpd->max_off_time_changed) { genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; @@ -228,21 +302,30 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->max_off_time_ns = -1; genpd->max_off_time_changed = false; genpd->cached_power_down_ok = true; - genpd->state_idx = genpd->state_count - 1; - /* Find a state to power down to, starting from the deepest. */ - while (!__default_power_down_ok(pd, genpd->state_idx)) { - if (genpd->state_idx == 0) { + /* + * Find a state to power down to, starting from the state + * determined by the next wakeup. + */ + while (!__default_power_down_ok(pd, state_idx)) { + if (state_idx == 0) { genpd->cached_power_down_ok = false; break; } - genpd->state_idx--; + state_idx--; } +done: + genpd->state_idx = state_idx; genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } +static bool default_power_down_ok(struct dev_pm_domain *pd) +{ + return _default_power_down_ok(pd, ktime_get()); +} + static bool always_on_power_down_ok(struct dev_pm_domain *domain) { return false; @@ -254,11 +337,12 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct cpuidle_device *dev; ktime_t domain_wakeup, next_hrtimer; + ktime_t now = ktime_get(); s64 idle_duration_ns; int cpu, i; /* Validate dev PM QoS constraints. */ - if (!default_power_down_ok(pd)) + if (!_default_power_down_ok(pd, now)) return false; if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) @@ -280,7 +364,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) } /* The minimum idle duration is from now - until the next wakeup. */ - idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); if (idle_duration_ns <= 0) return false; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 735583c0bc6d..dfcfbcecc34b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -56,6 +56,10 @@ * * GENPD_FLAG_RPM_ALWAYS_ON: Instructs genpd to always keep the PM domain * powered on except for system suspend. + * + * GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its + * components' next wakeup when determining the + * optimal idle state. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -63,6 +67,7 @@ #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) #define GENPD_FLAG_CPU_DOMAIN (1U << 4) #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) +#define GENPD_FLAG_MIN_RESIDENCY (1U << 6) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ @@ -130,6 +135,7 @@ struct generic_pm_domain { unsigned int state); struct gpd_dev_ops dev_ops; s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ + ktime_t next_wakeup; /* Maintained by the domain governor */ bool max_off_time_changed; bool cached_power_down_ok; bool cached_power_down_state_idx; From 079c42a0ed73500f1d11b5564e31d56c52bee21e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 21 Jan 2021 00:12:30 +0300 Subject: [PATCH 04/12] PM: domains: Make set_performance_state() callback optional Make set_performance_state() callback optional in order to remove the need from power domain drivers to implement a dummy callback. If callback isn't implemented by a GENPD driver, then the performance state is passed to the parent domain. Tested-by: Peter Geis Tested-by: Nicolas Chauvet Tested-by: Matt Merhar [tested on NVIDIA Tegra20/30/124 SoCs] Suggested-by: Ulf Hansson Reviewed-by: Ulf Hansson Signed-off-by: Dmitry Osipenko Reviewed-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 014033c7c287..4878c824e66c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -297,6 +297,18 @@ static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, return state; } +static int genpd_xlate_performance_state(struct generic_pm_domain *genpd, + struct generic_pm_domain *parent, + unsigned int pstate) +{ + if (!parent->set_performance_state) + return pstate; + + return dev_pm_opp_xlate_performance_state(genpd->opp_table, + parent->opp_table, + pstate); +} + static int _genpd_set_performance_state(struct generic_pm_domain *genpd, unsigned int state, int depth) { @@ -311,13 +323,8 @@ static int _genpd_set_performance_state(struct generic_pm_domain *genpd, list_for_each_entry(link, &genpd->child_links, child_node) { parent = link->parent; - if (!parent->set_performance_state) - continue; - /* Find parent's performance state */ - ret = dev_pm_opp_xlate_performance_state(genpd->opp_table, - parent->opp_table, - state); + ret = genpd_xlate_performance_state(genpd, parent, state); if (unlikely(ret < 0)) goto err; @@ -339,9 +346,11 @@ static int _genpd_set_performance_state(struct generic_pm_domain *genpd, goto err; } - ret = genpd->set_performance_state(genpd, state); - if (ret) - goto err; + if (genpd->set_performance_state) { + ret = genpd->set_performance_state(genpd, state); + if (ret) + goto err; + } genpd->performance_state = state; return 0; @@ -352,9 +361,6 @@ err: child_node) { parent = link->parent; - if (!parent->set_performance_state) - continue; - genpd_lock_nested(parent, depth + 1); parent_state = link->prev_performance_state; @@ -399,9 +405,6 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) if (!genpd) return -ENODEV; - if (unlikely(!genpd->set_performance_state)) - return -EINVAL; - if (WARN_ON(!dev->power.subsys_data || !dev->power.subsys_data->domain_data)) return -EINVAL; From 18027d6f392ee8d89d9df4dff0a7db4fb2d6f8a5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 21 Jan 2021 00:12:31 +0300 Subject: [PATCH 05/12] PM: domains: Make of_genpd_add_subdomain() return -EPROBE_DEFER Driver of a power domain provider may not be ready at the time of of_genpd_add_subdomain() invocation. Make this function to return -EPROBE_DEFER instead of -ENOENT in order to remove a need from power domain drivers to handle the error code specially. Tested-by: Peter Geis Tested-by: Nicolas Chauvet Tested-by: Matt Merhar [tested on NVIDIA Tegra20/30/124 SoCs] Suggested-by: Ulf Hansson Reviewed-by: Ulf Hansson Reviewed-by: Viresh Kumar Signed-off-by: Dmitry Osipenko Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4878c824e66c..c615abf56c52 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2496,7 +2496,7 @@ int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, out: mutex_unlock(&gpd_list_lock); - return ret; + return ret == -ENOENT ? -EPROBE_DEFER : ret; } EXPORT_SYMBOL_GPL(of_genpd_add_subdomain); From 45fbc464b047b3fbd760c9cb460a50a1ef2cf933 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 21 Jan 2021 00:12:32 +0300 Subject: [PATCH 06/12] PM: domains: Add "performance" column to debug summary Add "performance" column to debug summary which shows performance state of all power domains and theirs devices. Tested-by: Peter Geis Tested-by: Nicolas Chauvet Tested-by: Matt Merhar [tested on NVIDIA Tegra20/30/124 SoCs] Reviewed-by: Ulf Hansson Reviewed-by: Viresh Kumar Signed-off-by: Dmitry Osipenko Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c615abf56c52..50211a402fa5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2985,7 +2985,15 @@ static void rtpm_status_str(struct seq_file *s, struct device *dev) else WARN_ON(1); - seq_puts(s, p); + seq_printf(s, "%-25s ", p); +} + +static void perf_status_str(struct seq_file *s, struct device *dev) +{ + struct generic_pm_domain_data *gpd_data; + + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + seq_put_decimal_ull(s, "", gpd_data->performance_state); } static int genpd_summary_one(struct seq_file *s, @@ -3013,7 +3021,7 @@ static int genpd_summary_one(struct seq_file *s, else snprintf(state, sizeof(state), "%s", status_lookup[genpd->status]); - seq_printf(s, "%-30s %-15s ", genpd->name, state); + seq_printf(s, "%-30s %-50s %u", genpd->name, state, genpd->performance_state); /* * Modifications on the list require holding locks on both @@ -3021,6 +3029,8 @@ static int genpd_summary_one(struct seq_file *s, * Also genpd->name is immutable. */ list_for_each_entry(link, &genpd->parent_links, parent_node) { + if (list_is_first(&link->parent_node, &genpd->parent_links)) + seq_printf(s, "\n%48s", " "); seq_printf(s, "%s", link->child->name); if (!list_is_last(&link->parent_node, &genpd->parent_links)) seq_puts(s, ", "); @@ -3035,6 +3045,7 @@ static int genpd_summary_one(struct seq_file *s, seq_printf(s, "\n %-50s ", kobj_path); rtpm_status_str(s, pm_data->dev); + perf_status_str(s, pm_data->dev); kfree(kobj_path); } @@ -3050,9 +3061,9 @@ static int summary_show(struct seq_file *s, void *data) struct generic_pm_domain *genpd; int ret = 0; - seq_puts(s, "domain status children\n"); + seq_puts(s, "domain status children performance\n"); seq_puts(s, " /device runtime status\n"); - seq_puts(s, "----------------------------------------------------------------------\n"); + seq_puts(s, "----------------------------------------------------------------------------------------------\n"); ret = mutex_lock_interruptible(&gpd_list_lock); if (ret) From ccf7ce46ab91515a7146c00300e168efa9dc777e Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 25 Jan 2021 12:18:28 +0800 Subject: [PATCH 07/12] PM: sleep: No need to check PF_WQ_WORKER in thaw_kernel_threads() Because PF_KTHREAD is set for all wq worker threads, it is not necessary to check PF_WQ_WORKER in addition to it in thaw_kernel_threads(), so stop doing that. Signed-off-by: Zqiang [ rjw: Subject and changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 45b054b7b5ec..50cc63534486 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -235,7 +235,7 @@ void thaw_kernel_threads(void) read_lock(&tasklist_lock); for_each_process_thread(g, p) { - if (p->flags & (PF_KTHREAD | PF_WQ_WORKER)) + if (p->flags & PF_KTHREAD) __thaw_task(p); } read_unlock(&tasklist_lock); From eb23d91af55bc2369fe3f0aa6997e72eb20e16fe Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 26 Jan 2021 15:29:40 -0600 Subject: [PATCH 08/12] PM: sleep: Use dev_printk() when possible Use dev_printk() when possible to make messages more consistent with other device-related messages. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 46793276598d..f893c3c5af07 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -16,6 +16,7 @@ */ #define pr_fmt(fmt) "PM: " fmt +#define dev_fmt pr_fmt #include #include @@ -449,8 +450,8 @@ static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, int error) { - pr_err("Device %s failed to %s%s: error %d\n", - dev_name(dev), pm_verb(state.event), info, error); + dev_err(dev, "failed to %s%s: error %d\n", pm_verb(state.event), info, + error); } static void dpm_show_time(ktime_t starttime, pm_message_t state, int error, @@ -1897,8 +1898,8 @@ int dpm_prepare(pm_message_t state) error = 0; continue; } - pr_info("Device %s not prepared for power transition: code %d\n", - dev_name(dev), error); + dev_info(dev, "not prepared for power transition: code %d\n", + error); put_device(dev); break; } From 309663093c8aba02cbea83b0bc8ee9a99833c482 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 26 Jan 2021 15:26:55 -0600 Subject: [PATCH 09/12] PM: runtime: Fix typos and grammar Fix minor typos and grammatical issues. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 0553008b6279..d9c777b18f7a 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -579,7 +579,7 @@ should be used. Of course, for this purpose the device's runtime PM has to be enabled earlier by calling pm_runtime_enable(). Note, if the device may execute pm_runtime calls during the probe (such as -if it is registers with a subsystem that may call back in) then the +if it is registered with a subsystem that may call back in) then the pm_runtime_get_sync() call paired with a pm_runtime_put() call will be appropriate to ensure that the device is not put back to sleep during the probe. This can happen with systems such as the network device layer. @@ -587,11 +587,11 @@ probe. This can happen with systems such as the network device layer. It may be desirable to suspend the device once ->probe() has finished. Therefore the driver core uses the asynchronous pm_request_idle() to submit a request to execute the subsystem-level idle callback for the device at that -time. A driver that makes use of the runtime autosuspend feature, may want to +time. A driver that makes use of the runtime autosuspend feature may want to update the last busy mark before returning from ->probe(). Moreover, the driver core prevents runtime PM callbacks from racing with the bus -notifier callback in __device_release_driver(), which is necessary, because the +notifier callback in __device_release_driver(), which is necessary because the notifier is used by some subsystems to carry out operations affecting the runtime PM functionality. It does so by calling pm_runtime_get_sync() before driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This @@ -603,7 +603,7 @@ calling pm_runtime_suspend() from their ->remove() routines, the driver core executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER notifications in __device_release_driver(). This requires bus types and drivers to make their ->remove() callbacks avoid races with runtime PM directly, -but also it allows of more flexibility in the handling of devices during the +but it also allows more flexibility in the handling of devices during the removal of their drivers. Drivers in ->remove() callback should undo the runtime PM changes done @@ -693,7 +693,7 @@ that the device appears to be runtime-suspended and its state is fine, so it may be left in runtime suspend provided that all of its descendants are also left in runtime suspend. If that happens, the PM core will not execute any system suspend and resume callbacks for all of those devices, except for the -complete callback, which is then entirely responsible for handling the device +.complete() callback, which is then entirely responsible for handling the device as appropriate. This only applies to system suspend transitions that are not related to hibernation (see Documentation/driver-api/pm/devices.rst for more information). @@ -706,7 +706,7 @@ out the following operations: right before executing the subsystem-level .prepare() callback for it and pm_runtime_barrier() is called for every device right before executing the subsystem-level .suspend() callback for it. In addition to that the PM core - calls __pm_runtime_disable() with 'false' as the second argument for every + calls __pm_runtime_disable() with 'false' as the second argument for every device right before executing the subsystem-level .suspend_late() callback for it. @@ -783,7 +783,7 @@ driver/base/power/generic_ops.c: `int pm_generic_restore_noirq(struct device *dev);` - invoke the ->restore_noirq() callback provided by the device's driver -These functions are the defaults used by the PM core, if a subsystem doesn't +These functions are the defaults used by the PM core if a subsystem doesn't provide its own callbacks for ->runtime_idle(), ->runtime_suspend(), ->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(), ->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(), From 0bfa0820c274b019583b3454c6c889c99c24558d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 25 Jan 2021 14:29:18 -0500 Subject: [PATCH 10/12] PM: clk: make PM clock layer compatible with clocks that must sleep The clock API splits its interface into sleepable ant atomic contexts: - clk_prepare/clk_unprepare for stuff that might sleep - clk_enable_clk_disable for anything that may be done in atomic context The code handling runtime PM for clocks only calls clk_disable() on suspend requests, and clk_enable on resume requests. This means that runtime PM with clock providers that only have the prepare/unprepare methods implemented is basically useless. Many clock implementations can't accommodate atomic contexts. This is often the case when communication with the clock happens through another subsystem like I2C or SCMI. Let's make the clock PM code useful with such clocks by safely invoking clk_prepare/clk_unprepare upon resume/suspend requests. Of course, when such clocks are registered with the PM layer then pm_runtime_irq_safe() can't be used, and neither pm_runtime_suspend() nor pm_runtime_resume() may be invoked in atomic context. For clocks that do implement the enable and disable methods then everything just works as before. A note on sparse: According to https://lwn.net/Articles/109066/ there are things that sparse can't cope with. In particular, pm_clk_op_lock() and pm_clk_op_unlock() may or may not lock/unlock psd->lock depending on some runtime condition. To work around that we tell it the lock is always untaken for the purpose of static analisys. Thanks to Naresh Kamboju for reporting issues with the initial patch. Signed-off-by: Nicolas Pitre Tested-by: Naresh Kamboju Signed-off-by: Rafael J. Wysocki --- drivers/base/power/clock_ops.c | 223 +++++++++++++++++++++++++++------ drivers/clk/clk.c | 21 ++++ include/linux/clk.h | 24 +++- include/linux/pm.h | 2 + 4 files changed, 228 insertions(+), 42 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index ced6863a16a5..84d5acb6301b 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -23,6 +23,7 @@ enum pce_status { PCE_STATUS_NONE = 0, PCE_STATUS_ACQUIRED, + PCE_STATUS_PREPARED, PCE_STATUS_ENABLED, PCE_STATUS_ERROR, }; @@ -32,8 +33,112 @@ struct pm_clock_entry { char *con_id; struct clk *clk; enum pce_status status; + bool enabled_when_prepared; }; +/** + * pm_clk_list_lock - ensure exclusive access for modifying the PM clock + * entry list. + * @psd: pm_subsys_data instance corresponding to the PM clock entry list + * and clk_op_might_sleep count to be modified. + * + * Get exclusive access before modifying the PM clock entry list and the + * clock_op_might_sleep count to guard against concurrent modifications. + * This also protects against a concurrent clock_op_might_sleep and PM clock + * entry list usage in pm_clk_suspend()/pm_clk_resume() that may or may not + * happen in atomic context, hence both the mutex and the spinlock must be + * taken here. + */ +static void pm_clk_list_lock(struct pm_subsys_data *psd) + __acquires(&psd->lock) +{ + mutex_lock(&psd->clock_mutex); + spin_lock_irq(&psd->lock); +} + +/** + * pm_clk_list_unlock - counterpart to pm_clk_list_lock(). + * @psd: the same pm_subsys_data instance previously passed to + * pm_clk_list_lock(). + */ +static void pm_clk_list_unlock(struct pm_subsys_data *psd) + __releases(&psd->lock) +{ + spin_unlock_irq(&psd->lock); + mutex_unlock(&psd->clock_mutex); +} + +/** + * pm_clk_op_lock - ensure exclusive access for performing clock operations. + * @psd: pm_subsys_data instance corresponding to the PM clock entry list + * and clk_op_might_sleep count being used. + * @flags: stored irq flags. + * @fn: string for the caller function's name. + * + * This is used by pm_clk_suspend() and pm_clk_resume() to guard + * against concurrent modifications to the clock entry list and the + * clock_op_might_sleep count. If clock_op_might_sleep is != 0 then + * only the mutex can be locked and those functions can only be used in + * non atomic context. If clock_op_might_sleep == 0 then these functions + * may be used in any context and only the spinlock can be locked. + * Returns -EINVAL if called in atomic context when clock ops might sleep. + */ +static int pm_clk_op_lock(struct pm_subsys_data *psd, unsigned long *flags, + const char *fn) + /* sparse annotations don't work here as exit state isn't static */ +{ + bool atomic_context = in_atomic() || irqs_disabled(); + +try_again: + spin_lock_irqsave(&psd->lock, *flags); + if (!psd->clock_op_might_sleep) { + /* the __release is there to work around sparse limitations */ + __release(&psd->lock); + return 0; + } + + /* bail out if in atomic context */ + if (atomic_context) { + pr_err("%s: atomic context with clock_ops_might_sleep = %d", + fn, psd->clock_op_might_sleep); + spin_unlock_irqrestore(&psd->lock, *flags); + might_sleep(); + return -EPERM; + } + + /* we must switch to the mutex */ + spin_unlock_irqrestore(&psd->lock, *flags); + mutex_lock(&psd->clock_mutex); + + /* + * There was a possibility for psd->clock_op_might_sleep + * to become 0 above. Keep the mutex only if not the case. + */ + if (likely(psd->clock_op_might_sleep)) + return 0; + + mutex_unlock(&psd->clock_mutex); + goto try_again; +} + +/** + * pm_clk_op_unlock - counterpart to pm_clk_op_lock(). + * @psd: the same pm_subsys_data instance previously passed to + * pm_clk_op_lock(). + * @flags: irq flags provided by pm_clk_op_lock(). + */ +static void pm_clk_op_unlock(struct pm_subsys_data *psd, unsigned long *flags) + /* sparse annotations don't work here as entry state isn't static */ +{ + if (psd->clock_op_might_sleep) { + mutex_unlock(&psd->clock_mutex); + } else { + /* the __acquire is there to work around sparse limitations */ + __acquire(&psd->lock); + spin_unlock_irqrestore(&psd->lock, *flags); + } +} + /** * pm_clk_enable - Enable a clock, reporting any errors * @dev: The device for the given clock @@ -43,14 +148,21 @@ static inline void __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce { int ret; - if (ce->status < PCE_STATUS_ERROR) { + switch (ce->status) { + case PCE_STATUS_ACQUIRED: + ret = clk_prepare_enable(ce->clk); + break; + case PCE_STATUS_PREPARED: ret = clk_enable(ce->clk); - if (!ret) - ce->status = PCE_STATUS_ENABLED; - else - dev_err(dev, "%s: failed to enable clk %p, error %d\n", - __func__, ce->clk, ret); + break; + default: + return; } + if (!ret) + ce->status = PCE_STATUS_ENABLED; + else + dev_err(dev, "%s: failed to enable clk %p, error %d\n", + __func__, ce->clk, ret); } /** @@ -64,17 +176,20 @@ static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce) ce->clk = clk_get(dev, ce->con_id); if (IS_ERR(ce->clk)) { ce->status = PCE_STATUS_ERROR; + return; + } else if (clk_is_enabled_when_prepared(ce->clk)) { + /* we defer preparing the clock in that case */ + ce->status = PCE_STATUS_ACQUIRED; + ce->enabled_when_prepared = true; + } else if (clk_prepare(ce->clk)) { + ce->status = PCE_STATUS_ERROR; + dev_err(dev, "clk_prepare() failed\n"); + return; } else { - if (clk_prepare(ce->clk)) { - ce->status = PCE_STATUS_ERROR; - dev_err(dev, "clk_prepare() failed\n"); - } else { - ce->status = PCE_STATUS_ACQUIRED; - dev_dbg(dev, - "Clock %pC con_id %s managed by runtime PM.\n", - ce->clk, ce->con_id); - } + ce->status = PCE_STATUS_PREPARED; } + dev_dbg(dev, "Clock %pC con_id %s managed by runtime PM.\n", + ce->clk, ce->con_id); } static int __pm_clk_add(struct device *dev, const char *con_id, @@ -106,9 +221,11 @@ static int __pm_clk_add(struct device *dev, const char *con_id, pm_clk_acquire(dev, ce); - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_add_tail(&ce->node, &psd->clock_list); - spin_unlock_irq(&psd->lock); + if (ce->enabled_when_prepared) + psd->clock_op_might_sleep++; + pm_clk_list_unlock(psd); return 0; } @@ -239,14 +356,20 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) if (!ce) return; - if (ce->status < PCE_STATUS_ERROR) { - if (ce->status == PCE_STATUS_ENABLED) - clk_disable(ce->clk); - - if (ce->status >= PCE_STATUS_ACQUIRED) { - clk_unprepare(ce->clk); + switch (ce->status) { + case PCE_STATUS_ENABLED: + clk_disable(ce->clk); + fallthrough; + case PCE_STATUS_PREPARED: + clk_unprepare(ce->clk); + fallthrough; + case PCE_STATUS_ACQUIRED: + case PCE_STATUS_ERROR: + if (!IS_ERR(ce->clk)) clk_put(ce->clk); - } + break; + default: + break; } kfree(ce->con_id); @@ -269,7 +392,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) if (!psd) return; - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_for_each_entry(ce, &psd->clock_list, node) { if (!con_id && !ce->con_id) @@ -280,12 +403,14 @@ void pm_clk_remove(struct device *dev, const char *con_id) goto remove; } - spin_unlock_irq(&psd->lock); + pm_clk_list_unlock(psd); return; remove: list_del(&ce->node); - spin_unlock_irq(&psd->lock); + if (ce->enabled_when_prepared) + psd->clock_op_might_sleep--; + pm_clk_list_unlock(psd); __pm_clk_remove(ce); } @@ -307,19 +432,21 @@ void pm_clk_remove_clk(struct device *dev, struct clk *clk) if (!psd || !clk) return; - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_for_each_entry(ce, &psd->clock_list, node) { if (clk == ce->clk) goto remove; } - spin_unlock_irq(&psd->lock); + pm_clk_list_unlock(psd); return; remove: list_del(&ce->node); - spin_unlock_irq(&psd->lock); + if (ce->enabled_when_prepared) + psd->clock_op_might_sleep--; + pm_clk_list_unlock(psd); __pm_clk_remove(ce); } @@ -330,13 +457,16 @@ EXPORT_SYMBOL_GPL(pm_clk_remove_clk); * @dev: Device to initialize the list of PM clocks for. * * Initialize the lock and clock_list members of the device's pm_subsys_data - * object. + * object, set the count of clocks that might sleep to 0. */ void pm_clk_init(struct device *dev) { struct pm_subsys_data *psd = dev_to_psd(dev); - if (psd) + if (psd) { INIT_LIST_HEAD(&psd->clock_list); + mutex_init(&psd->clock_mutex); + psd->clock_op_might_sleep = 0; + } } EXPORT_SYMBOL_GPL(pm_clk_init); @@ -372,12 +502,13 @@ void pm_clk_destroy(struct device *dev) INIT_LIST_HEAD(&list); - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_for_each_entry_safe_reverse(ce, c, &psd->clock_list, node) list_move(&ce->node, &list); + psd->clock_op_might_sleep = 0; - spin_unlock_irq(&psd->lock); + pm_clk_list_unlock(psd); dev_pm_put_subsys_data(dev); @@ -397,23 +528,30 @@ int pm_clk_suspend(struct device *dev) struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; + int ret; dev_dbg(dev, "%s()\n", __func__); if (!psd) return 0; - spin_lock_irqsave(&psd->lock, flags); + ret = pm_clk_op_lock(psd, &flags, __func__); + if (ret) + return ret; list_for_each_entry_reverse(ce, &psd->clock_list, node) { - if (ce->status < PCE_STATUS_ERROR) { - if (ce->status == PCE_STATUS_ENABLED) + if (ce->status == PCE_STATUS_ENABLED) { + if (ce->enabled_when_prepared) { + clk_disable_unprepare(ce->clk); + ce->status = PCE_STATUS_ACQUIRED; + } else { clk_disable(ce->clk); - ce->status = PCE_STATUS_ACQUIRED; + ce->status = PCE_STATUS_PREPARED; + } } } - spin_unlock_irqrestore(&psd->lock, flags); + pm_clk_op_unlock(psd, &flags); return 0; } @@ -428,18 +566,21 @@ int pm_clk_resume(struct device *dev) struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; + int ret; dev_dbg(dev, "%s()\n", __func__); if (!psd) return 0; - spin_lock_irqsave(&psd->lock, flags); + ret = pm_clk_op_lock(psd, &flags, __func__); + if (ret) + return ret; list_for_each_entry(ce, &psd->clock_list, node) __pm_clk_enable(dev, ce); - spin_unlock_irqrestore(&psd->lock, flags); + pm_clk_op_unlock(psd, &flags); return 0; } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8c1d04db990d..3d751ae5bc70 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1164,6 +1164,27 @@ int clk_enable(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_enable); +/** + * clk_is_enabled_when_prepared - indicate if preparing a clock also enables it. + * @clk: clock source + * + * Returns true if clk_prepare() implicitly enables the clock, effectively + * making clk_enable()/clk_disable() no-ops, false otherwise. + * + * This is of interest mainly to power management code where actually + * disabling the clock also requires unpreparing it to have any material + * effect. + * + * Regardless of the value returned here, the caller must always invoke + * clk_enable() or clk_prepare_enable() and counterparts for usage counts + * to be right. + */ +bool clk_is_enabled_when_prepared(struct clk *clk) +{ + return clk && !(clk->core->ops->enable && clk->core->ops->disable); +} +EXPORT_SYMBOL_GPL(clk_is_enabled_when_prepared); + static int clk_core_prepare_enable(struct clk_core *core) { int ret; diff --git a/include/linux/clk.h b/include/linux/clk.h index 31ff1bf1b79f..a4a86aa8b11a 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -238,6 +238,7 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q) #endif +#ifdef CONFIG_HAVE_CLK_PREPARE /** * clk_prepare - prepare a clock source * @clk: clock source @@ -246,10 +247,26 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q) * * Must not be called from within atomic context. */ -#ifdef CONFIG_HAVE_CLK_PREPARE int clk_prepare(struct clk *clk); int __must_check clk_bulk_prepare(int num_clks, const struct clk_bulk_data *clks); + +/** + * clk_is_enabled_when_prepared - indicate if preparing a clock also enables it. + * @clk: clock source + * + * Returns true if clk_prepare() implicitly enables the clock, effectively + * making clk_enable()/clk_disable() no-ops, false otherwise. + * + * This is of interest mainly to the power management code where actually + * disabling the clock also requires unpreparing it to have any material + * effect. + * + * Regardless of the value returned here, the caller must always invoke + * clk_enable() or clk_prepare_enable() and counterparts for usage counts + * to be right. + */ +bool clk_is_enabled_when_prepared(struct clk *clk); #else static inline int clk_prepare(struct clk *clk) { @@ -263,6 +280,11 @@ clk_bulk_prepare(int num_clks, const struct clk_bulk_data *clks) might_sleep(); return 0; } + +static inline bool clk_is_enabled_when_prepared(struct clk *clk) +{ + return false; +} #endif /** diff --git a/include/linux/pm.h b/include/linux/pm.h index 47aca6bac1d6..482313a8ccfc 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -537,6 +537,8 @@ struct pm_subsys_data { spinlock_t lock; unsigned int refcount; #ifdef CONFIG_PM_CLK + unsigned int clock_op_might_sleep; + struct mutex clock_mutex; struct list_head clock_list; #endif #ifdef CONFIG_PM_GENERIC_DOMAINS From 6dc466d34f51767ad34fb900de8d278a66a3f1ed Mon Sep 17 00:00:00 2001 From: Abaci Team Date: Wed, 27 Jan 2021 16:42:05 +0800 Subject: [PATCH 11/12] PM: domains: Simplify the calculation of variables Fix the following coccicheck warnings: ./drivers/base/power/domain.c:938:31-33: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Suggested-by: Jiapeng Zhong Signed-off-by: Abaci Team Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 50211a402fa5..aaf6c83b5cf6 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -966,8 +966,7 @@ static int genpd_runtime_resume(struct device *dev) err_stop: genpd_stop_dev(genpd, dev); err_poweroff: - if (!pm_runtime_is_irq_safe(dev) || - (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { + if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) { genpd_lock(genpd); genpd_power_off(genpd, true, 0); genpd_unlock(genpd); From 1556057413a304b3020180240d798ec135d90844 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Mon, 1 Feb 2021 23:57:35 +0100 Subject: [PATCH 12/12] PM: sleep: Constify static struct attribute_group The only usage of suspend_attr_group is to put its address in an array of pointers to const attribute_group structs. Make it const to allow the compiler to put it into read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 0aefd6f57e0a..12c7e1bb442f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -387,7 +387,7 @@ static struct attribute *suspend_attrs[] = { NULL, }; -static struct attribute_group suspend_attr_group = { +static const struct attribute_group suspend_attr_group = { .name = "suspend_stats", .attrs = suspend_attrs, };