From 5b2ad5acaf5aa8a1ff441665967cf728a46ac967 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 5 Dec 2022 19:43:43 -0600 Subject: [PATCH 01/11] dt-bindings: opp: opp-v2-kryo-cpu: Add missing 'cache-unified' property in example The examples' cache nodes are incomplete as 'cache-unified' is a required cache property for unified caches which an L2 cache certainly is. Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml index 60cf3cbde4c5..b4947b326773 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml @@ -106,6 +106,7 @@ examples: L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -140,6 +141,7 @@ examples: L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; From cea7be909414d941a4616e6794f4a5282eb6e652 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Jan 2023 16:38:02 -0800 Subject: [PATCH 02/11] drivers/opp: Remove "select SRCU" Now that the SRCU Kconfig option is unconditionally selected, there is no longer any point in selecting it. Therefore, remove the "select SRCU" Kconfig statements. Signed-off-by: Paul E. McKenney Cc: Viresh Kumar Cc: Nishanth Menon Cc: Stephen Boyd Cc: Signed-off-by: Viresh Kumar --- drivers/opp/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/opp/Kconfig b/drivers/opp/Kconfig index e8ce47b32735..d7c649a1a981 100644 --- a/drivers/opp/Kconfig +++ b/drivers/opp/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config PM_OPP bool - select SRCU help SOCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain. 
This From 1b6599f741a4525ca761ecde46e5885ff1e6ba58 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 3 Jan 2023 20:57:26 +0800 Subject: [PATCH 03/11] powercap: fix possible name leak in powercap_register_zone() In the error path after calling dev_set_name(), the device name is leaked. To fix this, move the dev_set_name() call to just before device_register(), and call put_device() if it returns an error. All the resources are released in powercap_release(), so it can return from powercap_register_zone() directly. Fixes: 75d2364ea0ca ("PowerCap: Add class driver") Signed-off-by: Yang Yingliang Signed-off-by: Rafael J. Wysocki --- drivers/powercap/powercap_sys.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 1f968353d479..e180dee0f83d 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -530,9 +530,6 @@ struct powercap_zone *powercap_register_zone( power_zone->name = kstrdup(name, GFP_KERNEL); if (!power_zone->name) goto err_name_alloc; - dev_set_name(&power_zone->dev, "%s:%x", - dev_name(power_zone->dev.parent), - power_zone->id); power_zone->constraints = kcalloc(nr_constraints, sizeof(*power_zone->constraints), GFP_KERNEL); @@ -555,9 +552,16 @@ struct powercap_zone *powercap_register_zone( power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group; power_zone->dev_attr_groups[1] = NULL; power_zone->dev.groups = power_zone->dev_attr_groups; + dev_set_name(&power_zone->dev, "%s:%x", + dev_name(power_zone->dev.parent), + power_zone->id); result = device_register(&power_zone->dev); - if (result) - goto err_dev_ret; + if (result) { + put_device(&power_zone->dev); + mutex_unlock(&control_type->lock); + + return ERR_PTR(result); + } control_type->nr_zones++; mutex_unlock(&control_type->lock); From bdaad038cc3c620a769f2156e7c9aab8605411c2 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:36:01 +0800 Subject: [PATCH 04/11] 
powercap: intel_rapl: add support for Meteor Lake Add Meteor Lake to the list of supported processor models in the Intel RAPL power capping driver. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 26d00b1853b4..ca6ff27b4384 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1113,6 +1113,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), From 7adc6885259edd4ef5c9a7a62fd4270cf38fdbfb Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:36:02 +0800 Subject: [PATCH 05/11] powercap: intel_rapl: add support for Emerald Rapids Add Emerald Rapids to the list of supported processor models in the Intel RAPL power capping driver. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index ca6ff27b4384..9a9192fc8391 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1116,6 +1116,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), From c7cd6f04c0dfb6d44337f92b4c32126d20339873 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 17 Jan 2023 10:22:40 -0800 Subject: [PATCH 06/11] powercap: idle_inject: Support 100% idle injection The users of the idle injection framework allow 100% idle injection. For example: thermal/cpuidle_cooling.c driver. When the ratio is set to 100%, the runtime_duration becomes zero. However, idle_inject_set_duration() in the idle injection framework silently ignores run_duration_us == 0 without any error (it is a void function). The caller will then assume that everything is fine and 100% idle is effective, but in reality the idle duration will not change. There are two options: - The caller may change their max state to 99% instead of 100% and document that 100% is not supported by the idle inject framework. - Add 100% idle support to the idle inject framework. Since there are other protections via RT throttling, this framework can allow 100% idle. The RT throttling will be activated at 95% idle by default. The caller disabling RT throttling and injecting 100% idle, should be aware that CPU can't be used at all. The idle inject timer is started for (run_duration_us + idle_duration_us) duration. 
Hence replace (run_duration_us && idle_duration_us) with (run_duration_us + idle_duration_us) in the function idle_inject_set_duration(). Also check for !(run_duration_us + idle_duration_us) to return -EINVAL in idle_inject_start(). Signed-off-by: Srinivas Pandruvada Acked-by: Daniel Lezcano [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/idle_inject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index fe86a09e3b67..c03b5402c03b 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -155,10 +155,12 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, unsigned int run_duration_us, unsigned int idle_duration_us) { - if (run_duration_us && idle_duration_us) { + if (run_duration_us + idle_duration_us) { WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); } + if (!run_duration_us) + pr_debug("CPU is forced to 100 percent idle\n"); } /** @@ -201,7 +203,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev) unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); - if (!idle_duration_us || !run_duration_us) + if (!(idle_duration_us + run_duration_us)) return -EINVAL; pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", From 68d8ad3bd9c397f2bf009368cb13e48cb91ea018 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 16 Jan 2023 10:38:42 +0100 Subject: [PATCH 07/11] dt-bindings: opp: v2-qcom-level: Let qcom,opp-fuse-level be a 2-long array In some instances (particularly with CPRh) we might want to specify more than one qcom,opp-fuse-level, as the same OPP subnodes may be used by different "CPR threads". We need to make sure that n = num_threads entries is legal and so far nobody seems to use more than two, so let's allow that. 
Acked-by: Rob Herring Signed-off-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml b/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml index b9ce2e099ce9..a30ef93213c0 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml @@ -30,7 +30,9 @@ patternProperties: this OPP node. Sometimes several corners/levels shares a certain fuse corner/level. A fuse corner/level contains e.g. ref uV, min uV, and max uV. - $ref: /schemas/types.yaml#/definitions/uint32 + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 2 required: - opp-level From eca4c0eea53432ec4b711b2a8ad282cbad231b4f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Wed, 8 Feb 2023 12:00:37 +0800 Subject: [PATCH 08/11] OPP: fix error checking in opp_migrate_dentry() Since commit ff9fb72bc077 ("debugfs: return error values, not NULL") changed return value of debugfs_rename() in error cases from %NULL to %ERR_PTR(-ERROR), we should also check error values instead of NULL. 
Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Signed-off-by: Qi Zheng Signed-off-by: Viresh Kumar --- drivers/opp/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 96a30a032c5f..2c7fb683441e 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -235,7 +235,7 @@ static void opp_migrate_dentry(struct opp_device *opp_dev, dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir, opp_table->dentry_name); - if (!dentry) { + if (IS_ERR(dentry)) { dev_err(dev, "%s: Failed to rename link from: %s to %s\n", __func__, dev_name(opp_dev->dev), dev_name(dev)); return; From 0b6200e1e9f53dabdc30d0f6c51af9a5f664d32b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Feb 2023 15:15:45 +0100 Subject: [PATCH 09/11] PM: domains: fix memory leak with using debugfs_lookup() When calling debugfs_lookup() the result must have dput() called on it, otherwise the memory will leak over time. To make things simpler, just call debugfs_lookup_and_remove() instead which handles all of the logic at once. Signed-off-by: Greg Kroah-Hartman Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 967bcf9d415e..6097644ebdc5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -220,13 +220,10 @@ static void genpd_debug_add(struct generic_pm_domain *genpd); static void genpd_debug_remove(struct generic_pm_domain *genpd) { - struct dentry *d; - if (!genpd_debugfs_dir) return; - d = debugfs_lookup(genpd->name, genpd_debugfs_dir); - debugfs_remove(d); + debugfs_lookup_and_remove(genpd->name, genpd_debugfs_dir); } static void genpd_update_accounting(struct generic_pm_domain *genpd) From a0e8c13ccd6a9a636d27353da62c2410c4eca337 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Feb 2023 16:15:15 +0100 Subject: [PATCH 10/11] PM: EM: fix memory leak with using debugfs_lookup() When calling debugfs_lookup() the result must have dput() called on it, otherwise the memory will leak over time. To make things simpler, just call debugfs_lookup_and_remove() instead which handles all of the logic at once. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. 
Wysocki --- kernel/power/energy_model.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index f82111837b8d..7b44f5b89fa1 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -87,10 +87,7 @@ static void em_debug_create_pd(struct device *dev) static void em_debug_remove_pd(struct device *dev) { - struct dentry *debug_dir; - - debug_dir = debugfs_lookup(dev_name(dev), rootdir); - debugfs_remove_recursive(debug_dir); + debugfs_lookup_and_remove(dev_name(dev), rootdir); } static int __init em_debug_init(void) From cf835b005b2857c2fd763a006c1957f332e5254b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 11 Feb 2023 11:17:10 +0800 Subject: [PATCH 11/11] powercap: intel_rapl: Fix handling for large time window When setting the power limit time window, software updates the 'y' bits and 'f' bits in the power limit register, and the value hardware takes follows the formula below: Time window = 2 ^ y * (1 + f / 4) * Time_Unit When handling large time window input from userspace, using left shifting breaks in two cases: 1. when ilog2(value) is bigger than 31, in expression "1 << y", left shifting by more than 31 bits has undefined behavior. This breaks 'y'. For example, on an Alderlake platform, "1 << 32" returns 1. 2. when ilog2(value) equals 31, "1 << 31" returns a negative value because '1' is recognized as signed int. And this breaks 'f'. Given that 'y' has 5 bits and hardware can never take a value larger than 31, fix the first problem by clamping the time window to the maximum possible value that the hardware can take. Fix the second problem by using unsigned bit left shift. Note that hardware has its own maximum time window limitation, which may be lower than the time window value retrieved from the power limit register. When this happens, hardware clamps the input to its maximum time window limitation. 
That is why a software clamp is preferred to handle the problem on hand. Signed-off-by: Zhang Rui [ rjw: Adjusted the comment added by this change ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 9a9192fc8391..8970c7b80884 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -999,7 +999,15 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, do_div(value, rp->time_unit); y = ilog2(value); - f = div64_u64(4 * (value - (1 << y)), 1 << y); + + /* + * The target hardware field is 7 bits wide, so return all ones + * if the exponent is too large. + */ + if (y > 0x1f) + return 0x7f; + + f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y); value = (y & 0x1f) | ((f & 0x3) << 5); } return value;