diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 1e9797f96410..e937eb7355e7 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -23,6 +23,7 @@ properties: - enum: - qcom,qcm2290-cpufreq-hw - qcom,sc7180-cpufreq-hw + - qcom,sc8180x-cpufreq-hw - qcom,sdm670-cpufreq-hw - qcom,sdm845-cpufreq-hw - qcom,sm6115-cpufreq-hw @@ -34,7 +35,9 @@ properties: items: - enum: - qcom,qdu1000-cpufreq-epss + - qcom,sa8255p-cpufreq-epss - qcom,sa8775p-cpufreq-epss + - qcom,sar2130p-cpufreq-epss - qcom,sc7280-cpufreq-epss - qcom,sc8280xp-cpufreq-epss - qcom,sdx75-cpufreq-epss @@ -107,6 +110,7 @@ allOf: contains: enum: - qcom,qcm2290-cpufreq-hw + - qcom,sar2130p-cpufreq-epss then: properties: reg: @@ -130,7 +134,9 @@ allOf: contains: enum: - qcom,qdu1000-cpufreq-epss + - qcom,sa8255p-cpufreq-epss - qcom,sc7180-cpufreq-hw + - qcom,sc8180x-cpufreq-hw - qcom,sc8280xp-cpufreq-epss - qcom,sdm670-cpufreq-hw - qcom,sdm845-cpufreq-hw diff --git a/Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml new file mode 100644 index 000000000000..018d98bcdc82 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/cpufreq/qemu,virtual-cpufreq.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Virtual CPUFreq + +maintainers: + - David Dai + - Saravana Kannan + +description: + Virtual CPUFreq is a virtualized driver in guest kernels that sends performance + selection of its vCPUs as a hint to the host through MMIO regions. Each vCPU + is associated with a performance domain which can be shared with other vCPUs. + Each performance domain has its own set of registers for performance controls. + +properties: + compatible: + const: qemu,virtual-cpufreq + + reg: + maxItems: 1 + description: + Address and size of region containing performance controls for each of the + performance domains. Regions for each performance domain is placed + contiguously and contain registers for controlling DVFS(Dynamic Frequency + and Voltage) characteristics. The size of the region is proportional to + total number of performance domains. + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + soc { + #address-cells = <1>; + #size-cells = <1>; + + cpufreq@1040000 { + compatible = "qemu,virtual-cpufreq"; + reg = <0x1040000 0x2000>; + }; + }; diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index 717ec4ad63f3..745a7d0b8381 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -3889,7 +3889,7 @@ lmh@18358800 { }; cpufreq_hw: cpufreq@18323000 { - compatible = "qcom,cpufreq-hw"; + compatible = "qcom,sc8180x-cpufreq-hw", "qcom,cpufreq-hw"; reg = <0 0x18323000 0 0x1400>, <0 0x18325800 0 0x1400>; reg-names = "freq-domain0", "freq-domain1"; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 831fa4a12159..ce728cf7e301 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -578,7 +578,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx) * @dev: the target CPU * @index: the index of suggested state */ -static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) +static void acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); @@ -591,11 +591,8 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); } else - return -ENODEV; + return; } - - /* Never reached */ - return 0; } static __always_inline bool acpi_idle_fallback_to_c1(struct acpi_processor *pr) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 2561b215432a..92a83a9bb2e1 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -217,6 +217,20 @@ config CPUFREQ_DT If in doubt, say N. +config CPUFREQ_VIRT + tristate "Virtual cpufreq driver" + depends on GENERIC_ARCH_TOPOLOGY + help + This adds a virtualized cpufreq driver for guest kernels that + read/writes to a MMIO region for a virtualized cpufreq device to + communicate with the host. It sends performance requests to the host + which gets used as a hint to schedule vCPU threads and select CPU + frequency. If a VM does not support a virtualized FIE such as AMUs, + it updates the frequency scaling factor by polling host CPU frequency + to enable accurate Per-Entity Load Tracking for tasks running in the guest. + + If in doubt, say N. + config CPUFREQ_DT_PLATDEV tristate "Generic DT based cpufreq platdev driver" depends on OF diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 0f184031dd12..10d7d6e55da8 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o +obj-$(CONFIG_CPUFREQ_VIRT) += virtual-cpufreq.o # Traces CFLAGS_amd-pstate-trace.o := -I$(src) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 5d03a295a085..2fd0f6be6fa3 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -474,8 +474,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) rc = brcm_avs_get_pmap(priv, NULL); magic = readl(priv->base + AVS_MBOX_MAGIC); - return (magic == AVS_FIRMWARE_MAGIC) && ((rc != -ENOTSUPP) || - (rc != -EINVAL)); + return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) && + (rc != -EINVAL); } static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 2b8708475ac7..bd8f75accfa0 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -36,33 +36,15 @@ static LIST_HEAD(cpu_data_list); static bool boost_supported; -struct cppc_workaround_oem_info { - char oem_id[ACPI_OEM_ID_SIZE + 1]; - char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; - u32 oem_revision; -}; - -static struct cppc_workaround_oem_info wa_info[] = { - { - .oem_id = "HISI ", - .oem_table_id = "HIP07 ", - .oem_revision = 0, - }, { - .oem_id = "HISI ", - .oem_table_id = "HIP08 ", - .oem_revision = 0, - } -}; - static struct cpufreq_driver cppc_cpufreq_driver; +#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE static enum { FIE_UNSET = -1, FIE_ENABLED, FIE_DISABLED } fie_disabled = FIE_UNSET; -#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE module_param(fie_disabled, int, 0444); MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)"); @@ -78,7 +60,6 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1); @@ -118,6 +99,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work) perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); + if (!perf) + return; + cppc_fi->prev_perf_fb_ctrs = fb_ctrs; perf <<= SCHED_CAPACITY_SHIFT; @@ -420,6 +404,9 @@ static int cppc_get_cpu_power(struct device *cpu_dev, struct cppc_cpudata *cpu_data; policy = cpufreq_cpu_get_raw(cpu_dev->id); + if (!policy) + return -EINVAL; + cpu_data = policy->driver_data; perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu_dev->id); @@ -487,6 +474,9 @@ static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, int step; policy = cpufreq_cpu_get_raw(cpu_dev->id); + if (!policy) + return -EINVAL; + cpu_data = policy->driver_data; perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu_dev->id); @@ -724,13 +714,31 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, delta_delivered = get_delta(fb_ctrs_t1->delivered, fb_ctrs_t0->delivered); - /* Check to avoid divide-by zero and invalid delivered_perf */ + /* + * Avoid divide-by zero and unchanged feedback counters. + * Leave it for callers to handle. + */ if (!delta_reference || !delta_delivered) - return cpu_data->perf_ctrls.desired_perf; + return 0; return (reference_perf * delta_delivered) / delta_reference; } +static int cppc_get_perf_ctrs_sample(int cpu, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, + struct cppc_perf_fb_ctrs *fb_ctrs_t1) +{ + int ret; + + ret = cppc_get_perf_ctrs(cpu, fb_ctrs_t0); + if (ret) + return ret; + + udelay(2); /* 2usec delay between sampling */ + + return cppc_get_perf_ctrs(cpu, fb_ctrs_t1); +} + static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) { struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; @@ -746,18 +754,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) cpufreq_cpu_put(policy); - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); - if (ret) - return 0; - - udelay(2); /* 2usec delay between sampling */ - - ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1); - if (ret) - return 0; + ret = cppc_get_perf_ctrs_sample(cpu, &fb_ctrs_t0, &fb_ctrs_t1); + if (ret) { + if (ret == -EFAULT) + /* Any of the associated CPPC regs is 0. */ + goto out_invalid_counters; + else + return 0; + } delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1); + if (!delivered_perf) + goto out_invalid_counters; + + return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf); + +out_invalid_counters: + /* + * Feedback counters could be unchanged or 0 when a cpu enters a + * low-power idle state, e.g. clock-gated or power-gated. + * Use desired perf for reflecting frequency. Get the latest register + * value first as some platforms may update the actual delivered perf + * there; if failed, resort to the cached desired perf. + */ + if (cppc_get_desired_perf(cpu, &delivered_perf)) + delivered_perf = cpu_data->perf_ctrls.desired_perf; return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf); } @@ -812,57 +834,6 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .name = "cppc_cpufreq", }; -/* - * HISI platform does not support delivered performance counter and - * reference performance counter. It can calculate the performance using the - * platform specific mechanism. We reuse the desired performance register to - * store the real performance calculated by the platform. - */ -static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct cppc_cpudata *cpu_data; - u64 desired_perf; - int ret; - - if (!policy) - return -ENODEV; - - cpu_data = policy->driver_data; - - cpufreq_cpu_put(policy); - - ret = cppc_get_desired_perf(cpu, &desired_perf); - if (ret < 0) - return -EIO; - - return cppc_perf_to_khz(&cpu_data->perf_caps, desired_perf); -} - -static void cppc_check_hisi_workaround(void) -{ - struct acpi_table_header *tbl; - acpi_status status = AE_OK; - int i; - - status = acpi_get_table(ACPI_SIG_PCCT, 0, &tbl); - if (ACPI_FAILURE(status) || !tbl) - return; - - for (i = 0; i < ARRAY_SIZE(wa_info); i++) { - if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && - !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && - wa_info[i].oem_revision == tbl->oem_revision) { - /* Overwrite the get() callback */ - cppc_cpufreq_driver.get = hisi_cppc_cpufreq_get_rate; - fie_disabled = FIE_DISABLED; - break; - } - } - - acpi_put_table(tbl); -} - static int __init cppc_cpufreq_init(void) { int ret; @@ -870,7 +841,6 @@ static int __init cppc_cpufreq_init(void) if (!acpi_cpc_valid()) return -ENODEV; - cppc_check_hisi_workaround(); cppc_freq_invariance_init(); populate_efficiency_class(); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 18942bfe9c95..2a3e8bd317c9 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,7 @@ static const struct of_device_id allowlist[] __initconst = { * platforms using "operating-points-v2" property. */ static const struct of_device_id blocklist[] __initconst = { + { .compatible = "allwinner,sun50i-a100" }, { .compatible = "allwinner,sun50i-h6", }, { .compatible = "allwinner,sun50i-h616", }, { .compatible = "allwinner,sun50i-h618", }, diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index 6a8e97896d38..ed1a6dbad638 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -148,7 +148,9 @@ static int __init cpufreq_init(void) ret = cpufreq_register_driver(&loongson2_cpufreq_driver); - if (!ret && !nowait) { + if (ret) { + platform_driver_unregister(&platform_driver); + } else if (!nowait) { saved_cpu_wait = cpu_wait; cpu_wait = loongson2_cpu_wait; } diff --git a/drivers/cpufreq/loongson3_cpufreq.c b/drivers/cpufreq/loongson3_cpufreq.c index 61ebebf69455..bd34bf0fafa5 100644 --- a/drivers/cpufreq/loongson3_cpufreq.c +++ b/drivers/cpufreq/loongson3_cpufreq.c @@ -346,8 +346,11 @@ static int loongson3_cpufreq_probe(struct platform_device *pdev) { int i, ret; - for (i = 0; i < MAX_PACKAGES; i++) - devm_mutex_init(&pdev->dev, &cpufreq_mutex[i]); + for (i = 0; i < MAX_PACKAGES; i++) { + ret = devm_mutex_init(&pdev->dev, &cpufreq_mutex[i]); + if (ret) + return ret; + } ret = do_service_request(0, 0, CMD_GET_VERSION, 0, 0); if (ret <= 0) diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index f7db5f4ad306..9252ebd60373 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -62,7 +62,7 @@ mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, policy = cpufreq_cpu_get_raw(cpu_dev->id); if (!policy) - return 0; + return -EINVAL; data = policy->driver_data; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 5892c73e129d..07d6f9a9b7c8 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -287,7 +287,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) ret = cpufreq_enable_boost_support(); if (ret) { dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); - goto out_free_opp; + goto out_free_table; } else { scmi_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; scmi_cpufreq_driver.boost_enabled = true; @@ -296,6 +296,8 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) return 0; +out_free_table: + dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); out_free_opp: dev_pm_opp_remove_all_dynamic(cpu_dev); diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 352e1a69a85e..17d6a149f580 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -22,6 +22,9 @@ #define NVMEM_MASK 0x7 #define NVMEM_SHIFT 5 +#define SUN50I_A100_NVMEM_MASK 0xf +#define SUN50I_A100_NVMEM_SHIFT 12 + static struct platform_device *cpufreq_dt_pdev, *sun50i_cpufreq_pdev; struct sunxi_cpufreq_data { @@ -45,6 +48,23 @@ static u32 sun50i_h6_efuse_xlate(u32 speedbin) return 0; } +static u32 sun50i_a100_efuse_xlate(u32 speedbin) +{ + u32 efuse_value; + + efuse_value = (speedbin >> SUN50I_A100_NVMEM_SHIFT) & + SUN50I_A100_NVMEM_MASK; + + switch (efuse_value) { + case 0b100: + return 2; + case 0b010: + return 1; + default: + return 0; + } +} + static int get_soc_id_revision(void) { #ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY @@ -108,6 +128,10 @@ static struct sunxi_cpufreq_data sun50i_h6_cpufreq_data = { .efuse_xlate = sun50i_h6_efuse_xlate, }; +static struct sunxi_cpufreq_data sun50i_a100_cpufreq_data = { + .efuse_xlate = sun50i_a100_efuse_xlate, +}; + static struct sunxi_cpufreq_data sun50i_h616_cpufreq_data = { .efuse_xlate = sun50i_h616_efuse_xlate, }; @@ -116,6 +140,9 @@ static const struct of_device_id cpu_opp_match_list[] = { { .compatible = "allwinner,sun50i-h6-operating-points", .data = &sun50i_h6_cpufreq_data, }, + { .compatible = "allwinner,sun50i-a100-operating-points", + .data = &sun50i_a100_cpufreq_data, + }, { .compatible = "allwinner,sun50i-h616-operating-points", .data = &sun50i_h616_cpufreq_data, }, @@ -291,6 +318,7 @@ static struct platform_driver sun50i_cpufreq_driver = { static const struct of_device_id sun50i_cpufreq_match_list[] = { { .compatible = "allwinner,sun50i-h6" }, + { .compatible = "allwinner,sun50i-a100" }, { .compatible = "allwinner,sun50i-h616" }, { .compatible = "allwinner,sun50i-h618" }, { .compatible = "allwinner,sun50i-h700" }, diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index ba621ce1cdda..5a5147277cd0 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -93,6 +93,8 @@ struct ti_cpufreq_soc_data { bool multi_regulator; /* Backward compatibility hack: Might have missing syscon */ #define TI_QUIRK_SYSCON_MAY_BE_MISSING 0x1 +/* Backward compatibility hack: new syscon size is 1 register wide */ +#define TI_QUIRK_SYSCON_IS_SINGLE_REG 0x2 u8 quirks; }; @@ -316,8 +318,8 @@ static struct ti_cpufreq_soc_data am625_soc_data = { .efuse_offset = 0x0018, .efuse_mask = 0x07c0, .efuse_shift = 0x6, - .rev_offset = 0x0014, .multi_regulator = false, + .quirks = TI_QUIRK_SYSCON_IS_SINGLE_REG, }; static struct ti_cpufreq_soc_data am62a7_soc_data = { @@ -325,7 +327,6 @@ static struct ti_cpufreq_soc_data am62a7_soc_data = { .efuse_offset = 0x0, .efuse_mask = 0x07c0, .efuse_shift = 0x6, - .rev_offset = 0x0014, .multi_regulator = false, }; @@ -334,7 +335,6 @@ static struct ti_cpufreq_soc_data am62p5_soc_data = { .efuse_offset = 0x0, .efuse_mask = 0x07c0, .efuse_shift = 0x6, - .rev_offset = 0x0014, .multi_regulator = false, }; @@ -354,6 +354,10 @@ static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data, ret = regmap_read(opp_data->syscon, opp_data->soc_data->efuse_offset, &efuse); + + if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_IS_SINGLE_REG && ret == -EIO) + ret = regmap_read(opp_data->syscon, 0x0, &efuse); + if (opp_data->soc_data->quirks & TI_QUIRK_SYSCON_MAY_BE_MISSING && ret == -EIO) { /* not a syscon register! */ void __iomem *regs = ioremap(OMAP3_SYSCON_BASE + diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c new file mode 100644 index 000000000000..a050b3a6737f --- /dev/null +++ b/drivers/cpufreq/virtual-cpufreq.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * CPU0..CPUn + * +-------------+-------------------------------+--------+-------+ + * | Register | Description | Offset | Len | + * +-------------+-------------------------------+--------+-------+ + * | cur_perf | read this register to get | 0x0 | 0x4 | + * | | the current perf (integer val | | | + * | | representing perf relative to | | | + * | | max performance) | | | + * | | that vCPU is running at | | | + * +-------------+-------------------------------+--------+-------+ + * | set_perf | write to this register to set | 0x4 | 0x4 | + * | | perf value of the vCPU | | | + * +-------------+-------------------------------+--------+-------+ + * | perftbl_len | number of entries in perf | 0x8 | 0x4 | + * | | table. A single entry in the | | | + * | | perf table denotes no table | | | + * | | and the entry contains | | | + * | | the maximum perf value | | | + * | | that this vCPU supports. | | | + * | | The guest can request any | | | + * | | value between 1 and max perf | | | + * | | when perftbls are not used. | | | + * +---------------------------------------------+--------+-------+ + * | perftbl_sel | write to this register to | 0xc | 0x4 | + * | | select perf table entry to | | | + * | | read from | | | + * +---------------------------------------------+--------+-------+ + * | perftbl_rd | read this register to get | 0x10 | 0x4 | + * | | perf value of the selected | | | + * | | entry based on perftbl_sel | | | + * +---------------------------------------------+--------+-------+ + * | perf_domain | performance domain number | 0x14 | 0x4 | + * | | that this vCPU belongs to. | | | + * | | vCPUs sharing the same perf | | | + * | | domain number are part of the | | | + * | | same performance domain. | | | + * +-------------+-------------------------------+--------+-------+ + */ + +#define REG_CUR_PERF_STATE_OFFSET 0x0 +#define REG_SET_PERF_STATE_OFFSET 0x4 +#define REG_PERFTBL_LEN_OFFSET 0x8 +#define REG_PERFTBL_SEL_OFFSET 0xc +#define REG_PERFTBL_RD_OFFSET 0x10 +#define REG_PERF_DOMAIN_OFFSET 0x14 +#define PER_CPU_OFFSET 0x1000 + +#define PERFTBL_MAX_ENTRIES 64U + +static void __iomem *base; +static DEFINE_PER_CPU(u32, perftbl_num_entries); + +static void virt_scale_freq_tick(void) +{ + int cpu = smp_processor_id(); + u32 max_freq = (u32)cpufreq_get_hw_max_freq(cpu); + u64 cur_freq; + unsigned long scale; + + cur_freq = (u64)readl_relaxed(base + cpu * PER_CPU_OFFSET + + REG_CUR_PERF_STATE_OFFSET); + + cur_freq <<= SCHED_CAPACITY_SHIFT; + scale = (unsigned long)div_u64(cur_freq, max_freq); + scale = min(scale, SCHED_CAPACITY_SCALE); + + this_cpu_write(arch_freq_scale, scale); +} + +static struct scale_freq_data virt_sfd = { + .source = SCALE_FREQ_SOURCE_VIRT, + .set_freq_scale = virt_scale_freq_tick, +}; + +static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + writel_relaxed(target_freq, + base + policy->cpu * PER_CPU_OFFSET + REG_SET_PERF_STATE_OFFSET); + return 0; +} + +static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + virt_cpufreq_set_perf(policy, target_freq); + return target_freq; +} + +static u32 virt_cpufreq_get_perftbl_entry(int cpu, u32 idx) +{ + writel_relaxed(idx, base + cpu * PER_CPU_OFFSET + + REG_PERFTBL_SEL_OFFSET); + return readl_relaxed(base + cpu * PER_CPU_OFFSET + + REG_PERFTBL_RD_OFFSET); +} + +static int virt_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpufreq_freqs freqs; + int ret = 0; + + freqs.old = policy->cur; + freqs.new = target_freq; + + cpufreq_freq_transition_begin(policy, &freqs); + ret = virt_cpufreq_set_perf(policy, target_freq); + cpufreq_freq_transition_end(policy, &freqs, ret != 0); + + return ret; +} + +static int virt_cpufreq_get_sharing_cpus(struct cpufreq_policy *policy) +{ + u32 cur_perf_domain, perf_domain; + struct device *cpu_dev; + int cpu; + + cur_perf_domain = readl_relaxed(base + policy->cpu * + PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET); + + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + continue; + + perf_domain = readl_relaxed(base + cpu * + PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET); + + if (perf_domain == cur_perf_domain) + cpumask_set_cpu(cpu, policy->cpus); + } + + return 0; +} + +static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *table; + u32 num_perftbl_entries, idx; + + num_perftbl_entries = per_cpu(perftbl_num_entries, policy->cpu); + + if (num_perftbl_entries == 1) { + policy->cpuinfo.min_freq = 1; + policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0); + + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + + policy->cur = policy->max; + return 0; + } + + table = kcalloc(num_perftbl_entries + 1, sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + for (idx = 0; idx < num_perftbl_entries; idx++) + table[idx].frequency = virt_cpufreq_get_perftbl_entry(policy->cpu, idx); + + table[idx].frequency = CPUFREQ_TABLE_END; + policy->freq_table = table; + + return 0; +} + +static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + struct device *cpu_dev; + int ret; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) + return -ENODEV; + + ret = virt_cpufreq_get_freq_info(policy); + if (ret) { + dev_warn(cpu_dev, "failed to get cpufreq info\n"); + return ret; + } + + ret = virt_cpufreq_get_sharing_cpus(policy); + if (ret) { + dev_warn(cpu_dev, "failed to get sharing cpumask\n"); + return ret; + } + + /* + * To simplify and improve latency of handling frequency requests on + * the host side, this ensures that the vCPU thread triggering the MMIO + * abort is the same thread whose performance constraints (Ex. uclamp + * settings) need to be updated. This simplifies the VMM (Virtual + * Machine Manager) having to find the correct vCPU thread and/or + * facing permission issues when configuring other threads. + */ + policy->dvfs_possible_from_any_cpu = false; + policy->fast_switch_possible = true; + + /* + * Using the default SCALE_FREQ_SOURCE_CPUFREQ is insufficient since + * the actual physical CPU frequency may not match requested frequency + * from the vCPU thread due to frequency update latencies or other + * inputs to the physical CPU frequency selection. This additional FIE + * source allows for more accurate freq_scale updates and only takes + * effect if another FIE source such as AMUs have not been registered. + */ + topology_set_scale_freq_source(&virt_sfd, policy->cpus); + + return 0; +} + +static void virt_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus); + kfree(policy->freq_table); +} + +static int virt_cpufreq_online(struct cpufreq_policy *policy) +{ + /* Nothing to restore. */ + return 0; +} + +static int virt_cpufreq_offline(struct cpufreq_policy *policy) +{ + /* Dummy offline() to avoid exit() being called and freeing resources. */ + return 0; +} + +static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy) +{ + if (policy->freq_table) + return cpufreq_frequency_table_verify(policy, policy->freq_table); + + cpufreq_verify_within_cpu_limits(policy); + return 0; +} + +static struct cpufreq_driver cpufreq_virt_driver = { + .name = "virt-cpufreq", + .init = virt_cpufreq_cpu_init, + .exit = virt_cpufreq_cpu_exit, + .online = virt_cpufreq_online, + .offline = virt_cpufreq_offline, + .verify = virt_cpufreq_verify_policy, + .target = virt_cpufreq_target, + .fast_switch = virt_cpufreq_fast_switch, + .attr = cpufreq_generic_attr, +}; + +static int virt_cpufreq_driver_probe(struct platform_device *pdev) +{ + u32 num_perftbl_entries; + int ret, cpu; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + for_each_possible_cpu(cpu) { + num_perftbl_entries = readl_relaxed(base + cpu * PER_CPU_OFFSET + + REG_PERFTBL_LEN_OFFSET); + + if (!num_perftbl_entries || num_perftbl_entries > PERFTBL_MAX_ENTRIES) + return -ENODEV; + + per_cpu(perftbl_num_entries, cpu) = num_perftbl_entries; + } + + ret = cpufreq_register_driver(&cpufreq_virt_driver); + if (ret) { + dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret); + return ret; + } + + dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n"); + return 0; +} + +static void virt_cpufreq_driver_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&cpufreq_virt_driver); +} + +static const struct of_device_id virt_cpufreq_match[] = { + { .compatible = "qemu,virtual-cpufreq", .data = NULL}, + {} +}; +MODULE_DEVICE_TABLE(of, virt_cpufreq_match); + +static struct platform_driver virt_cpufreq_driver = { + .probe = virt_cpufreq_driver_probe, + .remove = virt_cpufreq_driver_remove, + .driver = { + .name = "virt-cpufreq", + .of_match_table = virt_cpufreq_match, + }, +}; + +static int __init virt_cpufreq_init(void) +{ + return platform_driver_register(&virt_cpufreq_driver); +} +postcore_initcall(virt_cpufreq_init); + +static void __exit virt_cpufreq_exit(void) +{ + platform_driver_unregister(&virt_cpufreq_driver); +} +module_exit(virt_cpufreq_exit); + +MODULE_DESCRIPTION("Virtual cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 06ace16f9e71..0835da449db8 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -69,11 +69,15 @@ int cpuidle_play_dead(void) if (!drv) return -ENODEV; - /* Find lowest-power state that supports long-term idle */ - for (i = drv->state_count - 1; i >= 0; i--) + for (i = drv->state_count - 1; i >= 0; i--) { if (drv->states[i].enter_dead) - return drv->states[i].enter_dead(dev, i); + drv->states[i].enter_dead(dev, i); + } + /* + * If :enter_dead() is successful, it will never return, so reaching + * here means that all of them failed above or were not present. + */ return -ENODEV; } diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 4a952c4885ed..2222e8b03ff4 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -45,6 +45,7 @@ enum scale_freq_source { SCALE_FREQ_SOURCE_CPUFREQ = 0, SCALE_FREQ_SOURCE_ARCH, SCALE_FREQ_SOURCE_CPPC, + SCALE_FREQ_SOURCE_VIRT, }; struct scale_freq_data { diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3183aeb7f5b4..a9ee4fe55dcf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -61,7 +61,7 @@ struct cpuidle_state { struct cpuidle_driver *drv, int index); - int (*enter_dead) (struct cpuidle_device *dev, int index); + void (*enter_dead) (struct cpuidle_device *dev, int index); /* * CPUs execute ->enter_s2idle with the local tick or entire timekeeping