mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-06 05:06:29 +00:00
Power management updates for 5.17-rc1
- Add new P-state driver for AMD processors (Huang Rui). - Fix initialization of min and max frequency QoS requests in the cpufreq core (Rafael Wysocki). - Fix EPP handling on Alder Lake in intel_pstate (Srinivas Pandruvada). - Make intel_pstate update cpuinfo.max_freq when notified of HWP capabilities changes and drop a redundant function call from that driver (Rafael Wysocki). - Improve IRQ support in the Qcom cpufreq driver (Ard Biesheuvel, Stephen Boyd, Vladimir Zapolskiy). - Fix double devm_remap() in the Mediatek cpufreq driver (Hector Yuan). - Introduce thermal pressure helpers for cpufreq CPU cooling (Lukasz Luba). - Make cpufreq use default_groups in kobj_type (Greg Kroah-Hartman). - Make cpuidle use default_groups in kobj_type (Greg Kroah-Hartman). - Fix two comments in cpuidle code (Jason Wang, Yang Li). - Allow model-specific normal EPB value to be used in the intel_epb sysfs attribute handling code (Srinivas Pandruvada). - Simplify locking in pm_runtime_put_suppliers() (Rafael Wysocki). - Add safety net to supplier device release in the runtime PM core code (Rafael Wysocki). - Capture device status before disabling runtime PM for it (Rafael Wysocki). - Add new macros for declaring PM operations to allow drivers to avoid guarding them with CONFIG_PM #ifdefs or __maybe_unused and update some drivers to use these macros (Paul Cercueil). - Allow ACPI hardware signature to be honoured during restore from hibernation (David Woodhouse). - Update outdated operating performance points (OPP) documentation (Tang Yizhou). - Reduce log severity for informative message regarding frequency transition failures in devfreq (Tzung-Bi Shih). - Add DRAM frequency controller devfreq driver for Allwinner sunXi SoCs (Samuel Holland). - Add missing COMMON_CLK dependency to sun8i devfreq driver (Arnd Bergmann). - Add support for new layout of Psys PowerLimit Register on SPR to the Intel RAPL power capping driver (Zhang Rui). 
- Fix typo in a comment in idle_inject.c (Jason Wang). - Remove unused function definition from the DTPM (Dynamit Thermal Power Management) power capping framework (Daniel Lezcano). - Reduce DTPM trace verbosity (Daniel Lezcano). -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAmHcgkgSHHJqd0Byand5 c29ja2kubmV0AAoJEILEb/54YlRxs34P/3kFhRk7qrwEekx6F11im6caLKT9+Qap PuGVqfTbK7TupVQDVGFBEjTjgKY7Ph7Fcr4bqn6wvNOp96cjXyOSk/c1fcpS3Bpr b1PYsFsb9diNKE462sGGYClyCT3X5qQqtpxzOl3g4I1PWKTC1mKFm4Jm2m6S6cFq DKhsgYKFzQSZNb1wJM4JjHS9c3BRygqp4nfEAmifu5b9tLZf7stWnFHhbGq63M9m OwHOrEEnzhf4pOXGZTvIXeczgE6IcuDdlGkIg7XMHnmKSNvj1HqhEgi2lfSRb98z 5eI4S6JymCJGVK+gr8iVCq1iJ+LKqV3YPXRqvI35/+NqIKYxMt2ZivQQf5s3aQLe 26gUulD3O6Pz5tMlwcDElD4/tcClfg35PCD/VzpRR8TAo8vLBb63kZ5v6+HM34ZJ 6QbLTNZJTnGmEqxMccUxP+HhZz8ssqpLAC+R2sE5yXbNpIZq8CbPiGb65RGiX3SG CmRKqH/xQVNKBYP0ChjmUyhKcBxOnx1Xu8AhsN7gRAy0aht7j7OdjTnJuGiX6gu3 Q5WxvVvkekyfhuFQ5TST9y/fzvMJWzeaA6GhVIr6RoBmshNQGTb0H4HXARxS3Ah5 qjd7ao7BFLa898FCHaHIpmFWp0wF5iljwCJQVP3I2qUpPvDJxEtsxc4CF/AZzyNR VudoFqLoIV5C =1egI -----END PGP SIGNATURE----- Merge tag 'pm-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm Pull power management updates from Rafael Wysocki: "The most signigicant change here is the addition of a new cpufreq 'P-state' driver for AMD processors as a better replacement for the venerable acpi-cpufreq driver. There are also other cpufreq updates (in the core, intel_pstate, ARM drivers), PM core updates (mostly related to adding new macros for declaring PM operations which should make the lives of driver developers somewhat easier), and a bunch of assorted fixes and cleanups. Summary: - Add new P-state driver for AMD processors (Huang Rui). - Fix initialization of min and max frequency QoS requests in the cpufreq core (Rafael Wysocki). - Fix EPP handling on Alder Lake in intel_pstate (Srinivas Pandruvada). 
- Make intel_pstate update cpuinfo.max_freq when notified of HWP capabilities changes and drop a redundant function call from that driver (Rafael Wysocki). - Improve IRQ support in the Qcom cpufreq driver (Ard Biesheuvel, Stephen Boyd, Vladimir Zapolskiy). - Fix double devm_remap() in the Mediatek cpufreq driver (Hector Yuan). - Introduce thermal pressure helpers for cpufreq CPU cooling (Lukasz Luba). - Make cpufreq use default_groups in kobj_type (Greg Kroah-Hartman). - Make cpuidle use default_groups in kobj_type (Greg Kroah-Hartman). - Fix two comments in cpuidle code (Jason Wang, Yang Li). - Allow model-specific normal EPB value to be used in the intel_epb sysfs attribute handling code (Srinivas Pandruvada). - Simplify locking in pm_runtime_put_suppliers() (Rafael Wysocki). - Add safety net to supplier device release in the runtime PM core code (Rafael Wysocki). - Capture device status before disabling runtime PM for it (Rafael Wysocki). - Add new macros for declaring PM operations to allow drivers to avoid guarding them with CONFIG_PM #ifdefs or __maybe_unused and update some drivers to use these macros (Paul Cercueil). - Allow ACPI hardware signature to be honoured during restore from hibernation (David Woodhouse). - Update outdated operating performance points (OPP) documentation (Tang Yizhou). - Reduce log severity for informative message regarding frequency transition failures in devfreq (Tzung-Bi Shih). - Add DRAM frequency controller devfreq driver for Allwinner sunXi SoCs (Samuel Holland). - Add missing COMMON_CLK dependency to sun8i devfreq driver (Arnd Bergmann). - Add support for new layout of Psys PowerLimit Register on SPR to the Intel RAPL power capping driver (Zhang Rui). - Fix typo in a comment in idle_inject.c (Jason Wang). - Remove unused function definition from the DTPM (Dynamit Thermal Power Management) power capping framework (Daniel Lezcano). 
- Reduce DTPM trace verbosity (Daniel Lezcano)" * tag 'pm-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (53 commits) x86, sched: Fix undefined reference to init_freq_invariance_cppc() build error cpufreq: amd-pstate: Fix Kconfig dependencies for AMD P-State cpufreq: amd-pstate: Fix struct amd_cpudata kernel-doc comment cpuidle: use default_groups in kobj_type x86: intel_epb: Allow model specific normal EPB value MAINTAINERS: Add AMD P-State driver maintainer entry Documentation: amd-pstate: Add AMD P-State driver introduction cpufreq: amd-pstate: Add AMD P-State performance attributes cpufreq: amd-pstate: Add AMD P-State frequencies attributes cpufreq: amd-pstate: Add boost mode support for AMD P-State cpufreq: amd-pstate: Add trace for AMD P-State module cpufreq: amd-pstate: Introduce the support for the processors with shared memory solution cpufreq: amd-pstate: Add fast switch function for AMD P-State cpufreq: amd-pstate: Introduce a new AMD P-State driver to support future processors ACPI: CPPC: Add CPPC enable register function ACPI: CPPC: Check present CPUs for determining _CPC is valid ACPI: CPPC: Implement support for SystemIO registers x86/msr: Add AMD CPPC MSR definitions x86/cpufeatures: Add AMD Collaborative Processor Performance Control feature flag cpufreq: use default_groups in kobj_type ...
This commit is contained in:
commit
b35b6d4d71
@ -4,6 +4,8 @@
|
||||
Collaborative Processor Performance Control (CPPC)
|
||||
==================================================
|
||||
|
||||
.. _cppc_sysfs:
|
||||
|
||||
CPPC
|
||||
====
|
||||
|
||||
|
@ -225,14 +225,23 @@
|
||||
For broken nForce2 BIOS resulting in XT-PIC timer.
|
||||
|
||||
acpi_sleep= [HW,ACPI] Sleep options
|
||||
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
|
||||
old_ordering, nonvs, sci_force_enable, nobl }
|
||||
Format: { s3_bios, s3_mode, s3_beep, s4_hwsig,
|
||||
s4_nohwsig, old_ordering, nonvs,
|
||||
sci_force_enable, nobl }
|
||||
See Documentation/power/video.rst for information on
|
||||
s3_bios and s3_mode.
|
||||
s3_beep is for debugging; it makes the PC's speaker beep
|
||||
as soon as the kernel's real-mode entry point is called.
|
||||
s4_hwsig causes the kernel to check the ACPI hardware
|
||||
signature during resume from hibernation, and gracefully
|
||||
refuse to resume if it has changed. This complies with
|
||||
the ACPI specification but not with reality, since
|
||||
Windows does not do this and many laptops do change it
|
||||
on docking. So the default behaviour is to allow resume
|
||||
and simply warn when the signature changes, unless the
|
||||
s4_hwsig option is enabled.
|
||||
s4_nohwsig prevents ACPI hardware signature from being
|
||||
used during resume from hibernation.
|
||||
used (or even warned about) during resume.
|
||||
old_ordering causes the ACPI 1.0 ordering of the _PTS
|
||||
control method, with respect to putting devices into
|
||||
low power states, to be enforced (the ACPI 2.0 ordering
|
||||
|
382
Documentation/admin-guide/pm/amd-pstate.rst
Normal file
382
Documentation/admin-guide/pm/amd-pstate.rst
Normal file
@ -0,0 +1,382 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
===============================================
|
||||
``amd-pstate`` CPU Performance Scaling Driver
|
||||
===============================================
|
||||
|
||||
:Copyright: |copy| 2021 Advanced Micro Devices, Inc.
|
||||
|
||||
:Author: Huang Rui <ray.huang@amd.com>
|
||||
|
||||
|
||||
Introduction
|
||||
===================
|
||||
|
||||
``amd-pstate`` is the AMD CPU performance scaling driver that introduces a
|
||||
new CPU frequency control mechanism on modern AMD APU and CPU series in
|
||||
Linux kernel. The new mechanism is based on Collaborative Processor
|
||||
Performance Control (CPPC) which provides finer grain frequency management
|
||||
than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using
|
||||
the ACPI P-states driver to manage CPU frequency and clocks with switching
|
||||
only in 3 P-states. CPPC replaces the ACPI P-states controls, allows a
|
||||
flexible, low-latency interface for the Linux kernel to directly
|
||||
communicate the performance hints to hardware.
|
||||
|
||||
``amd-pstate`` leverages the Linux kernel governors such as ``schedutil``,
|
||||
``ondemand``, etc. to manage the performance hints which are provided by
|
||||
CPPC hardware functionality that internally follows the hardware
|
||||
specification (for details refer to AMD64 Architecture Programmer's Manual
|
||||
Volume 2: System Programming [1]_). Currently ``amd-pstate`` supports basic
|
||||
frequency control function according to kernel governors on some of the
|
||||
Zen2 and Zen3 processors, and we will implement more AMD specific functions
|
||||
in future after we verify them on the hardware and SBIOS.
|
||||
|
||||
|
||||
AMD CPPC Overview
|
||||
=======================
|
||||
|
||||
Collaborative Processor Performance Control (CPPC) interface enumerates a
|
||||
continuous, abstract, and unit-less performance value in a scale that is
|
||||
not tied to a specific performance state / frequency. This is an ACPI
|
||||
standard [2]_ through which software can specify application performance goals and
|
||||
hints as a relative target to the infrastructure limits. AMD processors
|
||||
provide the low latency register model (MSR) instead of AML code
|
||||
interpreter for performance adjustments. ``amd-pstate`` will initialize a
|
||||
``struct cpufreq_driver`` instance ``amd_pstate_driver`` with the callbacks
|
||||
to manage each performance update behavior. ::
|
||||
|
||||
Highest Perf ------>+-----------------------+ +-----------------------+
|
||||
| | | |
|
||||
| | | |
|
||||
| | Max Perf ---->| |
|
||||
| | | |
|
||||
| | | |
|
||||
Nominal Perf ------>+-----------------------+ +-----------------------+
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | Desired Perf ---->| |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
Lowest non- | | | |
|
||||
linear perf ------>+-----------------------+ +-----------------------+
|
||||
| | | |
|
||||
| | Lowest perf ---->| |
|
||||
| | | |
|
||||
Lowest perf ------>+-----------------------+ +-----------------------+
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
0 ------>+-----------------------+ +-----------------------+
|
||||
|
||||
AMD P-States Performance Scale
|
||||
|
||||
|
||||
.. _perf_cap:
|
||||
|
||||
AMD CPPC Performance Capability
|
||||
--------------------------------
|
||||
|
||||
Highest Performance (RO)
|
||||
.........................
|
||||
|
||||
It is the absolute maximum performance an individual processor may reach,
|
||||
assuming ideal conditions. This performance level may not be sustainable
|
||||
for long durations and may only be achievable if other platform components
|
||||
are in a specific state; for example, it may require other processors be in
|
||||
an idle state. This would be equivalent to the highest frequencies
|
||||
supported by the processor.
|
||||
|
||||
Nominal (Guaranteed) Performance (RO)
|
||||
......................................
|
||||
|
||||
It is the maximum sustained performance level of the processor, assuming
|
||||
ideal operating conditions. In absence of an external constraint (power,
|
||||
thermal, etc.) this is the performance level the processor is expected to
|
||||
be able to maintain continuously. All cores/processors are expected to be
|
||||
able to sustain their nominal performance state simultaneously.
|
||||
|
||||
Lowest non-linear Performance (RO)
|
||||
...................................
|
||||
|
||||
It is the lowest performance level at which nonlinear power savings are
|
||||
achieved, for example, due to the combined effects of voltage and frequency
|
||||
scaling. Above this threshold, lower performance levels should be generally
|
||||
more energy efficient than higher performance levels. This register
|
||||
effectively conveys the most efficient performance level to ``amd-pstate``.
|
||||
|
||||
Lowest Performance (RO)
|
||||
........................
|
||||
|
||||
It is the absolute lowest performance level of the processor. Selecting a
|
||||
performance level lower than the lowest nonlinear performance level may
|
||||
cause an efficiency penalty but should reduce the instantaneous power
|
||||
consumption of the processor.
|
||||
|
||||
AMD CPPC Performance Control
|
||||
------------------------------
|
||||
|
||||
``amd-pstate`` passes performance goals through these registers. The
|
||||
register drives the behavior of the desired performance target.
|
||||
|
||||
Minimum requested performance (RW)
|
||||
...................................
|
||||
|
||||
``amd-pstate`` specifies the minimum allowed performance level.
|
||||
|
||||
Maximum requested performance (RW)
|
||||
...................................
|
||||
|
||||
``amd-pstate`` specifies a limit on the maximum performance that is expected
|
||||
to be supplied by the hardware.
|
||||
|
||||
Desired performance target (RW)
|
||||
...................................
|
||||
|
||||
``amd-pstate`` specifies a desired target in the CPPC performance scale as
|
||||
a relative number. This can be expressed as percentage of nominal
|
||||
performance (infrastructure max). Below the nominal sustained performance
|
||||
level, desired performance expresses the average performance level of the
|
||||
processor subject to hardware. Above the nominal performance level,
|
||||
processor must provide at least nominal performance requested and go higher
|
||||
if current operating conditions allow.
|
||||
|
||||
Energy Performance Preference (EPP) (RW)
|
||||
.........................................
|
||||
|
||||
Provides a hint to the hardware if software wants to bias toward performance
|
||||
(0x0) or energy efficiency (0xff).
|
||||
|
||||
|
||||
Key Governors Support
|
||||
=======================
|
||||
|
||||
``amd-pstate`` can be used with all the (generic) scaling governors listed
|
||||
by the ``scaling_available_governors`` policy attribute in ``sysfs``. Then,
|
||||
it is responsible for the configuration of policy objects corresponding to
|
||||
CPUs and provides the ``CPUFreq`` core (and the scaling governors attached
|
||||
to the policy objects) with accurate information on the maximum and minimum
|
||||
operating frequencies supported by the hardware. Users can check the
|
||||
``scaling_cur_freq`` information that comes from the ``CPUFreq`` core.
|
||||
|
||||
``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic
|
||||
frequency control. It is to fine tune the processor configuration on
|
||||
``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate``
|
||||
registers adjust_perf callback to implement the CPPC similar performance
|
||||
update behavior. It is initialized by ``sugov_start`` and then populates the
|
||||
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as
|
||||
the utilization update callback function in CPU scheduler. CPU scheduler
|
||||
will call ``cpufreq_update_util`` and assign the target performance
|
||||
according to the ``struct sugov_cpu`` that utilization update belongs to.
|
||||
Then ``amd-pstate`` updates the desired performance according to the CPU
|
||||
scheduler assigned.
|
||||
|
||||
|
||||
Processor Support
|
||||
=======================
|
||||
|
||||
The ``amd-pstate`` initialization will fail if the _CPC in ACPI SBIOS does
|
||||
not exist for the detected processor, and it uses ``acpi_cpc_valid`` to
|
||||
check the _CPC existence. All Zen based processors support legacy ACPI
|
||||
hardware P-States function, so while the ``amd-pstate`` fails to be
|
||||
initialized, the kernel will fall back to initialize ``acpi-cpufreq``
|
||||
driver.
|
||||
|
||||
There are two types of hardware implementations for ``amd-pstate``: one is
|
||||
`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support
|
||||
<perf_cap_>`_. It can use :c:macro:`X86_FEATURE_CPPC` feature flag (for
|
||||
details refer to Processor Programming Reference (PPR) for AMD Family
|
||||
19h Model 51h, Revision A1 Processors [3]_) to indicate the different
|
||||
types. ``amd-pstate`` is to register different ``static_call`` instances
|
||||
for different hardware implementations.
|
||||
|
||||
Currently, some of Zen2 and Zen3 processors support ``amd-pstate``. In the
|
||||
future, it will be supported on more and more AMD processors.
|
||||
|
||||
Full MSR Support
|
||||
-----------------
|
||||
|
||||
Some new Zen3 processors such as Cezanne provide the MSR registers directly
|
||||
while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set.
|
||||
``amd-pstate`` can handle the MSR register to implement the fast switch
|
||||
function in ``CPUFreq`` that can shrink latency of frequency control on the
|
||||
interrupt context. The functions with ``pstate_xxx`` prefix represent the
|
||||
operations of MSR registers.
|
||||
|
||||
Shared Memory Support
|
||||
----------------------
|
||||
|
||||
If :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, that means the
|
||||
processor supports shared memory solution. In this case, ``amd-pstate``
|
||||
uses the ``cppc_acpi`` helper methods to implement the callback functions
|
||||
that are defined on ``static_call``. The functions with ``cppc_xxx`` prefix
|
||||
represent the operations of acpi cppc helpers for shared memory solution.
|
||||
|
||||
|
||||
AMD P-States and ACPI hardware P-States can always be supported in one
|
||||
processor. But AMD P-States has the higher priority and if it is enabled
|
||||
with :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond
|
||||
to the request from AMD P-States.
|
||||
|
||||
|
||||
User Space Interface in ``sysfs``
|
||||
==================================
|
||||
|
||||
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||
control its functionality at the system level. They are located in the
|
||||
``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. ::
|
||||
|
||||
root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
|
||||
/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_highest_perf
|
||||
/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_lowest_nonlinear_freq
|
||||
/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_max_freq
|
||||
|
||||
|
||||
``amd_pstate_highest_perf / amd_pstate_max_freq``
|
||||
|
||||
Maximum CPPC performance and CPU frequency that the driver is allowed to
|
||||
set in percent of the maximum supported CPPC performance level (the highest
|
||||
performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||
In some of ASICs, the highest CPPC performance is not the one in the _CPC
|
||||
table, so we need to expose it to sysfs. If boost is not active but
|
||||
supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
|
||||
The lowest non-linear CPPC CPU frequency that the driver is allowed to set
|
||||
in percent of the maximum supported CPPC performance level (Please see the
|
||||
lowest non-linear performance in `AMD CPPC Performance Capability
|
||||
<perf_cap_>`_).
|
||||
This attribute is read-only.
|
||||
|
||||
For other performance and frequency values, we can read them back from
|
||||
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
|
||||
|
||||
|
||||
``amd-pstate`` vs ``acpi-cpufreq``
|
||||
======================================
|
||||
|
||||
On majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
||||
provided by the platform firmware are used for CPU performance scaling, but
|
||||
only provide 3 P-states on AMD processors.
|
||||
However, on modern AMD APU and CPU series, it provides the collaborative
|
||||
processor performance control according to ACPI protocol and customizes this
|
||||
for AMD platforms. That is fine-grain and continuous frequency range
|
||||
instead of the legacy hardware P-states. ``amd-pstate`` is the kernel
|
||||
module which supports the new AMD P-States mechanism on most of future AMD
|
||||
platforms. The AMD P-States mechanism will be the more performant and energy
|
||||
efficient frequency management method on AMD processors.
|
||||
|
||||
Kernel Module Options for ``amd-pstate``
|
||||
=========================================
|
||||
|
||||
``shared_mem``
|
||||
Use a module param (shared_mem) to enable related processors manually with
|
||||
**amd_pstate.shared_mem=1**.
|
||||
Due to the performance issue on the processors with `Shared Memory Support
|
||||
<perf_cap_>`_, we disable it for the moment and will enable this by default
|
||||
once we address the performance issue with this solution.
|
||||
|
||||
The way to check whether current processor is `Full MSR Support <perf_cap_>`_
|
||||
or `Shared Memory Support <perf_cap_>`_ : ::
|
||||
|
||||
ray@hr-test1:~$ lscpu | grep cppc
|
||||
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
|
||||
|
||||
If CPU Flags have cppc, then this processor supports `Full MSR Support
|
||||
<perf_cap_>`_. Otherwise it supports `Shared Memory Support <perf_cap_>`_.
|
||||
|
||||
|
||||
``cpupower`` tool support for ``amd-pstate``
|
||||
===============================================
|
||||
|
||||
``amd-pstate`` is supported on ``cpupower`` tool that can be used to dump the frequency
|
||||
information. And it is in progress to support more and more operations for new
|
||||
``amd-pstate`` module with this tool. ::
|
||||
|
||||
root@hr-test1:/home/ray# cpupower frequency-info
|
||||
analyzing CPU 0:
|
||||
driver: amd-pstate
|
||||
CPUs which run at the same hardware frequency: 0
|
||||
CPUs which need to have their frequency coordinated by software: 0
|
||||
maximum transition latency: 131 us
|
||||
hardware limits: 400 MHz - 4.68 GHz
|
||||
available cpufreq governors: ondemand conservative powersave userspace performance schedutil
|
||||
current policy: frequency should be within 400 MHz and 4.68 GHz.
|
||||
The governor "schedutil" may decide which speed to use
|
||||
within this range.
|
||||
current CPU frequency: Unable to call hardware
|
||||
current CPU frequency: 4.02 GHz (asserted by call to kernel)
|
||||
boost state support:
|
||||
Supported: yes
|
||||
Active: yes
|
||||
AMD PSTATE Highest Performance: 166. Maximum Frequency: 4.68 GHz.
|
||||
AMD PSTATE Nominal Performance: 117. Nominal Frequency: 3.30 GHz.
|
||||
AMD PSTATE Lowest Non-linear Performance: 39. Lowest Non-linear Frequency: 1.10 GHz.
|
||||
AMD PSTATE Lowest Performance: 15. Lowest Frequency: 400 MHz.
|
||||
|
||||
|
||||
Diagnostics and Tuning
|
||||
=======================
|
||||
|
||||
Trace Events
|
||||
--------------
|
||||
|
||||
There are two static trace events that can be used for ``amd-pstate``
|
||||
diagnostics. One of them is the cpu_frequency trace event generally used
|
||||
by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event
|
||||
specific to ``amd-pstate``. The following sequence of shell commands can
|
||||
be used to enable them and see their output (if the kernel is generally
|
||||
configured to support event tracing). ::
|
||||
|
||||
root@hr-test1:/home/ray# cd /sys/kernel/tracing/
|
||||
root@hr-test1:/sys/kernel/tracing# echo 1 > events/amd_cpu/enable
|
||||
root@hr-test1:/sys/kernel/tracing# cat trace
|
||||
# tracer: nop
|
||||
#
|
||||
# entries-in-buffer/entries-written: 47827/42233061 #P:2
|
||||
#
|
||||
# _-----=> irqs-off
|
||||
# / _----=> need-resched
|
||||
# | / _---=> hardirq/softirq
|
||||
# || / _--=> preempt-depth
|
||||
# ||| / delay
|
||||
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
|
||||
# | | | |||| | |
|
||||
<idle>-0 [015] dN... 4995.979886: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=15 changed=false fast_switch=true
|
||||
<idle>-0 [007] d.h.. 4995.979893: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true
|
||||
cat-2161 [000] d.... 4995.980841: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=0 changed=false fast_switch=true
|
||||
sshd-2125 [004] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=4 changed=false fast_switch=true
|
||||
<idle>-0 [007] d.s.. 4995.980968: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=7 changed=false fast_switch=true
|
||||
<idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true
|
||||
<idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true
|
||||
|
||||
The cpu_frequency trace event will be triggered either by the ``schedutil`` scaling
|
||||
governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the
|
||||
policies with other scaling governors).
|
||||
|
||||
|
||||
Reference
|
||||
===========
|
||||
|
||||
.. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming,
|
||||
https://www.amd.com/system/files/TechDocs/24593.pdf
|
||||
|
||||
.. [2] Advanced Configuration and Power Interface Specification,
|
||||
https://uefi.org/sites/default/files/resources/ACPI_Spec_6_4_Jan22.pdf
|
||||
|
||||
.. [3] Processor Programming Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors
|
||||
https://www.amd.com/system/files/TechDocs/56569-A1-PUB.zip
|
@ -11,6 +11,7 @@ Working-State Power Management
|
||||
intel_idle
|
||||
cpufreq
|
||||
intel_pstate
|
||||
amd-pstate
|
||||
cpufreq_drivers
|
||||
intel_epb
|
||||
intel-speed-select
|
||||
|
@ -48,9 +48,9 @@ We can represent these as three OPPs as the following {Hz, uV} tuples:
|
||||
OPP library provides a set of helper functions to organize and query the OPP
|
||||
information. The library is located in drivers/opp/ directory and the header
|
||||
is located in include/linux/pm_opp.h. OPP library can be enabled by enabling
|
||||
CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
|
||||
CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
|
||||
optionally boot at a certain OPP without needing cpufreq.
|
||||
CONFIG_PM_OPP from power management menuconfig menu. Certain SoCs such as Texas
|
||||
Instrument's OMAP framework allows to optionally boot at a certain OPP without
|
||||
needing cpufreq.
|
||||
|
||||
Typical usage of the OPP library is as follows::
|
||||
|
||||
@ -75,8 +75,8 @@ operations until that OPP could be re-enabled if possible.
|
||||
|
||||
OPP library facilitates this concept in its implementation. The following
|
||||
operational functions operate only on available opps:
|
||||
opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq,
|
||||
dev_pm_opp_get_opp_count
|
||||
dev_pm_opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq,
|
||||
dev_pm_opp_get_opp_count.
|
||||
|
||||
dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer
|
||||
which can then be used for dev_pm_opp_enable/disable functions to make an
|
||||
@ -103,7 +103,7 @@ dev_pm_opp_add
|
||||
The OPP is defined using the frequency and voltage. Once added, the OPP
|
||||
is assumed to be available and control of its availability can be done
|
||||
with the dev_pm_opp_enable/disable functions. OPP library
|
||||
internally stores and manages this information in the opp struct.
|
||||
internally stores and manages this information in the dev_pm_opp struct.
|
||||
This function may be used by SoC framework to define an optimal list
|
||||
as per the demands of SoC usage environment.
|
||||
|
||||
@ -247,7 +247,7 @@ dev_pm_opp_disable
|
||||
5. OPP Data Retrieval Functions
|
||||
===============================
|
||||
Since OPP library abstracts away the OPP information, a set of functions to pull
|
||||
information from the OPP structure is necessary. Once an OPP pointer is
|
||||
information from the dev_pm_opp structure is necessary. Once an OPP pointer is
|
||||
retrieved using the search functions, the following functions can be used by SoC
|
||||
framework to retrieve the information represented inside the OPP layer.
|
||||
|
||||
|
@ -265,6 +265,10 @@ defined in include/linux/pm.h:
|
||||
RPM_SUSPENDED, which means that each device is initially regarded by the
|
||||
PM core as 'suspended', regardless of its real hardware status
|
||||
|
||||
`enum rpm_status last_status;`
|
||||
- the last runtime PM status of the device captured before disabling runtime
|
||||
PM for it (invalid initially and when disable_depth is 0)
|
||||
|
||||
`unsigned int runtime_auto;`
|
||||
- if set, indicates that the user space has allowed the device driver to
|
||||
power manage the device at run time via the /sys/devices/.../power/control
|
||||
@ -333,10 +337,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
||||
|
||||
`int pm_runtime_resume(struct device *dev);`
|
||||
- execute the subsystem-level resume callback for the device; returns 0 on
|
||||
success, 1 if the device's runtime PM status was already 'active' or
|
||||
error code on failure, where -EAGAIN means it may be safe to attempt to
|
||||
resume the device again in future, but 'power.runtime_error' should be
|
||||
checked additionally, and -EACCES means that 'power.disable_depth' is
|
||||
success, 1 if the device's runtime PM status is already 'active' (also if
|
||||
'power.disable_depth' is nonzero, but the status was 'active' when it was
|
||||
changing from 0 to 1) or error code on failure, where -EAGAIN means it may
|
||||
be safe to attempt to resume the device again in future, but
|
||||
'power.runtime_error' should be checked additionally, and -EACCES means
|
||||
that the callback could not be run, because 'power.disable_depth' was
|
||||
different from 0
|
||||
|
||||
`int pm_runtime_resume_and_get(struct device *dev);`
|
||||
|
@ -994,6 +994,13 @@ S: Supported
|
||||
T: git https://gitlab.freedesktop.org/agd5f/linux.git
|
||||
F: drivers/gpu/drm/amd/pm/
|
||||
|
||||
AMD PSTATE DRIVER
|
||||
M: Huang Rui <ray.huang@amd.com>
|
||||
L: linux-pm@vger.kernel.org
|
||||
S: Supported
|
||||
F: Documentation/admin-guide/pm/amd-pstate.rst
|
||||
F: drivers/cpufreq/amd-pstate*
|
||||
|
||||
AMD PTDMA DRIVER
|
||||
M: Sanjay R Mehta <sanju.mehta@amd.com>
|
||||
L: dmaengine@vger.kernel.org
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
/* Replace task scheduler's default thermal pressure API */
|
||||
#define arch_scale_thermal_pressure topology_get_thermal_pressure
|
||||
#define arch_set_thermal_pressure topology_set_thermal_pressure
|
||||
#define arch_update_thermal_pressure topology_update_thermal_pressure
|
||||
|
||||
#else
|
||||
|
||||
|
@ -32,7 +32,7 @@ void update_freq_counters_refs(void);
|
||||
|
||||
/* Replace task scheduler's default thermal pressure API */
|
||||
#define arch_scale_thermal_pressure topology_get_thermal_pressure
|
||||
#define arch_set_thermal_pressure topology_set_thermal_pressure
|
||||
#define arch_update_thermal_pressure topology_update_thermal_pressure
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
|
@ -315,6 +315,7 @@
|
||||
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
|
||||
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
|
||||
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
|
||||
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
|
||||
|
||||
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
|
||||
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
||||
|
@ -486,6 +486,23 @@
|
||||
|
||||
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
|
||||
|
||||
/* AMD Collaborative Processor Performance Control MSRs */
|
||||
#define MSR_AMD_CPPC_CAP1 0xc00102b0
|
||||
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
|
||||
#define MSR_AMD_CPPC_CAP2 0xc00102b2
|
||||
#define MSR_AMD_CPPC_REQ 0xc00102b3
|
||||
#define MSR_AMD_CPPC_STATUS 0xc00102b4
|
||||
|
||||
#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
|
||||
#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
|
||||
#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
|
||||
#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
|
||||
|
||||
#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
|
||||
#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
|
||||
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
|
||||
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
|
||||
|
||||
/* Fam 17h MSRs */
|
||||
#define MSR_F17H_IRPERF 0xc00000e9
|
||||
|
||||
|
@ -221,7 +221,7 @@ static inline void arch_set_max_freq_ratio(bool turbo_disabled)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ACPI_CPPC_LIB
|
||||
#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP)
|
||||
void init_freq_invariance_cppc(void);
|
||||
#define init_freq_invariance_cppc init_freq_invariance_cppc
|
||||
#endif
|
||||
|
@ -139,8 +139,10 @@ static int __init acpi_sleep_setup(char *str)
|
||||
if (strncmp(str, "s3_beep", 7) == 0)
|
||||
acpi_realmode_flags |= 4;
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
if (strncmp(str, "s4_hwsig", 8) == 0)
|
||||
acpi_check_s4_hw_signature(1);
|
||||
if (strncmp(str, "s4_nohwsig", 10) == 0)
|
||||
acpi_no_s4_hw_signature();
|
||||
acpi_check_s4_hw_signature(0);
|
||||
#endif
|
||||
if (strncmp(str, "nonvs", 5) == 0)
|
||||
acpi_nvs_nosave();
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/pm.h>
|
||||
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
@ -58,6 +59,22 @@ static DEFINE_PER_CPU(u8, saved_epb);
|
||||
#define EPB_SAVED 0x10ULL
|
||||
#define MAX_EPB EPB_MASK
|
||||
|
||||
enum energy_perf_value_index {
|
||||
EPB_INDEX_PERFORMANCE,
|
||||
EPB_INDEX_BALANCE_PERFORMANCE,
|
||||
EPB_INDEX_NORMAL,
|
||||
EPB_INDEX_BALANCE_POWERSAVE,
|
||||
EPB_INDEX_POWERSAVE,
|
||||
};
|
||||
|
||||
static u8 energ_perf_values[] = {
|
||||
[EPB_INDEX_PERFORMANCE] = ENERGY_PERF_BIAS_PERFORMANCE,
|
||||
[EPB_INDEX_BALANCE_PERFORMANCE] = ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
|
||||
[EPB_INDEX_NORMAL] = ENERGY_PERF_BIAS_NORMAL,
|
||||
[EPB_INDEX_BALANCE_POWERSAVE] = ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
|
||||
[EPB_INDEX_POWERSAVE] = ENERGY_PERF_BIAS_POWERSAVE,
|
||||
};
|
||||
|
||||
static int intel_epb_save(void)
|
||||
{
|
||||
u64 epb;
|
||||
@ -90,7 +107,7 @@ static void intel_epb_restore(void)
|
||||
*/
|
||||
val = epb & EPB_MASK;
|
||||
if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
|
||||
val = ENERGY_PERF_BIAS_NORMAL;
|
||||
val = energ_perf_values[EPB_INDEX_NORMAL];
|
||||
pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
|
||||
}
|
||||
}
|
||||
@ -103,18 +120,11 @@ static struct syscore_ops intel_epb_syscore_ops = {
|
||||
};
|
||||
|
||||
static const char * const energy_perf_strings[] = {
|
||||
"performance",
|
||||
"balance-performance",
|
||||
"normal",
|
||||
"balance-power",
|
||||
"power"
|
||||
};
|
||||
static const u8 energ_perf_values[] = {
|
||||
ENERGY_PERF_BIAS_PERFORMANCE,
|
||||
ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
|
||||
ENERGY_PERF_BIAS_NORMAL,
|
||||
ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
|
||||
ENERGY_PERF_BIAS_POWERSAVE
|
||||
[EPB_INDEX_PERFORMANCE] = "performance",
|
||||
[EPB_INDEX_BALANCE_PERFORMANCE] = "balance-performance",
|
||||
[EPB_INDEX_NORMAL] = "normal",
|
||||
[EPB_INDEX_BALANCE_POWERSAVE] = "balance-power",
|
||||
[EPB_INDEX_POWERSAVE] = "power",
|
||||
};
|
||||
|
||||
static ssize_t energy_perf_bias_show(struct device *dev,
|
||||
@ -193,13 +203,22 @@ static int intel_epb_offline(unsigned int cpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct x86_cpu_id intel_epb_normal[] = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 7),
|
||||
{}
|
||||
};
|
||||
|
||||
static __init int intel_epb_init(void)
|
||||
{
|
||||
const struct x86_cpu_id *id = x86_match_cpu(intel_epb_normal);
|
||||
int ret;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_EPB))
|
||||
return -ENODEV;
|
||||
|
||||
if (id)
|
||||
energ_perf_values[EPB_INDEX_NORMAL] = id->driver_data;
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
|
||||
"x86/intel/epb:online", intel_epb_online,
|
||||
intel_epb_offline);
|
||||
|
@ -118,6 +118,8 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
|
||||
*/
|
||||
#define NUM_RETRIES 500ULL
|
||||
|
||||
/*
 * Mask of every address bit above the low 16. A SystemIO register address
 * that has any of these bits set does not fit a 16-bit I/O port number.
 * The expansion is parenthesized so it stays a single operand wherever used.
 */
#define OVER_16BTS_MASK	(~0xFFFFULL)
|
||||
|
||||
#define define_one_cppc_ro(_name) \
|
||||
static struct kobj_attribute _name = \
|
||||
__ATTR(_name, 0444, show_##_name, NULL)
|
||||
@ -412,7 +414,7 @@ bool acpi_cpc_valid(void)
|
||||
struct cpc_desc *cpc_ptr;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
for_each_present_cpu(cpu) {
|
||||
cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
|
||||
if (!cpc_ptr)
|
||||
return false;
|
||||
@ -730,9 +732,26 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
|
||||
goto out_free;
|
||||
cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr;
|
||||
}
|
||||
} else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
|
||||
if (gas_t->access_width < 1 || gas_t->access_width > 3) {
|
||||
/*
|
||||
* 1 = 8-bit, 2 = 16-bit, and 3 = 32-bit.
|
||||
* SystemIO doesn't implement 64-bit
|
||||
* registers.
|
||||
*/
|
||||
pr_debug("Invalid access width %d for SystemIO register\n",
|
||||
gas_t->access_width);
|
||||
goto out_free;
|
||||
}
|
||||
if (gas_t->address & OVER_16BTS_MASK) {
|
||||
/* SystemIO registers use 16-bit integer addresses */
|
||||
pr_debug("Invalid IO port %llu for SystemIO register\n",
|
||||
gas_t->address);
|
||||
goto out_free;
|
||||
}
|
||||
} else {
|
||||
if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
|
||||
/* Support only PCC ,SYS MEM and FFH type regs */
|
||||
/* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */
|
||||
pr_debug("Unsupported register type: %d\n", gas_t->space_id);
|
||||
goto out_free;
|
||||
}
|
||||
@ -907,7 +926,21 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
|
||||
}
|
||||
|
||||
*val = 0;
|
||||
if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
|
||||
|
||||
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
|
||||
u32 width = 8 << (reg->access_width - 1);
|
||||
acpi_status status;
|
||||
|
||||
status = acpi_os_read_port((acpi_io_address)reg->address,
|
||||
(u32 *)val, width);
|
||||
if (ACPI_FAILURE(status)) {
|
||||
pr_debug("Error: Failed to read SystemIO port %llx\n",
|
||||
reg->address);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
|
||||
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
|
||||
else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
|
||||
vaddr = reg_res->sys_mem_vaddr;
|
||||
@ -946,7 +979,20 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
|
||||
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
|
||||
struct cpc_reg *reg = &reg_res->cpc_entry.reg;
|
||||
|
||||
if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
|
||||
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
|
||||
u32 width = 8 << (reg->access_width - 1);
|
||||
acpi_status status;
|
||||
|
||||
status = acpi_os_write_port((acpi_io_address)reg->address,
|
||||
(u32)val, width);
|
||||
if (ACPI_FAILURE(status)) {
|
||||
pr_debug("Error: Failed to write SystemIO port %llx\n",
|
||||
reg->address);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
|
||||
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
|
||||
else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
|
||||
vaddr = reg_res->sys_mem_vaddr;
|
||||
@ -1213,6 +1259,51 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs);
|
||||
|
||||
/**
|
||||
* cppc_set_enable - Set to enable CPPC on the processor by writing the
|
||||
* Continuous Performance Control package EnableRegister field.
|
||||
* @cpu: CPU for which to enable CPPC register.
|
||||
* @enable: 0 - disable, 1 - enable CPPC feature on the processor.
|
||||
*
|
||||
* Return: 0 for success, -ERRNO or -EIO otherwise.
|
||||
*/
|
||||
int cppc_set_enable(int cpu, bool enable)
|
||||
{
|
||||
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
|
||||
struct cpc_register_resource *enable_reg;
|
||||
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
|
||||
struct cppc_pcc_data *pcc_ss_data = NULL;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!cpc_desc) {
|
||||
pr_debug("No CPC descriptor for CPU:%d\n", cpu);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
enable_reg = &cpc_desc->cpc_regs[ENABLE];
|
||||
|
||||
if (CPC_IN_PCC(enable_reg)) {
|
||||
|
||||
if (pcc_ss_id < 0)
|
||||
return -EIO;
|
||||
|
||||
ret = cpc_write(cpu, enable_reg, enable);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pcc_ss_data = pcc_data[pcc_ss_id];
|
||||
|
||||
down_write(&pcc_ss_data->pcc_lock);
|
||||
/* after writing CPC, transfer the ownership of PCC to platfrom */
|
||||
ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE);
|
||||
up_write(&pcc_ss_data->pcc_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return cpc_write(cpu, enable_reg, enable);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cppc_set_enable);
|
||||
|
||||
/**
|
||||
* cppc_set_perf - Set a CPU's performance controls.
|
||||
* @cpu: CPU for which to set performance controls.
|
||||
|
@ -874,11 +874,11 @@ static inline void acpi_sleep_syscore_init(void) {}
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
static unsigned long s4_hardware_signature;
|
||||
static struct acpi_table_facs *facs;
|
||||
static bool nosigcheck;
|
||||
static int sigcheck = -1; /* Default behaviour is just to warn */
|
||||
|
||||
void __init acpi_no_s4_hw_signature(void)
|
||||
void __init acpi_check_s4_hw_signature(int check)
|
||||
{
|
||||
nosigcheck = true;
|
||||
sigcheck = check;
|
||||
}
|
||||
|
||||
static int acpi_hibernation_begin(pm_message_t stage)
|
||||
@ -1004,12 +1004,28 @@ static void acpi_sleep_hibernate_setup(void)
|
||||
hibernation_set_ops(old_suspend_ordering ?
|
||||
&acpi_hibernation_ops_old : &acpi_hibernation_ops);
|
||||
sleep_states[ACPI_STATE_S4] = 1;
|
||||
if (nosigcheck)
|
||||
if (!sigcheck)
|
||||
return;
|
||||
|
||||
acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
|
||||
if (facs)
|
||||
if (facs) {
|
||||
/*
|
||||
* s4_hardware_signature is the local variable which is just
|
||||
* used to warn about mismatch after we're attempting to
|
||||
* resume (in violation of the ACPI specification.)
|
||||
*/
|
||||
s4_hardware_signature = facs->hardware_signature;
|
||||
|
||||
if (sigcheck > 0) {
|
||||
/*
|
||||
* If we're actually obeying the ACPI specification
|
||||
* then the signature is written out as part of the
|
||||
* swsusp header, in order to allow the boot kernel
|
||||
* to gracefully decline to resume.
|
||||
*/
|
||||
swsusp_hardware_signature = facs->hardware_signature;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else /* !CONFIG_HIBERNATION */
|
||||
static inline void acpi_sleep_hibernate_setup(void) {}
|
||||
|
@ -22,6 +22,7 @@
|
||||
static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
|
||||
static struct cpumask scale_freq_counters_mask;
|
||||
static bool scale_freq_invariant;
|
||||
static DEFINE_PER_CPU(u32, freq_factor) = 1;
|
||||
|
||||
static bool supports_scale_freq_counters(const struct cpumask *cpus)
|
||||
{
|
||||
@ -155,15 +156,49 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, thermal_pressure);
|
||||
|
||||
void topology_set_thermal_pressure(const struct cpumask *cpus,
|
||||
unsigned long th_pressure)
|
||||
/**
|
||||
* topology_update_thermal_pressure() - Update thermal pressure for CPUs
|
||||
* @cpus : The related CPUs for which capacity has been reduced
|
||||
* @capped_freq : The maximum allowed frequency that CPUs can run at
|
||||
*
|
||||
* Update the value of thermal pressure for all @cpus in the mask. The
|
||||
* cpumask should include all (online+offline) affected CPUs, to avoid
|
||||
* operating on stale data when hot-plug is used for some CPUs. The
|
||||
* @capped_freq reflects the currently allowed max CPUs frequency due to
|
||||
* thermal capping. It might be also a boost frequency value, which is bigger
|
||||
* than the internal 'freq_factor' max frequency. In such case the pressure
|
||||
* value should simply be removed, since this is an indication that there is
|
||||
* no thermal throttling. The @capped_freq must be provided in kHz.
|
||||
*/
|
||||
void topology_update_thermal_pressure(const struct cpumask *cpus,
|
||||
unsigned long capped_freq)
|
||||
{
|
||||
unsigned long max_capacity, capacity, th_pressure;
|
||||
u32 max_freq;
|
||||
int cpu;
|
||||
|
||||
cpu = cpumask_first(cpus);
|
||||
max_capacity = arch_scale_cpu_capacity(cpu);
|
||||
max_freq = per_cpu(freq_factor, cpu);
|
||||
|
||||
/* Convert to MHz scale which is used in 'freq_factor' */
|
||||
capped_freq /= 1000;
|
||||
|
||||
/*
|
||||
* Handle properly the boost frequencies, which should simply clean
|
||||
* the thermal pressure value.
|
||||
*/
|
||||
if (max_freq <= capped_freq)
|
||||
capacity = max_capacity;
|
||||
else
|
||||
capacity = mult_frac(max_capacity, capped_freq, max_freq);
|
||||
|
||||
th_pressure = max_capacity - capacity;
|
||||
|
||||
for_each_cpu(cpu, cpus)
|
||||
WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
|
||||
EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
|
||||
|
||||
static ssize_t cpu_capacity_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
@ -217,7 +252,6 @@ static void update_topology_flags_workfn(struct work_struct *work)
|
||||
update_topology = 0;
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(u32, freq_factor) = 1;
|
||||
static u32 *raw_capacity;
|
||||
|
||||
static int free_raw_capacity(void)
|
||||
|
@ -485,8 +485,7 @@ static void device_link_release_fn(struct work_struct *work)
|
||||
/* Ensure that all references to the link object have been dropped. */
|
||||
device_link_synchronize_removal();
|
||||
|
||||
while (refcount_dec_not_one(&link->rpm_active))
|
||||
pm_runtime_put(link->supplier);
|
||||
pm_runtime_release_supplier(link, true);
|
||||
|
||||
put_device(link->consumer);
|
||||
put_device(link->supplier);
|
||||
|
@ -305,19 +305,40 @@ static int rpm_get_suppliers(struct device *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_runtime_release_supplier - Drop references to device link's supplier.
|
||||
* @link: Target device link.
|
||||
* @check_idle: Whether or not to check if the supplier device is idle.
|
||||
*
|
||||
* Drop all runtime PM references associated with @link to its supplier device
|
||||
* and if @check_idle is set, check if that device is idle (and so it can be
|
||||
* suspended).
|
||||
*/
|
||||
void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
|
||||
{
|
||||
struct device *supplier = link->supplier;
|
||||
|
||||
/*
|
||||
* The additional power.usage_count check is a safety net in case
|
||||
* the rpm_active refcount becomes saturated, in which case
|
||||
* refcount_dec_not_one() would return true forever, but it is not
|
||||
* strictly necessary.
|
||||
*/
|
||||
while (refcount_dec_not_one(&link->rpm_active) &&
|
||||
atomic_read(&supplier->power.usage_count) > 0)
|
||||
pm_runtime_put_noidle(supplier);
|
||||
|
||||
if (check_idle)
|
||||
pm_request_idle(supplier);
|
||||
}
|
||||
|
||||
static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
|
||||
{
|
||||
struct device_link *link;
|
||||
|
||||
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
|
||||
device_links_read_lock_held()) {
|
||||
|
||||
while (refcount_dec_not_one(&link->rpm_active))
|
||||
pm_runtime_put_noidle(link->supplier);
|
||||
|
||||
if (try_to_suspend)
|
||||
pm_request_idle(link->supplier);
|
||||
}
|
||||
device_links_read_lock_held())
|
||||
pm_runtime_release_supplier(link, try_to_suspend);
|
||||
}
|
||||
|
||||
static void rpm_put_suppliers(struct device *dev)
|
||||
@ -742,13 +763,15 @@ static int rpm_resume(struct device *dev, int rpmflags)
|
||||
trace_rpm_resume_rcuidle(dev, rpmflags);
|
||||
|
||||
repeat:
|
||||
if (dev->power.runtime_error)
|
||||
if (dev->power.runtime_error) {
|
||||
retval = -EINVAL;
|
||||
else if (dev->power.disable_depth == 1 && dev->power.is_suspended
|
||||
&& dev->power.runtime_status == RPM_ACTIVE)
|
||||
retval = 1;
|
||||
else if (dev->power.disable_depth > 0)
|
||||
retval = -EACCES;
|
||||
} else if (dev->power.disable_depth > 0) {
|
||||
if (dev->power.runtime_status == RPM_ACTIVE &&
|
||||
dev->power.last_status == RPM_ACTIVE)
|
||||
retval = 1;
|
||||
else
|
||||
retval = -EACCES;
|
||||
}
|
||||
if (retval)
|
||||
goto out;
|
||||
|
||||
@ -1410,8 +1433,10 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
|
||||
/* Update time accounting before disabling PM-runtime. */
|
||||
update_pm_runtime_accounting(dev);
|
||||
|
||||
if (!dev->power.disable_depth++)
|
||||
if (!dev->power.disable_depth++) {
|
||||
__pm_runtime_barrier(dev);
|
||||
dev->power.last_status = dev->power.runtime_status;
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&dev->power.lock);
|
||||
@ -1428,23 +1453,23 @@ void pm_runtime_enable(struct device *dev)
|
||||
|
||||
spin_lock_irqsave(&dev->power.lock, flags);
|
||||
|
||||
if (dev->power.disable_depth > 0) {
|
||||
dev->power.disable_depth--;
|
||||
|
||||
/* About to enable runtime pm, set accounting_timestamp to now */
|
||||
if (!dev->power.disable_depth)
|
||||
dev->power.accounting_timestamp = ktime_get_mono_fast_ns();
|
||||
} else {
|
||||
if (!dev->power.disable_depth) {
|
||||
dev_warn(dev, "Unbalanced %s!\n", __func__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
WARN(!dev->power.disable_depth &&
|
||||
dev->power.runtime_status == RPM_SUSPENDED &&
|
||||
!dev->power.ignore_children &&
|
||||
atomic_read(&dev->power.child_count) > 0,
|
||||
"Enabling runtime PM for inactive device (%s) with active children\n",
|
||||
dev_name(dev));
|
||||
if (--dev->power.disable_depth > 0)
|
||||
goto out;
|
||||
|
||||
dev->power.last_status = RPM_INVALID;
|
||||
dev->power.accounting_timestamp = ktime_get_mono_fast_ns();
|
||||
|
||||
if (dev->power.runtime_status == RPM_SUSPENDED &&
|
||||
!dev->power.ignore_children &&
|
||||
atomic_read(&dev->power.child_count) > 0)
|
||||
dev_warn(dev, "Enabling runtime PM for inactive device with active children\n");
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&dev->power.lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pm_runtime_enable);
|
||||
@ -1640,6 +1665,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend);
|
||||
void pm_runtime_init(struct device *dev)
|
||||
{
|
||||
dev->power.runtime_status = RPM_SUSPENDED;
|
||||
dev->power.last_status = RPM_INVALID;
|
||||
dev->power.idle_notification = false;
|
||||
|
||||
dev->power.disable_depth = 1;
|
||||
@ -1722,8 +1748,6 @@ void pm_runtime_get_suppliers(struct device *dev)
|
||||
void pm_runtime_put_suppliers(struct device *dev)
|
||||
{
|
||||
struct device_link *link;
|
||||
unsigned long flags;
|
||||
bool put;
|
||||
int idx;
|
||||
|
||||
idx = device_links_read_lock();
|
||||
@ -1731,11 +1755,17 @@ void pm_runtime_put_suppliers(struct device *dev)
|
||||
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
|
||||
device_links_read_lock_held())
|
||||
if (link->supplier_preactivated) {
|
||||
bool put;
|
||||
|
||||
link->supplier_preactivated = false;
|
||||
spin_lock_irqsave(&dev->power.lock, flags);
|
||||
|
||||
spin_lock_irq(&dev->power.lock);
|
||||
|
||||
put = pm_runtime_status_suspended(dev) &&
|
||||
refcount_dec_not_one(&link->rpm_active);
|
||||
spin_unlock_irqrestore(&dev->power.lock, flags);
|
||||
|
||||
spin_unlock_irq(&dev->power.lock);
|
||||
|
||||
if (put)
|
||||
pm_runtime_put(link->supplier);
|
||||
}
|
||||
@ -1772,9 +1802,7 @@ void pm_runtime_drop_link(struct device_link *link)
|
||||
return;
|
||||
|
||||
pm_runtime_drop_link_count(link->consumer);
|
||||
|
||||
while (refcount_dec_not_one(&link->rpm_active))
|
||||
pm_runtime_put(link->supplier);
|
||||
pm_runtime_release_supplier(link, true);
|
||||
}
|
||||
|
||||
static bool pm_runtime_need_not_resume(struct device *dev)
|
||||
|
@ -34,6 +34,23 @@ config X86_PCC_CPUFREQ
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config X86_AMD_PSTATE
|
||||
tristate "AMD Processor P-State driver"
|
||||
depends on X86 && ACPI
|
||||
select ACPI_PROCESSOR
|
||||
select ACPI_CPPC_LIB if X86_64
|
||||
select CPU_FREQ_GOV_SCHEDUTIL if SMP
|
||||
help
|
||||
This driver adds a CPUFreq driver which utilizes a fine grain
|
||||
processor performance frequency control range instead of legacy
|
||||
performance levels. _CPC needs to be present in the ACPI tables
|
||||
of the system.
|
||||
|
||||
For details, take a look at:
|
||||
<file:Documentation/admin-guide/pm/amd-pstate.rst>.
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config X86_ACPI_CPUFREQ
|
||||
tristate "ACPI Processor P-States driver"
|
||||
depends on ACPI_PROCESSOR
|
||||
|
@ -17,6 +17,10 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o
|
||||
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
|
||||
obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
|
||||
|
||||
# Traces
|
||||
CFLAGS_amd-pstate-trace.o := -I$(src)
|
||||
amd_pstate-y := amd-pstate.o amd-pstate-trace.o
|
||||
|
||||
##################################################################################
|
||||
# x86 drivers.
|
||||
# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
|
||||
@ -25,6 +29,7 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
|
||||
# speedstep-* is preferred over p4-clockmod.
|
||||
|
||||
obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
|
||||
obj-$(CONFIG_X86_AMD_PSTATE) += amd_pstate.o
|
||||
obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
|
||||
obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
|
||||
obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
|
||||
|
2
drivers/cpufreq/amd-pstate-trace.c
Normal file
2
drivers/cpufreq/amd-pstate-trace.c
Normal file
@ -0,0 +1,2 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "amd-pstate-trace.h"
|
77
drivers/cpufreq/amd-pstate-trace.h
Normal file
77
drivers/cpufreq/amd-pstate-trace.h
Normal file
@ -0,0 +1,77 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* amd-pstate-trace.h - AMD Processor P-state Frequency Driver Tracer
|
||||
*
|
||||
* Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Author: Huang Rui <ray.huang@amd.com>
|
||||
*/
|
||||
|
||||
#if !defined(_AMD_PSTATE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _AMD_PSTATE_TRACE_H
|
||||
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/tracepoint.h>
|
||||
#include <linux/trace_events.h>
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM amd_cpu
|
||||
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_FILE amd-pstate-trace
|
||||
|
||||
#define TPS(x) tracepoint_string(x)
|
||||
|
||||
TRACE_EVENT(amd_pstate_perf,
|
||||
|
||||
TP_PROTO(unsigned long min_perf,
|
||||
unsigned long target_perf,
|
||||
unsigned long capacity,
|
||||
unsigned int cpu_id,
|
||||
bool changed,
|
||||
bool fast_switch
|
||||
),
|
||||
|
||||
TP_ARGS(min_perf,
|
||||
target_perf,
|
||||
capacity,
|
||||
cpu_id,
|
||||
changed,
|
||||
fast_switch
|
||||
),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, min_perf)
|
||||
__field(unsigned long, target_perf)
|
||||
__field(unsigned long, capacity)
|
||||
__field(unsigned int, cpu_id)
|
||||
__field(bool, changed)
|
||||
__field(bool, fast_switch)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->min_perf = min_perf;
|
||||
__entry->target_perf = target_perf;
|
||||
__entry->capacity = capacity;
|
||||
__entry->cpu_id = cpu_id;
|
||||
__entry->changed = changed;
|
||||
__entry->fast_switch = fast_switch;
|
||||
),
|
||||
|
||||
TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
|
||||
(unsigned long)__entry->min_perf,
|
||||
(unsigned long)__entry->target_perf,
|
||||
(unsigned long)__entry->capacity,
|
||||
(unsigned int)__entry->cpu_id,
|
||||
(__entry->changed) ? "true" : "false",
|
||||
(__entry->fast_switch) ? "true" : "false"
|
||||
)
|
||||
);
|
||||
|
||||
#endif /* _AMD_PSTATE_TRACE_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
|
||||
#include <trace/define_trace.h>
|
645
drivers/cpufreq/amd-pstate.c
Normal file
645
drivers/cpufreq/amd-pstate.c
Normal file
@ -0,0 +1,645 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* amd-pstate.c - AMD Processor P-state Frequency Driver
|
||||
*
|
||||
* Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Author: Huang Rui <ray.huang@amd.com>
|
||||
*
|
||||
* AMD P-State introduces a new CPU performance scaling design for AMD
|
||||
* processors using the ACPI Collaborative Performance and Power Control (CPPC)
|
||||
* feature which works with the AMD SMU firmware providing a finer grained
|
||||
* frequency control range. It is to replace the legacy ACPI P-States control,
|
||||
* allows a flexible, low-latency interface for the Linux kernel to directly
|
||||
* communicate the performance hints to hardware.
|
||||
*
|
||||
* AMD P-State is supported on recent AMD Zen-based CPU series, including some of
|
||||
* Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of AMD
|
||||
* P-State supported system. And there are two types of hardware implementations
|
||||
* for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
|
||||
* X86_FEATURE_CPPC CPU feature flag is used to distinguish the different types.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/static_call.h>
|
||||
|
||||
#include <acpi/processor.h>
|
||||
#include <acpi/cppc_acpi.h>
|
||||
|
||||
#include <asm/msr.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include "amd-pstate-trace.h"
|
||||
|
||||
#define AMD_PSTATE_TRANSITION_LATENCY 0x20000
|
||||
#define AMD_PSTATE_TRANSITION_DELAY 500
|
||||
|
||||
/*
|
||||
* TODO: We need more time to fine tune processors with shared memory solution
|
||||
* with community together.
|
||||
*
|
||||
* There are some performance drops on the CPU benchmarks, as reported by
|
||||
* Suse. We are co-working with them to fine tune the shared memory solution. So
|
||||
* we disable it by default to go acpi-cpufreq on these processors and add a
|
||||
* module parameter to be able to enable it manually for debugging.
|
||||
*/
|
||||
static bool shared_mem = false;
|
||||
module_param(shared_mem, bool, 0444);
|
||||
MODULE_PARM_DESC(shared_mem,
|
||||
"enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)");
|
||||
|
||||
static struct cpufreq_driver amd_pstate_driver;
|
||||
|
||||
/**
|
||||
* struct amd_cpudata - private CPU data for AMD P-State
|
||||
* @cpu: CPU number
|
||||
* @req: constraint request to apply
|
||||
* @cppc_req_cached: cached performance request hints
|
||||
* @highest_perf: the maximum performance an individual processor may reach,
|
||||
* assuming ideal conditions
|
||||
* @nominal_perf: the maximum sustained performance level of the processor,
|
||||
* assuming ideal operating conditions
|
||||
* @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
|
||||
* savings are achieved
|
||||
* @lowest_perf: the absolute lowest performance level of the processor
|
||||
* @max_freq: the frequency that mapped to highest_perf
|
||||
* @min_freq: the frequency that mapped to lowest_perf
|
||||
* @nominal_freq: the frequency that mapped to nominal_perf
|
||||
* @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
|
||||
* @boost_supported: check whether the Processor or SBIOS supports boost mode
|
||||
*
|
||||
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
|
||||
* represents all the attributes and goals that AMD P-State requests at runtime.
|
||||
*/
|
||||
struct amd_cpudata {
|
||||
int cpu;
|
||||
|
||||
struct freq_qos_request req[2];
|
||||
u64 cppc_req_cached;
|
||||
|
||||
u32 highest_perf;
|
||||
u32 nominal_perf;
|
||||
u32 lowest_nonlinear_perf;
|
||||
u32 lowest_perf;
|
||||
|
||||
u32 max_freq;
|
||||
u32 min_freq;
|
||||
u32 nominal_freq;
|
||||
u32 lowest_nonlinear_freq;
|
||||
|
||||
bool boost_supported;
|
||||
};
|
||||
|
||||
/*
 * Write @enable to MSR_AMD_CPPC_ENABLE.
 *
 * Returns whatever wrmsrl_safe() reports.
 */
static inline int pstate_enable(bool enable)
{
	int err = wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);

	return err;
}
|
||||
|
||||
/*
 * Call cppc_set_enable() with @enable for every present CPU.
 *
 * Stops at the first failure and returns its error code; returns 0 when all
 * calls succeeded.
 */
static int cppc_enable(bool enable)
{
	int cpu;

	for_each_present_cpu(cpu) {
		int err = cppc_set_enable(cpu, enable);

		if (err)
			return err;
	}

	return 0;
}
|
||||
|
||||
DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
|
||||
|
||||
static inline int amd_pstate_enable(bool enable)
|
||||
{
|
||||
return static_call(amd_pstate_enable)(enable);
|
||||
}
|
||||
|
||||
static int pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
u64 cap1;
|
||||
|
||||
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
|
||||
&cap1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* TODO: Introduce AMD specific power feature.
|
||||
*
|
||||
* CPPC entry doesn't indicate the highest performance in some ASICs.
|
||||
*/
|
||||
WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());
|
||||
|
||||
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
|
||||
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cppc_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
WRITE_ONCE(cpudata->highest_perf, amd_get_highest_perf());
|
||||
|
||||
WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
|
||||
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
|
||||
cppc_perf.lowest_nonlinear_perf);
|
||||
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
|
||||
|
||||
static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
|
||||
{
|
||||
return static_call(amd_pstate_init_perf)(cpudata);
|
||||
}
|
||||
|
||||
/*
 * Write the cached request value of @cpudata to MSR_AMD_CPPC_REQ.
 *
 * With @fast_switch the MSR is written with wrmsrl(), i.e. without naming a
 * target CPU; otherwise wrmsrl_on_cpu() is used to write it on cpudata->cpu.
 * @min_perf, @des_perf and @max_perf are not used here: the value written is
 * cpudata->cppc_req_cached.
 */
static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			       u32 des_perf, u32 max_perf, bool fast_switch)
{
	u64 req = READ_ONCE(cpudata->cppc_req_cached);

	if (!fast_switch) {
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, req);
		return;
	}

	wrmsrl(MSR_AMD_CPPC_REQ, req);
}
|
||||
|
||||
/*
 * Pass the requested performance levels for cpudata->cpu to cppc_set_perf().
 *
 * @fast_switch is not used by this implementation and the result of
 * cppc_set_perf() is not checked.
 */
static void cppc_update_perf(struct amd_cpudata *cpudata,
			     u32 min_perf, u32 des_perf,
			     u32 max_perf, bool fast_switch)
{
	struct cppc_perf_ctrls ctrls;

	ctrls.min_perf = min_perf;
	ctrls.desired_perf = des_perf;
	ctrls.max_perf = max_perf;

	cppc_set_perf(cpudata->cpu, &ctrls);
}
|
||||
|
||||
DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
|
||||
|
||||
static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
|
||||
u32 min_perf, u32 des_perf,
|
||||
u32 max_perf, bool fast_switch)
|
||||
{
|
||||
static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
|
||||
max_perf, fast_switch);
|
||||
}
|
||||
|
||||
/*
 * Build a new request value from @min_perf, @des_perf and @max_perf and apply
 * it if it differs from the cached one.
 *
 * The tracepoint fires on every call and records whether the value changed.
 * When it did, the new value is stored in cpudata->cppc_req_cached before
 * amd_pstate_update_perf() is called.
 */
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch)
{
	const u64 old_req = READ_ONCE(cpudata->cppc_req_cached);
	u64 new_req = old_req;

	/* Replace each perf field in turn: clear it, then insert the level. */
	new_req = (new_req & ~AMD_CPPC_MIN_PERF(~0L)) |
		  AMD_CPPC_MIN_PERF(min_perf);
	new_req = (new_req & ~AMD_CPPC_DES_PERF(~0L)) |
		  AMD_CPPC_DES_PERF(des_perf);
	new_req = (new_req & ~AMD_CPPC_MAX_PERF(~0L)) |
		  AMD_CPPC_MAX_PERF(max_perf);

	trace_amd_pstate_perf(min_perf, des_perf, max_perf,
			      cpudata->cpu, (new_req != old_req), fast_switch);

	/* Nothing to do if the request is unchanged. */
	if (new_req != old_req) {
		WRITE_ONCE(cpudata->cppc_req_cached, new_req);

		amd_pstate_update_perf(cpudata, min_perf, des_perf,
				       max_perf, fast_switch);
	}
}
|
||||
|
||||
/*
 * cpufreq ->verify() callback: defers to cpufreq_verify_within_cpu_limits()
 * and always returns 0.
 */
static int amd_pstate_verify(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);
	return 0;
}
|
||||
|
||||
static int amd_pstate_target(struct cpufreq_policy *policy,
|
||||
unsigned int target_freq,
|
||||
unsigned int relation)
|
||||
{
|
||||
struct cpufreq_freqs freqs;
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
unsigned long max_perf, min_perf, des_perf, cap_perf;
|
||||
|
||||
if (!cpudata->max_freq)
|
||||
return -ENODEV;
|
||||
|
||||
cap_perf = READ_ONCE(cpudata->highest_perf);
|
||||
min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
|
||||
max_perf = cap_perf;
|
||||
|
||||
freqs.old = policy->cur;
|
||||
freqs.new = target_freq;
|
||||
|
||||
des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
|
||||
cpudata->max_freq);
|
||||
|
||||
cpufreq_freq_transition_begin(policy, &freqs);
|
||||
amd_pstate_update(cpudata, min_perf, des_perf,
|
||||
max_perf, false);
|
||||
cpufreq_freq_transition_end(policy, &freqs, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amd_pstate_adjust_perf(unsigned int cpu,
|
||||
unsigned long _min_perf,
|
||||
unsigned long target_perf,
|
||||
unsigned long capacity)
|
||||
{
|
||||
unsigned long max_perf, min_perf, des_perf,
|
||||
cap_perf, lowest_nonlinear_perf;
|
||||
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
|
||||
cap_perf = READ_ONCE(cpudata->highest_perf);
|
||||
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
|
||||
|
||||
des_perf = cap_perf;
|
||||
if (target_perf < capacity)
|
||||
des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
|
||||
|
||||
min_perf = READ_ONCE(cpudata->highest_perf);
|
||||
if (_min_perf < capacity)
|
||||
min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
|
||||
|
||||
if (min_perf < lowest_nonlinear_perf)
|
||||
min_perf = lowest_nonlinear_perf;
|
||||
|
||||
max_perf = cap_perf;
|
||||
if (max_perf < min_perf)
|
||||
max_perf = min_perf;
|
||||
|
||||
des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
|
||||
|
||||
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
|
||||
}
|
||||
|
||||
static int amd_get_min_freq(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Switch to khz */
|
||||
return cppc_perf.lowest_freq * 1000;
|
||||
}
|
||||
|
||||
static int amd_get_max_freq(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
u32 max_perf, max_freq, nominal_freq, nominal_perf;
|
||||
u64 boost_ratio;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
nominal_freq = cppc_perf.nominal_freq;
|
||||
nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
max_perf = READ_ONCE(cpudata->highest_perf);
|
||||
|
||||
boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
|
||||
nominal_perf);
|
||||
|
||||
max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
|
||||
|
||||
/* Switch to khz */
|
||||
return max_freq * 1000;
|
||||
}
|
||||
|
||||
static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Switch to khz */
|
||||
return cppc_perf.nominal_freq * 1000;
|
||||
}
|
||||
|
||||
static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
|
||||
{
|
||||
struct cppc_perf_caps cppc_perf;
|
||||
u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
|
||||
nominal_freq, nominal_perf;
|
||||
u64 lowest_nonlinear_ratio;
|
||||
|
||||
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
nominal_freq = cppc_perf.nominal_freq;
|
||||
nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
|
||||
lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
|
||||
|
||||
lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
|
||||
nominal_perf);
|
||||
|
||||
lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
|
||||
|
||||
/* Switch to khz */
|
||||
return lowest_nonlinear_freq * 1000;
|
||||
}
|
||||
|
||||
static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
|
||||
{
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
int ret;
|
||||
|
||||
if (!cpudata->boost_supported) {
|
||||
pr_err("Boost mode is not supported by this processor or SBIOS\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (state)
|
||||
policy->cpuinfo.max_freq = cpudata->max_freq;
|
||||
else
|
||||
policy->cpuinfo.max_freq = cpudata->nominal_freq;
|
||||
|
||||
policy->max = policy->cpuinfo.max_freq;
|
||||
|
||||
ret = freq_qos_update_request(&cpudata->req[1],
|
||||
policy->cpuinfo.max_freq);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
|
||||
{
|
||||
u32 highest_perf, nominal_perf;
|
||||
|
||||
highest_perf = READ_ONCE(cpudata->highest_perf);
|
||||
nominal_perf = READ_ONCE(cpudata->nominal_perf);
|
||||
|
||||
if (highest_perf <= nominal_perf)
|
||||
return;
|
||||
|
||||
cpudata->boost_supported = true;
|
||||
amd_pstate_driver.boost_enabled = true;
|
||||
}
|
||||
|
||||
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
|
||||
{
|
||||
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
|
||||
struct device *dev;
|
||||
struct amd_cpudata *cpudata;
|
||||
|
||||
dev = get_cpu_device(policy->cpu);
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
|
||||
cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
|
||||
if (!cpudata)
|
||||
return -ENOMEM;
|
||||
|
||||
cpudata->cpu = policy->cpu;
|
||||
|
||||
ret = amd_pstate_init_perf(cpudata);
|
||||
if (ret)
|
||||
goto free_cpudata1;
|
||||
|
||||
min_freq = amd_get_min_freq(cpudata);
|
||||
max_freq = amd_get_max_freq(cpudata);
|
||||
nominal_freq = amd_get_nominal_freq(cpudata);
|
||||
lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
|
||||
|
||||
if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
|
||||
dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
|
||||
min_freq, max_freq);
|
||||
ret = -EINVAL;
|
||||
goto free_cpudata1;
|
||||
}
|
||||
|
||||
policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
|
||||
policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;
|
||||
|
||||
policy->min = min_freq;
|
||||
policy->max = max_freq;
|
||||
|
||||
policy->cpuinfo.min_freq = min_freq;
|
||||
policy->cpuinfo.max_freq = max_freq;
|
||||
|
||||
/* It will be updated by governor */
|
||||
policy->cur = policy->cpuinfo.min_freq;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_CPPC))
|
||||
policy->fast_switch_possible = true;
|
||||
|
||||
ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
|
||||
FREQ_QOS_MIN, policy->cpuinfo.min_freq);
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
|
||||
goto free_cpudata1;
|
||||
}
|
||||
|
||||
ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
|
||||
FREQ_QOS_MAX, policy->cpuinfo.max_freq);
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
|
||||
goto free_cpudata2;
|
||||
}
|
||||
|
||||
/* Initial processor data capability frequencies */
|
||||
cpudata->max_freq = max_freq;
|
||||
cpudata->min_freq = min_freq;
|
||||
cpudata->nominal_freq = nominal_freq;
|
||||
cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
|
||||
|
||||
policy->driver_data = cpudata;
|
||||
|
||||
amd_pstate_boost_init(cpudata);
|
||||
|
||||
return 0;
|
||||
|
||||
free_cpudata2:
|
||||
freq_qos_remove_request(&cpudata->req[0]);
|
||||
free_cpudata1:
|
||||
kfree(cpudata);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct amd_cpudata *cpudata;
|
||||
|
||||
cpudata = policy->driver_data;
|
||||
|
||||
freq_qos_remove_request(&cpudata->req[1]);
|
||||
freq_qos_remove_request(&cpudata->req[0]);
|
||||
kfree(cpudata);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sysfs attributes */
|
||||
|
||||
/*
|
||||
* This frequency is to indicate the maximum hardware frequency.
|
||||
* If boost is not active but supported, the frequency will be larger than the
|
||||
* one in cpuinfo.
|
||||
*/
|
||||
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
|
||||
char *buf)
|
||||
{
|
||||
int max_freq;
|
||||
struct amd_cpudata *cpudata;
|
||||
|
||||
cpudata = policy->driver_data;
|
||||
|
||||
max_freq = amd_get_max_freq(cpudata);
|
||||
if (max_freq < 0)
|
||||
return max_freq;
|
||||
|
||||
return sprintf(&buf[0], "%u\n", max_freq);
|
||||
}
|
||||
|
||||
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
|
||||
char *buf)
|
||||
{
|
||||
int freq;
|
||||
struct amd_cpudata *cpudata;
|
||||
|
||||
cpudata = policy->driver_data;
|
||||
|
||||
freq = amd_get_lowest_nonlinear_freq(cpudata);
|
||||
if (freq < 0)
|
||||
return freq;
|
||||
|
||||
return sprintf(&buf[0], "%u\n", freq);
|
||||
}
|
||||
|
||||
/*
|
||||
* In some of ASICs, the highest_perf is not the one in the _CPC table, so we
|
||||
* need to expose it to sysfs.
|
||||
*/
|
||||
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
|
||||
char *buf)
|
||||
{
|
||||
u32 perf;
|
||||
struct amd_cpudata *cpudata = policy->driver_data;
|
||||
|
||||
perf = READ_ONCE(cpudata->highest_perf);
|
||||
|
||||
return sprintf(&buf[0], "%u\n", perf);
|
||||
}
|
||||
|
||||
cpufreq_freq_attr_ro(amd_pstate_max_freq);
|
||||
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
|
||||
|
||||
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
|
||||
|
||||
static struct freq_attr *amd_pstate_attr[] = {
|
||||
&amd_pstate_max_freq,
|
||||
&amd_pstate_lowest_nonlinear_freq,
|
||||
&amd_pstate_highest_perf,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct cpufreq_driver amd_pstate_driver = {
|
||||
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
|
||||
.verify = amd_pstate_verify,
|
||||
.target = amd_pstate_target,
|
||||
.init = amd_pstate_cpu_init,
|
||||
.exit = amd_pstate_cpu_exit,
|
||||
.set_boost = amd_pstate_set_boost,
|
||||
.name = "amd-pstate",
|
||||
.attr = amd_pstate_attr,
|
||||
};
|
||||
|
||||
/*
 * Module init: pick the backend, enable the feature and register the
 * cpufreq driver.
 *
 * With X86_FEATURE_CPPC the default (MSR based) static calls are kept and
 * ->adjust_perf is installed. Without it the shared memory implementations
 * are bound, but only if the shared_mem module parameter is set.
 *
 * Returns 0 on success, -ENODEV when the processor is not AMD, has no valid
 * _CPC object or would need the (disabled) shared memory solution, -EEXIST
 * when another cpufreq driver is already registered, or the error from
 * enabling the feature or registering the driver.
 */
static int __init amd_pstate_init(void)
{
	bool msr_based;
	int err;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	if (!acpi_cpc_valid()) {
		pr_debug("the _CPC object is not present in SBIOS\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	/* capability check */
	msr_based = boot_cpu_has(X86_FEATURE_CPPC);
	if (!msr_based && !shared_mem) {
		pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n");
		return -ENODEV;
	}

	if (msr_based) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
	} else {
		static_call_update(amd_pstate_enable, cppc_enable);
		static_call_update(amd_pstate_init_perf, cppc_init_perf);
		static_call_update(amd_pstate_update_perf, cppc_update_perf);
	}

	/* enable amd pstate feature */
	err = amd_pstate_enable(true);
	if (err) {
		pr_err("failed to enable amd-pstate with return %d\n", err);
		return err;
	}

	err = cpufreq_register_driver(&amd_pstate_driver);
	if (err)
		pr_err("failed to register amd_pstate_driver with return %d\n",
		       err);

	return err;
}
|
||||
|
||||
/*
 * Module exit: unregister the cpufreq driver first, then switch the feature
 * off through amd_pstate_enable(). The result of the latter is ignored.
 */
static void __exit amd_pstate_exit(void)
{
	cpufreq_unregister_driver(&amd_pstate_driver);
	amd_pstate_enable(false);
}
|
||||
|
||||
module_init(amd_pstate_init);
|
||||
module_exit(amd_pstate_exit);
|
||||
|
||||
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
|
||||
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
|
||||
MODULE_LICENSE("GPL");
|
@ -924,7 +924,7 @@ cpufreq_freq_attr_rw(scaling_max_freq);
|
||||
cpufreq_freq_attr_rw(scaling_governor);
|
||||
cpufreq_freq_attr_rw(scaling_setspeed);
|
||||
|
||||
static struct attribute *default_attrs[] = {
|
||||
static struct attribute *cpufreq_attrs[] = {
|
||||
&cpuinfo_min_freq.attr,
|
||||
&cpuinfo_max_freq.attr,
|
||||
&cpuinfo_transition_latency.attr,
|
||||
@ -938,6 +938,7 @@ static struct attribute *default_attrs[] = {
|
||||
&scaling_setspeed.attr,
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(cpufreq);
|
||||
|
||||
#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
|
||||
#define to_attr(a) container_of(a, struct freq_attr, attr)
|
||||
@ -1000,7 +1001,7 @@ static const struct sysfs_ops sysfs_ops = {
|
||||
|
||||
static struct kobj_type ktype_cpufreq = {
|
||||
.sysfs_ops = &sysfs_ops,
|
||||
.default_attrs = default_attrs,
|
||||
.default_groups = cpufreq_groups,
|
||||
.release = cpufreq_sysfs_release,
|
||||
};
|
||||
|
||||
@ -1403,7 +1404,7 @@ static int cpufreq_online(unsigned int cpu)
|
||||
|
||||
ret = freq_qos_add_request(&policy->constraints,
|
||||
policy->min_freq_req, FREQ_QOS_MIN,
|
||||
policy->min);
|
||||
FREQ_QOS_MIN_DEFAULT_VALUE);
|
||||
if (ret < 0) {
|
||||
/*
|
||||
* So we don't call freq_qos_remove_request() for an
|
||||
@ -1423,7 +1424,7 @@ static int cpufreq_online(unsigned int cpu)
|
||||
|
||||
ret = freq_qos_add_request(&policy->constraints,
|
||||
policy->max_freq_req, FREQ_QOS_MAX,
|
||||
policy->max);
|
||||
FREQ_QOS_MAX_DEFAULT_VALUE);
|
||||
if (ret < 0) {
|
||||
policy->max_freq_req = NULL;
|
||||
goto out_destroy_policy;
|
||||
|
@ -257,7 +257,7 @@ gov_attr_rw(ignore_nice_load);
|
||||
gov_attr_rw(down_threshold);
|
||||
gov_attr_rw(freq_step);
|
||||
|
||||
static struct attribute *cs_attributes[] = {
|
||||
static struct attribute *cs_attrs[] = {
|
||||
&sampling_rate.attr,
|
||||
&sampling_down_factor.attr,
|
||||
&up_threshold.attr,
|
||||
@ -266,6 +266,7 @@ static struct attribute *cs_attributes[] = {
|
||||
&freq_step.attr,
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(cs);
|
||||
|
||||
/************************** sysfs end ************************/
|
||||
|
||||
@ -315,7 +316,7 @@ static void cs_start(struct cpufreq_policy *policy)
|
||||
|
||||
static struct dbs_governor cs_governor = {
|
||||
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"),
|
||||
.kobj_type = { .default_attrs = cs_attributes },
|
||||
.kobj_type = { .default_groups = cs_groups },
|
||||
.gov_dbs_update = cs_dbs_update,
|
||||
.alloc = cs_alloc,
|
||||
.free = cs_free,
|
||||
|
@ -328,7 +328,7 @@ gov_attr_rw(sampling_down_factor);
|
||||
gov_attr_rw(ignore_nice_load);
|
||||
gov_attr_rw(powersave_bias);
|
||||
|
||||
static struct attribute *od_attributes[] = {
|
||||
static struct attribute *od_attrs[] = {
|
||||
&sampling_rate.attr,
|
||||
&up_threshold.attr,
|
||||
&sampling_down_factor.attr,
|
||||
@ -337,6 +337,7 @@ static struct attribute *od_attributes[] = {
|
||||
&io_is_busy.attr,
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(od);
|
||||
|
||||
/************************** sysfs end ************************/
|
||||
|
||||
@ -401,7 +402,7 @@ static struct od_ops od_ops = {
|
||||
|
||||
static struct dbs_governor od_dbs_gov = {
|
||||
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
|
||||
.kobj_type = { .default_attrs = od_attributes },
|
||||
.kobj_type = { .default_groups = od_groups },
|
||||
.gov_dbs_update = od_dbs_update,
|
||||
.alloc = od_alloc,
|
||||
.free = od_free,
|
||||
|
@ -664,19 +664,29 @@ static int intel_pstate_set_epb(int cpu, s16 pref)
|
||||
* 3 balance_power
|
||||
* 4 power
|
||||
*/
|
||||
|
||||
enum energy_perf_value_index {
|
||||
EPP_INDEX_DEFAULT = 0,
|
||||
EPP_INDEX_PERFORMANCE,
|
||||
EPP_INDEX_BALANCE_PERFORMANCE,
|
||||
EPP_INDEX_BALANCE_POWERSAVE,
|
||||
EPP_INDEX_POWERSAVE,
|
||||
};
|
||||
|
||||
static const char * const energy_perf_strings[] = {
|
||||
"default",
|
||||
"performance",
|
||||
"balance_performance",
|
||||
"balance_power",
|
||||
"power",
|
||||
[EPP_INDEX_DEFAULT] = "default",
|
||||
[EPP_INDEX_PERFORMANCE] = "performance",
|
||||
[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
|
||||
[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
|
||||
[EPP_INDEX_POWERSAVE] = "power",
|
||||
NULL
|
||||
};
|
||||
static const unsigned int epp_values[] = {
|
||||
HWP_EPP_PERFORMANCE,
|
||||
HWP_EPP_BALANCE_PERFORMANCE,
|
||||
HWP_EPP_BALANCE_POWERSAVE,
|
||||
HWP_EPP_POWERSAVE
|
||||
static unsigned int epp_values[] = {
|
||||
[EPP_INDEX_DEFAULT] = 0, /* Unused index */
|
||||
[EPP_INDEX_PERFORMANCE] = HWP_EPP_PERFORMANCE,
|
||||
[EPP_INDEX_BALANCE_PERFORMANCE] = HWP_EPP_BALANCE_PERFORMANCE,
|
||||
[EPP_INDEX_BALANCE_POWERSAVE] = HWP_EPP_BALANCE_POWERSAVE,
|
||||
[EPP_INDEX_POWERSAVE] = HWP_EPP_POWERSAVE,
|
||||
};
|
||||
|
||||
static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw_epp)
|
||||
@ -690,14 +700,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw
|
||||
return epp;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
|
||||
if (epp == HWP_EPP_PERFORMANCE)
|
||||
return 1;
|
||||
if (epp == HWP_EPP_BALANCE_PERFORMANCE)
|
||||
return 2;
|
||||
if (epp == HWP_EPP_BALANCE_POWERSAVE)
|
||||
return 3;
|
||||
if (epp == HWP_EPP_POWERSAVE)
|
||||
return 4;
|
||||
if (epp == epp_values[EPP_INDEX_PERFORMANCE])
|
||||
return EPP_INDEX_PERFORMANCE;
|
||||
if (epp == epp_values[EPP_INDEX_BALANCE_PERFORMANCE])
|
||||
return EPP_INDEX_BALANCE_PERFORMANCE;
|
||||
if (epp == epp_values[EPP_INDEX_BALANCE_POWERSAVE])
|
||||
return EPP_INDEX_BALANCE_POWERSAVE;
|
||||
if (epp == epp_values[EPP_INDEX_POWERSAVE])
|
||||
return EPP_INDEX_POWERSAVE;
|
||||
*raw_epp = epp;
|
||||
return 0;
|
||||
} else if (boot_cpu_has(X86_FEATURE_EPB)) {
|
||||
@ -757,7 +767,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
|
||||
if (use_raw)
|
||||
epp = raw_epp;
|
||||
else if (epp == -EINVAL)
|
||||
epp = epp_values[pref_index - 1];
|
||||
epp = epp_values[pref_index];
|
||||
|
||||
/*
|
||||
* To avoid confusion, refuse to set EPP to any values different
|
||||
@ -843,7 +853,7 @@ static ssize_t store_energy_performance_preference(
|
||||
* upfront.
|
||||
*/
|
||||
if (!raw)
|
||||
epp = ret ? epp_values[ret - 1] : cpu->epp_default;
|
||||
epp = ret ? epp_values[ret] : cpu->epp_default;
|
||||
|
||||
if (cpu->epp_cached != epp) {
|
||||
int err;
|
||||
@ -1124,19 +1134,22 @@ static void intel_pstate_update_policies(void)
|
||||
cpufreq_update_policy(cpu);
|
||||
}
|
||||
|
||||
static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
|
||||
struct cpufreq_policy *policy)
|
||||
{
|
||||
policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
|
||||
cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
|
||||
refresh_frequency_limits(policy);
|
||||
}
|
||||
|
||||
static void intel_pstate_update_max_freq(unsigned int cpu)
|
||||
{
|
||||
struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
|
||||
struct cpudata *cpudata;
|
||||
|
||||
if (!policy)
|
||||
return;
|
||||
|
||||
cpudata = all_cpu_data[cpu];
|
||||
policy->cpuinfo.max_freq = global.turbo_disabled_mf ?
|
||||
cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
|
||||
|
||||
refresh_frequency_limits(policy);
|
||||
__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
|
||||
|
||||
cpufreq_cpu_release(policy);
|
||||
}
|
||||
@ -1584,8 +1597,15 @@ static void intel_pstate_notify_work(struct work_struct *work)
|
||||
{
|
||||
struct cpudata *cpudata =
|
||||
container_of(to_delayed_work(work), struct cpudata, hwp_notify_work);
|
||||
struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
|
||||
|
||||
if (policy) {
|
||||
intel_pstate_get_hwp_cap(cpudata);
|
||||
__intel_pstate_update_max_freq(cpudata, policy);
|
||||
|
||||
cpufreq_cpu_release(policy);
|
||||
}
|
||||
|
||||
cpufreq_update_policy(cpudata->cpu);
|
||||
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
|
||||
}
|
||||
|
||||
@ -1679,10 +1699,18 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
|
||||
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
|
||||
|
||||
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
|
||||
if (cpudata->epp_default == -EINVAL)
|
||||
cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
|
||||
|
||||
intel_pstate_enable_hwp_interrupt(cpudata);
|
||||
|
||||
if (cpudata->epp_default >= 0)
|
||||
return;
|
||||
|
||||
if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) {
|
||||
cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
|
||||
} else {
|
||||
cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
|
||||
intel_pstate_set_epp(cpudata, cpudata->epp_default);
|
||||
}
|
||||
}
|
||||
|
||||
static int atom_get_min_pstate(void)
|
||||
@ -2486,18 +2514,14 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
|
||||
* HWP needs some special consideration, because HWP_REQUEST uses
|
||||
* abstract values to represent performance rather than pure ratios.
|
||||
*/
|
||||
if (hwp_active) {
|
||||
intel_pstate_get_hwp_cap(cpu);
|
||||
if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) {
|
||||
int scaling = cpu->pstate.scaling;
|
||||
int freq;
|
||||
|
||||
if (cpu->pstate.scaling != perf_ctl_scaling) {
|
||||
int scaling = cpu->pstate.scaling;
|
||||
int freq;
|
||||
|
||||
freq = max_policy_perf * perf_ctl_scaling;
|
||||
max_policy_perf = DIV_ROUND_UP(freq, scaling);
|
||||
freq = min_policy_perf * perf_ctl_scaling;
|
||||
min_policy_perf = DIV_ROUND_UP(freq, scaling);
|
||||
}
|
||||
freq = max_policy_perf * perf_ctl_scaling;
|
||||
max_policy_perf = DIV_ROUND_UP(freq, scaling);
|
||||
freq = min_policy_perf * perf_ctl_scaling;
|
||||
min_policy_perf = DIV_ROUND_UP(freq, scaling);
|
||||
}
|
||||
|
||||
pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
|
||||
@ -3349,6 +3373,16 @@ static bool intel_pstate_hwp_is_enabled(void)
|
||||
return !!(value & 0x1);
|
||||
}
|
||||
|
||||
static const struct x86_cpu_id intel_epp_balance_perf[] = {
|
||||
/*
|
||||
* Set EPP value as 102, this is the max suggested EPP
|
||||
* which can result in one core turbo frequency for
|
||||
* AlderLake Mobile CPUs.
|
||||
*/
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
|
||||
{}
|
||||
};
|
||||
|
||||
static int __init intel_pstate_init(void)
|
||||
{
|
||||
static struct cpudata **_all_cpu_data;
|
||||
@ -3438,6 +3472,13 @@ static int __init intel_pstate_init(void)
|
||||
|
||||
intel_pstate_sysfs_expose_params();
|
||||
|
||||
if (hwp_active) {
|
||||
const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf);
|
||||
|
||||
if (id)
|
||||
epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data;
|
||||
}
|
||||
|
||||
mutex_lock(&intel_pstate_driver_lock);
|
||||
rc = intel_pstate_register_driver(default_driver);
|
||||
mutex_unlock(&intel_pstate_driver_lock);
|
||||
|
@ -36,6 +36,8 @@ enum {
|
||||
struct mtk_cpufreq_data {
|
||||
struct cpufreq_frequency_table *table;
|
||||
void __iomem *reg_bases[REG_ARRAY_SIZE];
|
||||
struct resource *res;
|
||||
void __iomem *base;
|
||||
int nr_opp;
|
||||
};
|
||||
|
||||
@ -156,6 +158,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
|
||||
{
|
||||
struct mtk_cpufreq_data *data;
|
||||
struct device *dev = &pdev->dev;
|
||||
struct resource *res;
|
||||
void __iomem *base;
|
||||
int ret, i;
|
||||
int index;
|
||||
@ -170,9 +173,26 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
|
||||
if (index < 0)
|
||||
return index;
|
||||
|
||||
base = devm_platform_ioremap_resource(pdev, index);
|
||||
if (IS_ERR(base))
|
||||
return PTR_ERR(base);
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
|
||||
if (!res) {
|
||||
dev_err(dev, "failed to get mem resource %d\n", index);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!request_mem_region(res->start, resource_size(res), res->name)) {
|
||||
dev_err(dev, "failed to request resource %pR\n", res);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
base = ioremap(res->start, resource_size(res));
|
||||
if (!base) {
|
||||
dev_err(dev, "failed to map resource %pR\n", res);
|
||||
ret = -ENOMEM;
|
||||
goto release_region;
|
||||
}
|
||||
|
||||
data->base = base;
|
||||
data->res = res;
|
||||
|
||||
for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++)
|
||||
data->reg_bases[i] = base + offsets[i];
|
||||
@ -187,6 +207,9 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
|
||||
policy->driver_data = data;
|
||||
|
||||
return 0;
|
||||
release_region:
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
|
||||
@ -233,9 +256,13 @@ static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
|
||||
static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct mtk_cpufreq_data *data = policy->driver_data;
|
||||
struct resource *res = data->res;
|
||||
void __iomem *base = data->base;
|
||||
|
||||
/* HW should be in paused state now */
|
||||
writel_relaxed(0x0, data->reg_bases[REG_FREQ_ENABLE]);
|
||||
iounmap(base);
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -46,6 +46,7 @@ struct qcom_cpufreq_data {
|
||||
*/
|
||||
struct mutex throttle_lock;
|
||||
int throttle_irq;
|
||||
char irq_name[15];
|
||||
bool cancel_throttle;
|
||||
struct delayed_work throttle_work;
|
||||
struct cpufreq_policy *policy;
|
||||
@ -275,10 +276,10 @@ static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
|
||||
|
||||
static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
|
||||
{
|
||||
unsigned long max_capacity, capacity, freq_hz, throttled_freq;
|
||||
struct cpufreq_policy *policy = data->policy;
|
||||
int cpu = cpumask_first(policy->cpus);
|
||||
struct device *dev = get_cpu_device(cpu);
|
||||
unsigned long freq_hz, throttled_freq;
|
||||
struct dev_pm_opp *opp;
|
||||
unsigned int freq;
|
||||
|
||||
@ -295,16 +296,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
|
||||
|
||||
throttled_freq = freq_hz / HZ_PER_KHZ;
|
||||
|
||||
/* Update thermal pressure */
|
||||
|
||||
max_capacity = arch_scale_cpu_capacity(cpu);
|
||||
capacity = mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq);
|
||||
|
||||
/* Don't pass boost capacity to scheduler */
|
||||
if (capacity > max_capacity)
|
||||
capacity = max_capacity;
|
||||
|
||||
arch_set_thermal_pressure(policy->cpus, max_capacity - capacity);
|
||||
/* Update thermal pressure (the boost frequencies are accepted) */
|
||||
arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
|
||||
|
||||
/*
|
||||
* In the unlikely case policy is unregistered do not enable
|
||||
@ -342,9 +335,9 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
|
||||
|
||||
/* Disable interrupt and enable polling */
|
||||
disable_irq_nosync(c_data->throttle_irq);
|
||||
qcom_lmh_dcvs_notify(c_data);
|
||||
schedule_delayed_work(&c_data->throttle_work, 0);
|
||||
|
||||
return 0;
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static const struct qcom_cpufreq_soc_data qcom_soc_data = {
|
||||
@ -375,16 +368,17 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
|
||||
{
|
||||
struct qcom_cpufreq_data *data = policy->driver_data;
|
||||
struct platform_device *pdev = cpufreq_get_driver_data();
|
||||
char irq_name[15];
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Look for LMh interrupt. If no interrupt line is specified /
|
||||
* if there is an error, allow cpufreq to be enabled as usual.
|
||||
*/
|
||||
data->throttle_irq = platform_get_irq(pdev, index);
|
||||
if (data->throttle_irq <= 0)
|
||||
return data->throttle_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0;
|
||||
data->throttle_irq = platform_get_irq_optional(pdev, index);
|
||||
if (data->throttle_irq == -ENXIO)
|
||||
return 0;
|
||||
if (data->throttle_irq < 0)
|
||||
return data->throttle_irq;
|
||||
|
||||
data->cancel_throttle = false;
|
||||
data->policy = policy;
|
||||
@ -392,14 +386,19 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
|
||||
mutex_init(&data->throttle_lock);
|
||||
INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll);
|
||||
|
||||
snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu);
|
||||
snprintf(data->irq_name, sizeof(data->irq_name), "dcvsh-irq-%u", policy->cpu);
|
||||
ret = request_threaded_irq(data->throttle_irq, NULL, qcom_lmh_dcvs_handle_irq,
|
||||
IRQF_ONESHOT, irq_name, data);
|
||||
IRQF_ONESHOT, data->irq_name, data);
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "Error registering %s: %d\n", irq_name, ret);
|
||||
dev_err(&pdev->dev, "Error registering %s: %d\n", data->irq_name, ret);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus);
|
||||
if (ret)
|
||||
dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n",
|
||||
data->irq_name, data->throttle_irq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@
|
||||
* 1) Energy break even point
|
||||
* 2) Performance impact
|
||||
* 3) Latency tolerance (from pmqos infrastructure)
|
||||
* These these three factors are treated independently.
|
||||
* These three factors are treated independently.
|
||||
*
|
||||
* Energy break even point
|
||||
* -----------------------
|
||||
|
@ -335,6 +335,7 @@ static struct attribute *cpuidle_state_default_attrs[] = {
|
||||
&attr_default_status.attr,
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(cpuidle_state_default);
|
||||
|
||||
struct cpuidle_state_kobj {
|
||||
struct cpuidle_state *state;
|
||||
@ -448,7 +449,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj)
|
||||
|
||||
static struct kobj_type ktype_state_cpuidle = {
|
||||
.sysfs_ops = &cpuidle_state_sysfs_ops,
|
||||
.default_attrs = cpuidle_state_default_attrs,
|
||||
.default_groups = cpuidle_state_default_groups,
|
||||
.release = cpuidle_state_sysfs_release,
|
||||
};
|
||||
|
||||
@ -505,7 +506,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
|
||||
}
|
||||
|
||||
/**
|
||||
* cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes
|
||||
* cpuidle_remove_state_sysfs - removes the cpuidle states sysfs attributes
|
||||
* @device: the target device
|
||||
*/
|
||||
static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
|
||||
@ -591,10 +592,11 @@ static struct attribute *cpuidle_driver_default_attrs[] = {
|
||||
&attr_driver_name.attr,
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(cpuidle_driver_default);
|
||||
|
||||
static struct kobj_type ktype_driver_cpuidle = {
|
||||
.sysfs_ops = &cpuidle_driver_sysfs_ops,
|
||||
.default_attrs = cpuidle_driver_default_attrs,
|
||||
.default_groups = cpuidle_driver_default_groups,
|
||||
.release = cpuidle_driver_sysfs_release,
|
||||
};
|
||||
|
||||
|
@ -132,6 +132,15 @@ config ARM_RK3399_DMC_DEVFREQ
|
||||
It sets the frequency for the memory controller and reads the usage counts
|
||||
from hardware.
|
||||
|
||||
config ARM_SUN8I_A33_MBUS_DEVFREQ
|
||||
tristate "sun8i/sun50i MBUS DEVFREQ Driver"
|
||||
depends on ARCH_SUNXI || COMPILE_TEST
|
||||
depends on COMMON_CLK
|
||||
select DEVFREQ_GOV_SIMPLE_ONDEMAND
|
||||
help
|
||||
This adds the DEVFREQ driver for the MBUS controller in some
|
||||
Allwinner sun8i (A33 through H3) and sun50i (A64 and H5) SoCs.
|
||||
|
||||
source "drivers/devfreq/event/Kconfig"
|
||||
|
||||
endif # PM_DEVFREQ
|
||||
|
@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
|
||||
obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o
|
||||
obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o
|
||||
obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o
|
||||
obj-$(CONFIG_ARM_SUN8I_A33_MBUS_DEVFREQ) += sun8i-a33-mbus.o
|
||||
obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o
|
||||
|
||||
# DEVFREQ Event Drivers
|
||||
|
@ -382,8 +382,8 @@ static int devfreq_set_target(struct devfreq *devfreq, unsigned long new_freq,
|
||||
devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
|
||||
|
||||
if (devfreq_update_status(devfreq, new_freq))
|
||||
dev_err(&devfreq->dev,
|
||||
"Couldn't update frequency transition information.\n");
|
||||
dev_warn(&devfreq->dev,
|
||||
"Couldn't update frequency transition information.\n");
|
||||
|
||||
devfreq->previous_freq = new_freq;
|
||||
|
||||
|
511
drivers/devfreq/sun8i-a33-mbus.c
Normal file
511
drivers/devfreq/sun8i-a33-mbus.c
Normal file
@ -0,0 +1,511 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
//
|
||||
// Copyright (C) 2020-2021 Samuel Holland <samuel@sholland.org>
|
||||
//
|
||||
|
||||
#include <linux/clk.h>
|
||||
#include <linux/devfreq.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/iopoll.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/property.h>
|
||||
|
||||
#define MBUS_CR 0x0000
|
||||
#define MBUS_CR_GET_DRAM_TYPE(x) (((x) >> 16) & 0x7)
|
||||
#define MBUS_CR_DRAM_TYPE_DDR2 2
|
||||
#define MBUS_CR_DRAM_TYPE_DDR3 3
|
||||
#define MBUS_CR_DRAM_TYPE_DDR4 4
|
||||
#define MBUS_CR_DRAM_TYPE_LPDDR2 6
|
||||
#define MBUS_CR_DRAM_TYPE_LPDDR3 7
|
||||
|
||||
#define MBUS_TMR 0x000c
|
||||
#define MBUS_TMR_PERIOD(x) ((x) - 1)
|
||||
|
||||
#define MBUS_PMU_CFG 0x009c
|
||||
#define MBUS_PMU_CFG_PERIOD(x) (((x) - 1) << 16)
|
||||
#define MBUS_PMU_CFG_UNIT (0x3 << 1)
|
||||
#define MBUS_PMU_CFG_UNIT_B (0x0 << 1)
|
||||
#define MBUS_PMU_CFG_UNIT_KB (0x1 << 1)
|
||||
#define MBUS_PMU_CFG_UNIT_MB (0x2 << 1)
|
||||
#define MBUS_PMU_CFG_ENABLE (0x1 << 0)
|
||||
|
||||
#define MBUS_PMU_BWCR(n) (0x00a0 + (0x04 * (n)))
|
||||
|
||||
#define MBUS_TOTAL_BWCR MBUS_PMU_BWCR(5)
|
||||
#define MBUS_TOTAL_BWCR_H616 MBUS_PMU_BWCR(13)
|
||||
|
||||
#define MBUS_MDFSCR 0x0100
|
||||
#define MBUS_MDFSCR_BUFFER_TIMING (0x1 << 15)
|
||||
#define MBUS_MDFSCR_PAD_HOLD (0x1 << 13)
|
||||
#define MBUS_MDFSCR_BYPASS (0x1 << 4)
|
||||
#define MBUS_MDFSCR_MODE (0x1 << 1)
|
||||
#define MBUS_MDFSCR_MODE_DFS (0x0 << 1)
|
||||
#define MBUS_MDFSCR_MODE_CFS (0x1 << 1)
|
||||
#define MBUS_MDFSCR_START (0x1 << 0)
|
||||
|
||||
#define MBUS_MDFSMRMR 0x0108
|
||||
|
||||
#define DRAM_PWRCTL 0x0004
|
||||
#define DRAM_PWRCTL_SELFREF_EN (0x1 << 0)
|
||||
|
||||
#define DRAM_RFSHTMG 0x0090
|
||||
#define DRAM_RFSHTMG_TREFI(x) ((x) << 16)
|
||||
#define DRAM_RFSHTMG_TRFC(x) ((x) << 0)
|
||||
|
||||
#define DRAM_VTFCR 0x00b8
|
||||
#define DRAM_VTFCR_VTF_ENABLE (0x3 << 8)
|
||||
|
||||
#define DRAM_ODTMAP 0x0120
|
||||
|
||||
#define DRAM_DX_MAX 4
|
||||
|
||||
#define DRAM_DXnGCR0(n) (0x0344 + 0x80 * (n))
|
||||
#define DRAM_DXnGCR0_DXODT (0x3 << 4)
|
||||
#define DRAM_DXnGCR0_DXODT_DYNAMIC (0x0 << 4)
|
||||
#define DRAM_DXnGCR0_DXODT_ENABLED (0x1 << 4)
|
||||
#define DRAM_DXnGCR0_DXODT_DISABLED (0x2 << 4)
|
||||
#define DRAM_DXnGCR0_DXEN (0x1 << 0)
|
||||
|
||||
struct sun8i_a33_mbus_variant {
|
||||
u32 min_dram_divider;
|
||||
u32 max_dram_divider;
|
||||
u32 odt_freq_mhz;
|
||||
};
|
||||
|
||||
struct sun8i_a33_mbus {
|
||||
const struct sun8i_a33_mbus_variant *variant;
|
||||
void __iomem *reg_dram;
|
||||
void __iomem *reg_mbus;
|
||||
struct clk *clk_bus;
|
||||
struct clk *clk_dram;
|
||||
struct clk *clk_mbus;
|
||||
struct devfreq *devfreq_dram;
|
||||
struct devfreq_simple_ondemand_data gov_data;
|
||||
struct devfreq_dev_profile profile;
|
||||
u32 data_width;
|
||||
u32 nominal_bw;
|
||||
u32 odtmap;
|
||||
u32 tREFI_ns;
|
||||
u32 tRFC_ns;
|
||||
unsigned long freq_table[];
|
||||
};
|
||||
|
||||
/*
|
||||
* The unit for this value is (MBUS clock cycles / MBUS_TMR_PERIOD). When
|
||||
* MBUS_TMR_PERIOD is programmed to match the MBUS clock frequency in MHz, as
|
||||
* it is during DRAM init and during probe, the resulting unit is microseconds.
|
||||
*/
|
||||
static int pmu_period = 50000;
|
||||
module_param(pmu_period, int, 0644);
|
||||
MODULE_PARM_DESC(pmu_period, "Bandwidth measurement period (microseconds)");
|
||||
|
||||
static u32 sun8i_a33_mbus_get_peak_bw(struct sun8i_a33_mbus *priv)
|
||||
{
|
||||
/* Returns the peak transfer (in KiB) during any single PMU period. */
|
||||
return readl_relaxed(priv->reg_mbus + MBUS_TOTAL_BWCR);
|
||||
}
|
||||
|
||||
static void sun8i_a33_mbus_restart_pmu_counters(struct sun8i_a33_mbus *priv)
|
||||
{
|
||||
u32 pmu_cfg = MBUS_PMU_CFG_PERIOD(pmu_period) | MBUS_PMU_CFG_UNIT_KB;
|
||||
|
||||
/* All PMU counters are cleared on a disable->enable transition. */
|
||||
writel_relaxed(pmu_cfg,
|
||||
priv->reg_mbus + MBUS_PMU_CFG);
|
||||
writel_relaxed(pmu_cfg | MBUS_PMU_CFG_ENABLE,
|
||||
priv->reg_mbus + MBUS_PMU_CFG);
|
||||
|
||||
}
|
||||
|
||||
static void sun8i_a33_mbus_update_nominal_bw(struct sun8i_a33_mbus *priv,
|
||||
u32 ddr_freq_mhz)
|
||||
{
|
||||
/*
|
||||
* Nominal bandwidth (KiB per PMU period):
|
||||
*
|
||||
* DDR transfers microseconds KiB
|
||||
* ------------- * ------------ * --------
|
||||
* microsecond PMU period transfer
|
||||
*/
|
||||
priv->nominal_bw = ddr_freq_mhz * pmu_period * priv->data_width / 1024;
|
||||
}
|
||||
|
||||
static int sun8i_a33_mbus_set_dram_freq(struct sun8i_a33_mbus *priv,
|
||||
unsigned long freq)
|
||||
{
|
||||
u32 ddr_freq_mhz = freq / USEC_PER_SEC; /* DDR */
|
||||
u32 dram_freq_mhz = ddr_freq_mhz / 2; /* SDR */
|
||||
u32 mctl_freq_mhz = dram_freq_mhz / 2; /* HDR */
|
||||
u32 dxodt, mdfscr, pwrctl, vtfcr;
|
||||
u32 i, tREFI_32ck, tRFC_ck;
|
||||
int ret;
|
||||
|
||||
/* The rate change is not effective until the MDFS process runs. */
|
||||
ret = clk_set_rate(priv->clk_dram, freq);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Disable automatic self-refesh and VTF before starting MDFS. */
|
||||
pwrctl = readl_relaxed(priv->reg_dram + DRAM_PWRCTL) &
|
||||
~DRAM_PWRCTL_SELFREF_EN;
|
||||
writel_relaxed(pwrctl, priv->reg_dram + DRAM_PWRCTL);
|
||||
vtfcr = readl_relaxed(priv->reg_dram + DRAM_VTFCR);
|
||||
writel_relaxed(vtfcr & ~DRAM_VTFCR_VTF_ENABLE,
|
||||
priv->reg_dram + DRAM_VTFCR);
|
||||
|
||||
/* Set up MDFS and enable double buffering for timing registers. */
|
||||
mdfscr = MBUS_MDFSCR_MODE_DFS |
|
||||
MBUS_MDFSCR_BYPASS |
|
||||
MBUS_MDFSCR_PAD_HOLD |
|
||||
MBUS_MDFSCR_BUFFER_TIMING;
|
||||
writel(mdfscr, priv->reg_mbus + MBUS_MDFSCR);
|
||||
|
||||
/* Update the buffered copy of RFSHTMG. */
|
||||
tREFI_32ck = priv->tREFI_ns * mctl_freq_mhz / 1000 / 32;
|
||||
tRFC_ck = DIV_ROUND_UP(priv->tRFC_ns * mctl_freq_mhz, 1000);
|
||||
writel(DRAM_RFSHTMG_TREFI(tREFI_32ck) | DRAM_RFSHTMG_TRFC(tRFC_ck),
|
||||
priv->reg_dram + DRAM_RFSHTMG);
|
||||
|
||||
/* Enable ODT if needed, or disable it to save power. */
|
||||
if (priv->odtmap && dram_freq_mhz > priv->variant->odt_freq_mhz) {
|
||||
dxodt = DRAM_DXnGCR0_DXODT_DYNAMIC;
|
||||
writel(priv->odtmap, priv->reg_dram + DRAM_ODTMAP);
|
||||
} else {
|
||||
dxodt = DRAM_DXnGCR0_DXODT_DISABLED;
|
||||
writel(0, priv->reg_dram + DRAM_ODTMAP);
|
||||
}
|
||||
for (i = 0; i < DRAM_DX_MAX; ++i) {
|
||||
void __iomem *reg = priv->reg_dram + DRAM_DXnGCR0(i);
|
||||
|
||||
writel((readl(reg) & ~DRAM_DXnGCR0_DXODT) | dxodt, reg);
|
||||
}
|
||||
|
||||
dev_dbg(priv->devfreq_dram->dev.parent,
|
||||
"Setting DRAM to %u MHz, tREFI=%u, tRFC=%u, ODT=%s\n",
|
||||
dram_freq_mhz, tREFI_32ck, tRFC_ck,
|
||||
dxodt == DRAM_DXnGCR0_DXODT_DYNAMIC ? "dynamic" : "disabled");
|
||||
|
||||
/* Trigger hardware MDFS. */
|
||||
writel(mdfscr | MBUS_MDFSCR_START, priv->reg_mbus + MBUS_MDFSCR);
|
||||
ret = readl_poll_timeout_atomic(priv->reg_mbus + MBUS_MDFSCR, mdfscr,
|
||||
!(mdfscr & MBUS_MDFSCR_START), 10, 1000);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Disable double buffering. */
|
||||
writel(0, priv->reg_mbus + MBUS_MDFSCR);
|
||||
|
||||
/* Restore VTF configuration. */
|
||||
writel_relaxed(vtfcr, priv->reg_dram + DRAM_VTFCR);
|
||||
|
||||
/* Enable automatic self-refresh at the lowest frequency only. */
|
||||
if (freq == priv->freq_table[0])
|
||||
pwrctl |= DRAM_PWRCTL_SELFREF_EN;
|
||||
writel_relaxed(pwrctl, priv->reg_dram + DRAM_PWRCTL);
|
||||
|
||||
sun8i_a33_mbus_restart_pmu_counters(priv);
|
||||
sun8i_a33_mbus_update_nominal_bw(priv, ddr_freq_mhz);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * devfreq ->target() callback.
 *
 * Rounds *@freq to a recommended OPP and switches the DRAM to it unless it
 * already equals the previous frequency. When the switch fails, *@freq is
 * reset to the previous frequency and the error is returned.
 */
static int sun8i_a33_mbus_set_dram_target(struct device *dev,
					  unsigned long *freq, u32 flags)
{
	struct sun8i_a33_mbus *priv = dev_get_drvdata(dev);
	struct devfreq *df = priv->devfreq_dram;
	struct dev_pm_opp *opp;
	int ret;

	opp = devfreq_recommended_opp(dev, freq, flags);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	/* Only the rounded frequency is needed; drop the OPP right away. */
	dev_pm_opp_put(opp);

	if (*freq == df->previous_freq)
		return 0;

	ret = sun8i_a33_mbus_set_dram_freq(priv, *freq);
	if (!ret)
		return 0;

	dev_warn(dev, "failed to set DRAM frequency: %d\n", ret);
	*freq = df->previous_freq;

	return ret;
}
|
||||
|
||||
static int sun8i_a33_mbus_get_dram_status(struct device *dev,
|
||||
struct devfreq_dev_status *stat)
|
||||
{
|
||||
struct sun8i_a33_mbus *priv = dev_get_drvdata(dev);
|
||||
|
||||
stat->busy_time = sun8i_a33_mbus_get_peak_bw(priv);
|
||||
stat->total_time = priv->nominal_bw;
|
||||
stat->current_frequency = priv->devfreq_dram->previous_freq;
|
||||
|
||||
sun8i_a33_mbus_restart_pmu_counters(priv);
|
||||
|
||||
dev_dbg(dev, "Using %lu/%lu (%lu%%) at %lu MHz\n",
|
||||
stat->busy_time, stat->total_time,
|
||||
DIV_ROUND_CLOSEST(stat->busy_time * 100, stat->total_time),
|
||||
stat->current_frequency / USEC_PER_SEC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sun8i_a33_mbus_hw_init(struct device *dev,
|
||||
struct sun8i_a33_mbus *priv,
|
||||
unsigned long ddr_freq)
|
||||
{
|
||||
u32 i, mbus_cr, mbus_freq_mhz;
|
||||
|
||||
/* Choose tREFI and tRFC to match the configured DRAM type. */
|
||||
mbus_cr = readl_relaxed(priv->reg_mbus + MBUS_CR);
|
||||
switch (MBUS_CR_GET_DRAM_TYPE(mbus_cr)) {
|
||||
case MBUS_CR_DRAM_TYPE_DDR2:
|
||||
case MBUS_CR_DRAM_TYPE_DDR3:
|
||||
case MBUS_CR_DRAM_TYPE_DDR4:
|
||||
priv->tREFI_ns = 7800;
|
||||
priv->tRFC_ns = 350;
|
||||
break;
|
||||
case MBUS_CR_DRAM_TYPE_LPDDR2:
|
||||
case MBUS_CR_DRAM_TYPE_LPDDR3:
|
||||
priv->tREFI_ns = 3900;
|
||||
priv->tRFC_ns = 210;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Save ODTMAP so it can be restored when raising the frequency. */
|
||||
priv->odtmap = readl_relaxed(priv->reg_dram + DRAM_ODTMAP);
|
||||
|
||||
/* Compute the DRAM data bus width by counting enabled DATx8 blocks. */
|
||||
for (i = 0; i < DRAM_DX_MAX; ++i) {
|
||||
void __iomem *reg = priv->reg_dram + DRAM_DXnGCR0(i);
|
||||
|
||||
if (!(readl_relaxed(reg) & DRAM_DXnGCR0_DXEN))
|
||||
break;
|
||||
}
|
||||
priv->data_width = i;
|
||||
|
||||
dev_dbg(dev, "Detected %u-bit %sDDRx with%s ODT\n",
|
||||
priv->data_width * 8,
|
||||
MBUS_CR_GET_DRAM_TYPE(mbus_cr) > 4 ? "LP" : "",
|
||||
priv->odtmap ? "" : "out");
|
||||
|
||||
/* Program MBUS_TMR such that the PMU period unit is microseconds. */
|
||||
mbus_freq_mhz = clk_get_rate(priv->clk_mbus) / USEC_PER_SEC;
|
||||
writel_relaxed(MBUS_TMR_PERIOD(mbus_freq_mhz),
|
||||
priv->reg_mbus + MBUS_TMR);
|
||||
|
||||
/* "Master Ready Mask Register" bits must be set or MDFS will block. */
|
||||
writel_relaxed(0xffffffff, priv->reg_mbus + MBUS_MDFSMRMR);
|
||||
|
||||
sun8i_a33_mbus_restart_pmu_counters(priv);
|
||||
sun8i_a33_mbus_update_nominal_bw(priv, ddr_freq / USEC_PER_SEC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* System suspend: gate the bus clock. Always returns 0. */
static int __maybe_unused sun8i_a33_mbus_suspend(struct device *dev)
{
	struct sun8i_a33_mbus *mbus = dev_get_drvdata(dev);
	struct clk *bus_clk = mbus->clk_bus;

	clk_disable_unprepare(bus_clk);

	return 0;
}
|
||||
|
||||
/* System resume: re-enable the bus clock and report the clk error, if any. */
static int __maybe_unused sun8i_a33_mbus_resume(struct device *dev)
{
	struct sun8i_a33_mbus *mbus = dev_get_drvdata(dev);
	int ret = clk_prepare_enable(mbus->clk_bus);

	return ret;
}
|
||||
|
||||
static int sun8i_a33_mbus_probe(struct platform_device *pdev)
|
||||
{
|
||||
const struct sun8i_a33_mbus_variant *variant;
|
||||
struct device *dev = &pdev->dev;
|
||||
struct sun8i_a33_mbus *priv;
|
||||
unsigned long base_freq;
|
||||
unsigned int max_state;
|
||||
const char *err;
|
||||
int i, ret;
|
||||
|
||||
variant = device_get_match_data(dev);
|
||||
if (!variant)
|
||||
return -EINVAL;
|
||||
|
||||
max_state = variant->max_dram_divider - variant->min_dram_divider + 1;
|
||||
|
||||
priv = devm_kzalloc(dev, struct_size(priv, freq_table, max_state), GFP_KERNEL);
|
||||
if (!priv)
|
||||
return -ENOMEM;
|
||||
|
||||
platform_set_drvdata(pdev, priv);
|
||||
|
||||
priv->variant = variant;
|
||||
|
||||
priv->reg_dram = devm_platform_ioremap_resource_byname(pdev, "dram");
|
||||
if (IS_ERR(priv->reg_dram))
|
||||
return PTR_ERR(priv->reg_dram);
|
||||
|
||||
priv->reg_mbus = devm_platform_ioremap_resource_byname(pdev, "mbus");
|
||||
if (IS_ERR(priv->reg_mbus))
|
||||
return PTR_ERR(priv->reg_mbus);
|
||||
|
||||
priv->clk_bus = devm_clk_get(dev, "bus");
|
||||
if (IS_ERR(priv->clk_bus))
|
||||
return dev_err_probe(dev, PTR_ERR(priv->clk_bus),
|
||||
"failed to get bus clock\n");
|
||||
|
||||
priv->clk_dram = devm_clk_get(dev, "dram");
|
||||
if (IS_ERR(priv->clk_dram))
|
||||
return dev_err_probe(dev, PTR_ERR(priv->clk_dram),
|
||||
"failed to get dram clock\n");
|
||||
|
||||
priv->clk_mbus = devm_clk_get(dev, "mbus");
|
||||
if (IS_ERR(priv->clk_mbus))
|
||||
return dev_err_probe(dev, PTR_ERR(priv->clk_mbus),
|
||||
"failed to get mbus clock\n");
|
||||
|
||||
ret = clk_prepare_enable(priv->clk_bus);
|
||||
if (ret)
|
||||
return dev_err_probe(dev, ret,
|
||||
"failed to enable bus clock\n");
|
||||
|
||||
/* Lock the DRAM clock rate to keep priv->nominal_bw in sync. */
|
||||
ret = clk_rate_exclusive_get(priv->clk_dram);
|
||||
if (ret) {
|
||||
err = "failed to lock dram clock rate\n";
|
||||
goto err_disable_bus;
|
||||
}
|
||||
|
||||
/* Lock the MBUS clock rate to keep MBUS_TMR_PERIOD in sync. */
|
||||
ret = clk_rate_exclusive_get(priv->clk_mbus);
|
||||
if (ret) {
|
||||
err = "failed to lock mbus clock rate\n";
|
||||
goto err_unlock_dram;
|
||||
}
|
||||
|
||||
priv->gov_data.upthreshold = 10;
|
||||
priv->gov_data.downdifferential = 5;
|
||||
|
||||
priv->profile.initial_freq = clk_get_rate(priv->clk_dram);
|
||||
priv->profile.polling_ms = 1000;
|
||||
priv->profile.target = sun8i_a33_mbus_set_dram_target;
|
||||
priv->profile.get_dev_status = sun8i_a33_mbus_get_dram_status;
|
||||
priv->profile.freq_table = priv->freq_table;
|
||||
priv->profile.max_state = max_state;
|
||||
|
||||
ret = devm_pm_opp_set_clkname(dev, "dram");
|
||||
if (ret) {
|
||||
err = "failed to add OPP table\n";
|
||||
goto err_unlock_mbus;
|
||||
}
|
||||
|
||||
base_freq = clk_get_rate(clk_get_parent(priv->clk_dram));
|
||||
for (i = 0; i < max_state; ++i) {
|
||||
unsigned int div = variant->max_dram_divider - i;
|
||||
|
||||
priv->freq_table[i] = base_freq / div;
|
||||
|
||||
ret = dev_pm_opp_add(dev, priv->freq_table[i], 0);
|
||||
if (ret) {
|
||||
err = "failed to add OPPs\n";
|
||||
goto err_remove_opps;
|
||||
}
|
||||
}
|
||||
|
||||
ret = sun8i_a33_mbus_hw_init(dev, priv, priv->profile.initial_freq);
|
||||
if (ret) {
|
||||
err = "failed to init hardware\n";
|
||||
goto err_remove_opps;
|
||||
}
|
||||
|
||||
priv->devfreq_dram = devfreq_add_device(dev, &priv->profile,
|
||||
DEVFREQ_GOV_SIMPLE_ONDEMAND,
|
||||
&priv->gov_data);
|
||||
if (IS_ERR(priv->devfreq_dram)) {
|
||||
ret = PTR_ERR(priv->devfreq_dram);
|
||||
err = "failed to add devfreq device\n";
|
||||
goto err_remove_opps;
|
||||
}
|
||||
|
||||
/*
|
||||
* This must be set manually after registering the devfreq device,
|
||||
* because there is no way to select a dynamic OPP as the suspend OPP.
|
||||
*/
|
||||
priv->devfreq_dram->suspend_freq = priv->freq_table[0];
|
||||
|
||||
return 0;
|
||||
|
||||
err_remove_opps:
|
||||
dev_pm_opp_remove_all_dynamic(dev);
|
||||
err_unlock_mbus:
|
||||
clk_rate_exclusive_put(priv->clk_mbus);
|
||||
err_unlock_dram:
|
||||
clk_rate_exclusive_put(priv->clk_dram);
|
||||
err_disable_bus:
|
||||
clk_disable_unprepare(priv->clk_bus);
|
||||
|
||||
return dev_err_probe(dev, ret, err);
|
||||
}
|
||||
|
||||
static int sun8i_a33_mbus_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct sun8i_a33_mbus *priv = platform_get_drvdata(pdev);
|
||||
unsigned long initial_freq = priv->profile.initial_freq;
|
||||
struct device *dev = &pdev->dev;
|
||||
int ret;
|
||||
|
||||
devfreq_remove_device(priv->devfreq_dram);
|
||||
|
||||
ret = sun8i_a33_mbus_set_dram_freq(priv, initial_freq);
|
||||
if (ret)
|
||||
dev_warn(dev, "failed to restore DRAM frequency: %d\n", ret);
|
||||
|
||||
dev_pm_opp_remove_all_dynamic(dev);
|
||||
clk_rate_exclusive_put(priv->clk_mbus);
|
||||
clk_rate_exclusive_put(priv->clk_dram);
|
||||
clk_disable_unprepare(priv->clk_bus);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct sun8i_a33_mbus_variant sun50i_a64_mbus = {
|
||||
.min_dram_divider = 1,
|
||||
.max_dram_divider = 4,
|
||||
.odt_freq_mhz = 400,
|
||||
};
|
||||
|
||||
static const struct of_device_id sun8i_a33_mbus_of_match[] = {
|
||||
{ .compatible = "allwinner,sun50i-a64-mbus", .data = &sun50i_a64_mbus },
|
||||
{ .compatible = "allwinner,sun50i-h5-mbus", .data = &sun50i_a64_mbus },
|
||||
{ },
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, sun8i_a33_mbus_of_match);
|
||||
|
||||
static SIMPLE_DEV_PM_OPS(sun8i_a33_mbus_pm_ops,
|
||||
sun8i_a33_mbus_suspend, sun8i_a33_mbus_resume);
|
||||
|
||||
static struct platform_driver sun8i_a33_mbus_driver = {
|
||||
.probe = sun8i_a33_mbus_probe,
|
||||
.remove = sun8i_a33_mbus_remove,
|
||||
.driver = {
|
||||
.name = "sun8i-a33-mbus",
|
||||
.of_match_table = sun8i_a33_mbus_of_match,
|
||||
.pm = pm_ptr(&sun8i_a33_mbus_pm_ops),
|
||||
},
|
||||
};
|
||||
module_platform_driver(sun8i_a33_mbus_driver);
|
||||
|
||||
MODULE_AUTHOR("Samuel Holland <samuel@sholland.org>");
|
||||
MODULE_DESCRIPTION("Allwinner sun8i/sun50i MBUS DEVFREQ Driver");
|
||||
MODULE_LICENSE("GPL v2");
|
@ -1103,17 +1103,17 @@ static int jz4740_mmc_remove(struct platform_device *pdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __maybe_unused jz4740_mmc_suspend(struct device *dev)
|
||||
static int jz4740_mmc_suspend(struct device *dev)
|
||||
{
|
||||
return pinctrl_pm_select_sleep_state(dev);
|
||||
}
|
||||
|
||||
static int __maybe_unused jz4740_mmc_resume(struct device *dev)
|
||||
static int jz4740_mmc_resume(struct device *dev)
|
||||
{
|
||||
return pinctrl_select_default_state(dev);
|
||||
}
|
||||
|
||||
static SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend,
|
||||
DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend,
|
||||
jz4740_mmc_resume);
|
||||
|
||||
static struct platform_driver jz4740_mmc_driver = {
|
||||
@ -1123,7 +1123,7 @@ static struct platform_driver jz4740_mmc_driver = {
|
||||
.name = "jz4740-mmc",
|
||||
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
|
||||
.of_match_table = of_match_ptr(jz4740_mmc_of_match),
|
||||
.pm = pm_ptr(&jz4740_mmc_pm_ops),
|
||||
.pm = pm_sleep_ptr(&jz4740_mmc_pm_ops),
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -1183,7 +1183,6 @@ static int mxcmci_remove(struct platform_device *pdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
static int mxcmci_suspend(struct device *dev)
|
||||
{
|
||||
struct mmc_host *mmc = dev_get_drvdata(dev);
|
||||
@ -1210,9 +1209,8 @@ static int mxcmci_resume(struct device *dev)
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
|
||||
DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
|
||||
|
||||
static struct platform_driver mxcmci_driver = {
|
||||
.probe = mxcmci_probe,
|
||||
@ -1220,7 +1218,7 @@ static struct platform_driver mxcmci_driver = {
|
||||
.driver = {
|
||||
.name = DRIVER_NAME,
|
||||
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
|
||||
.pm = &mxcmci_pm_ops,
|
||||
.pm = pm_sleep_ptr(&mxcmci_pm_ops),
|
||||
.of_match_table = mxcmci_of_match,
|
||||
}
|
||||
};
|
||||
|
@ -5460,7 +5460,9 @@ static struct pci_driver rtl8169_pci_driver = {
|
||||
.probe = rtl_init_one,
|
||||
.remove = rtl_remove_one,
|
||||
.shutdown = rtl_shutdown,
|
||||
.driver.pm = pm_ptr(&rtl8169_pm_ops),
|
||||
#ifdef CONFIG_PM
|
||||
.driver.pm = &rtl8169_pm_ops,
|
||||
#endif
|
||||
};
|
||||
|
||||
module_pci_driver(rtl8169_pci_driver);
|
||||
|
@ -382,7 +382,7 @@ void dtpm_unregister(struct dtpm *dtpm)
|
||||
{
|
||||
powercap_unregister_zone(pct, &dtpm->zone);
|
||||
|
||||
pr_info("Unregistered dtpm node '%s'\n", dtpm->zone.name);
|
||||
pr_debug("Unregistered dtpm node '%s'\n", dtpm->zone.name);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -453,8 +453,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
|
||||
dtpm->power_limit = dtpm->power_max;
|
||||
}
|
||||
|
||||
pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
|
||||
dtpm->zone.name, dtpm->power_min, dtpm->power_max);
|
||||
pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n",
|
||||
dtpm->zone.name, dtpm->power_min, dtpm->power_max);
|
||||
|
||||
mutex_unlock(&dtpm_lock);
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
*
|
||||
* All of the kthreads used for idle injection are created at init time.
|
||||
*
|
||||
* Next, the users of the the idle injection framework provide a cpumask via
|
||||
* Next, the users of the idle injection framework provide a cpumask via
|
||||
* its register function. The kthreads will be synchronized with respect to
|
||||
* this cpumask.
|
||||
*
|
||||
|
@ -61,6 +61,20 @@
|
||||
#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
|
||||
#define PP_POLICY_MASK 0x1F
|
||||
|
||||
/*
|
||||
* SPR has different layout for Psys Domain PowerLimit registers.
|
||||
* There are 17 bits of PL1 and PL2 instead of 15 bits.
|
||||
* The Enable bits and TimeWindow bits are also shifted as a result.
|
||||
*/
|
||||
#define PSYS_POWER_LIMIT1_MASK 0x1FFFF
|
||||
#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
|
||||
|
||||
#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
|
||||
#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
|
||||
|
||||
#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
|
||||
#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
|
||||
|
||||
/* Non HW constants */
|
||||
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
|
||||
#define RAPL_PRIMITIVE_DUMMY BIT(2)
|
||||
@ -97,6 +111,7 @@ struct rapl_defaults {
|
||||
bool to_raw);
|
||||
unsigned int dram_domain_energy_unit;
|
||||
unsigned int psys_domain_energy_unit;
|
||||
bool spr_psys_bits;
|
||||
};
|
||||
static struct rapl_defaults *rapl_defaults;
|
||||
|
||||
@ -669,12 +684,51 @@ static struct rapl_primitive_info rpi[] = {
|
||||
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
|
||||
RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
|
||||
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
|
||||
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
|
||||
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
|
||||
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
|
||||
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
|
||||
PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
|
||||
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
|
||||
/* non-hardware */
|
||||
PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
|
||||
RAPL_PRIMITIVE_DERIVED),
|
||||
{NULL, 0, 0, 0},
|
||||
};
|
||||
|
||||
static enum rapl_primitives
|
||||
prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
|
||||
{
|
||||
if (!rapl_defaults->spr_psys_bits)
|
||||
return prim;
|
||||
|
||||
if (rd->id != RAPL_DOMAIN_PLATFORM)
|
||||
return prim;
|
||||
|
||||
switch (prim) {
|
||||
case POWER_LIMIT1:
|
||||
return PSYS_POWER_LIMIT1;
|
||||
case POWER_LIMIT2:
|
||||
return PSYS_POWER_LIMIT2;
|
||||
case PL1_ENABLE:
|
||||
return PSYS_PL1_ENABLE;
|
||||
case PL2_ENABLE:
|
||||
return PSYS_PL2_ENABLE;
|
||||
case TIME_WINDOW1:
|
||||
return PSYS_TIME_WINDOW1;
|
||||
case TIME_WINDOW2:
|
||||
return PSYS_TIME_WINDOW2;
|
||||
default:
|
||||
return prim;
|
||||
}
|
||||
}
|
||||
|
||||
/* Read primitive data based on its related struct rapl_primitive_info.
|
||||
* if xlate flag is set, return translated data based on data units, i.e.
|
||||
* time, energy, and power.
|
||||
@ -692,7 +746,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
|
||||
enum rapl_primitives prim, bool xlate, u64 *data)
|
||||
{
|
||||
u64 value;
|
||||
struct rapl_primitive_info *rp = &rpi[prim];
|
||||
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
|
||||
struct rapl_primitive_info *rp = &rpi[prim_fixed];
|
||||
struct reg_action ra;
|
||||
int cpu;
|
||||
|
||||
@ -738,7 +793,8 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
|
||||
enum rapl_primitives prim,
|
||||
unsigned long long value)
|
||||
{
|
||||
struct rapl_primitive_info *rp = &rpi[prim];
|
||||
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
|
||||
struct rapl_primitive_info *rp = &rpi[prim_fixed];
|
||||
int cpu;
|
||||
u64 bits;
|
||||
struct reg_action ra;
|
||||
@ -981,6 +1037,7 @@ static const struct rapl_defaults rapl_defaults_spr_server = {
|
||||
.compute_time_window = rapl_compute_time_window_core,
|
||||
.dram_domain_energy_unit = 15300,
|
||||
.psys_domain_energy_unit = 1000000000,
|
||||
.spr_psys_bits = true,
|
||||
};
|
||||
|
||||
static const struct rapl_defaults rapl_defaults_byt = {
|
||||
|
@ -462,7 +462,6 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
struct cpumask *cpus;
|
||||
unsigned int frequency;
|
||||
unsigned long max_capacity, capacity;
|
||||
int ret;
|
||||
|
||||
/* Request state should be less than max_level */
|
||||
@ -479,10 +478,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
|
||||
if (ret >= 0) {
|
||||
cpufreq_cdev->cpufreq_state = state;
|
||||
cpus = cpufreq_cdev->policy->related_cpus;
|
||||
max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus));
|
||||
capacity = frequency * max_capacity;
|
||||
capacity /= cpufreq_cdev->policy->cpuinfo.max_freq;
|
||||
arch_set_thermal_pressure(cpus, max_capacity - capacity);
|
||||
arch_update_thermal_pressure(cpus, frequency);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
@ -138,6 +138,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
|
||||
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
|
||||
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
|
||||
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
|
||||
extern int cppc_set_enable(int cpu, bool enable);
|
||||
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
|
||||
extern bool acpi_cpc_valid(void);
|
||||
extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data);
|
||||
@ -162,6 +163,10 @@ static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
static inline int cppc_set_enable(int cpu, bool enable)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
static inline int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
|
@ -506,7 +506,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
|
||||
int acpi_resources_are_enforced(void);
|
||||
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
void __init acpi_no_s4_hw_signature(void);
|
||||
void __init acpi_check_s4_hw_signature(int check);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
|
@ -56,8 +56,8 @@ static inline unsigned long topology_get_thermal_pressure(int cpu)
|
||||
return per_cpu(thermal_pressure, cpu);
|
||||
}
|
||||
|
||||
void topology_set_thermal_pressure(const struct cpumask *cpus,
|
||||
unsigned long th_pressure);
|
||||
void topology_update_thermal_pressure(const struct cpumask *cpus,
|
||||
unsigned long capped_freq);
|
||||
|
||||
struct cpu_topology {
|
||||
int thread_id;
|
||||
|
@ -70,6 +70,4 @@ void dtpm_unregister(struct dtpm *dtpm);
|
||||
|
||||
int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
|
||||
|
||||
int dtpm_register_cpu(struct dtpm *parent);
|
||||
|
||||
#endif
|
||||
|
@ -58,6 +58,12 @@ enum rapl_primitives {
|
||||
THROTTLED_TIME,
|
||||
PRIORITY_LEVEL,
|
||||
|
||||
PSYS_POWER_LIMIT1,
|
||||
PSYS_POWER_LIMIT2,
|
||||
PSYS_PL1_ENABLE,
|
||||
PSYS_PL2_ENABLE,
|
||||
PSYS_TIME_WINDOW1,
|
||||
PSYS_TIME_WINDOW2,
|
||||
/* below are not raw primitive data */
|
||||
AVERAGE_POWER,
|
||||
NR_RAPL_PRIMITIVES,
|
||||
|
@ -300,47 +300,59 @@ struct dev_pm_ops {
|
||||
int (*runtime_idle)(struct device *dev);
|
||||
};
|
||||
|
||||
#define SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
.suspend = pm_sleep_ptr(suspend_fn), \
|
||||
.resume = pm_sleep_ptr(resume_fn), \
|
||||
.freeze = pm_sleep_ptr(suspend_fn), \
|
||||
.thaw = pm_sleep_ptr(resume_fn), \
|
||||
.poweroff = pm_sleep_ptr(suspend_fn), \
|
||||
.restore = pm_sleep_ptr(resume_fn),
|
||||
|
||||
#define LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
.suspend_late = pm_sleep_ptr(suspend_fn), \
|
||||
.resume_early = pm_sleep_ptr(resume_fn), \
|
||||
.freeze_late = pm_sleep_ptr(suspend_fn), \
|
||||
.thaw_early = pm_sleep_ptr(resume_fn), \
|
||||
.poweroff_late = pm_sleep_ptr(suspend_fn), \
|
||||
.restore_early = pm_sleep_ptr(resume_fn),
|
||||
|
||||
#define NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
.suspend_noirq = pm_sleep_ptr(suspend_fn), \
|
||||
.resume_noirq = pm_sleep_ptr(resume_fn), \
|
||||
.freeze_noirq = pm_sleep_ptr(suspend_fn), \
|
||||
.thaw_noirq = pm_sleep_ptr(resume_fn), \
|
||||
.poweroff_noirq = pm_sleep_ptr(suspend_fn), \
|
||||
.restore_noirq = pm_sleep_ptr(resume_fn),
|
||||
|
||||
#define RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
|
||||
.runtime_suspend = suspend_fn, \
|
||||
.runtime_resume = resume_fn, \
|
||||
.runtime_idle = idle_fn,
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
#define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
.suspend = suspend_fn, \
|
||||
.resume = resume_fn, \
|
||||
.freeze = suspend_fn, \
|
||||
.thaw = resume_fn, \
|
||||
.poweroff = suspend_fn, \
|
||||
.restore = resume_fn,
|
||||
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
|
||||
#else
|
||||
#define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
#define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
.suspend_late = suspend_fn, \
|
||||
.resume_early = resume_fn, \
|
||||
.freeze_late = suspend_fn, \
|
||||
.thaw_early = resume_fn, \
|
||||
.poweroff_late = suspend_fn, \
|
||||
.restore_early = resume_fn,
|
||||
LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
|
||||
#else
|
||||
#define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
#define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
.suspend_noirq = suspend_fn, \
|
||||
.resume_noirq = resume_fn, \
|
||||
.freeze_noirq = suspend_fn, \
|
||||
.thaw_noirq = resume_fn, \
|
||||
.poweroff_noirq = suspend_fn, \
|
||||
.restore_noirq = resume_fn,
|
||||
NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
|
||||
#else
|
||||
#define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
|
||||
.runtime_suspend = suspend_fn, \
|
||||
.runtime_resume = resume_fn, \
|
||||
.runtime_idle = idle_fn,
|
||||
RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
|
||||
#else
|
||||
#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
|
||||
#endif
|
||||
@ -349,9 +361,9 @@ struct dev_pm_ops {
|
||||
* Use this if you want to use the same suspend and resume callbacks for suspend
|
||||
* to RAM and hibernation.
|
||||
*/
|
||||
#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
|
||||
const struct dev_pm_ops __maybe_unused name = { \
|
||||
SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
#define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
|
||||
static const struct dev_pm_ops name = { \
|
||||
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
}
|
||||
|
||||
/*
|
||||
@ -367,17 +379,27 @@ const struct dev_pm_ops __maybe_unused name = { \
|
||||
* .resume_early(), to the same routines as .runtime_suspend() and
|
||||
* .runtime_resume(), respectively (and analogously for hibernation).
|
||||
*/
|
||||
#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
|
||||
static const struct dev_pm_ops name = { \
|
||||
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
|
||||
}
|
||||
|
||||
/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
|
||||
#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
|
||||
const struct dev_pm_ops __maybe_unused name = { \
|
||||
SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
}
|
||||
|
||||
/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. */
|
||||
#define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
|
||||
const struct dev_pm_ops __maybe_unused name = { \
|
||||
SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
|
||||
SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
#define pm_ptr(_ptr) (_ptr)
|
||||
#else
|
||||
#define pm_ptr(_ptr) NULL
|
||||
#endif
|
||||
#define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr))
|
||||
#define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr))
|
||||
|
||||
/*
|
||||
* PM_EVENT_ messages
|
||||
@ -499,6 +521,7 @@ const struct dev_pm_ops __maybe_unused name = { \
|
||||
*/
|
||||
|
||||
enum rpm_status {
|
||||
RPM_INVALID = -1,
|
||||
RPM_ACTIVE = 0,
|
||||
RPM_RESUMING,
|
||||
RPM_SUSPENDED,
|
||||
@ -612,6 +635,7 @@ struct dev_pm_info {
|
||||
unsigned int links_count;
|
||||
enum rpm_request request;
|
||||
enum rpm_status runtime_status;
|
||||
enum rpm_status last_status;
|
||||
int runtime_error;
|
||||
int autosuspend_delay;
|
||||
u64 last_busy;
|
||||
|
@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev);
|
||||
extern void pm_runtime_put_suppliers(struct device *dev);
|
||||
extern void pm_runtime_new_link(struct device *dev);
|
||||
extern void pm_runtime_drop_link(struct device_link *link);
|
||||
extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
|
||||
|
||||
extern int devm_pm_runtime_enable(struct device *dev);
|
||||
|
||||
@ -283,6 +284,8 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {}
|
||||
static inline void pm_runtime_put_suppliers(struct device *dev) {}
|
||||
static inline void pm_runtime_new_link(struct device *dev) {}
|
||||
static inline void pm_runtime_drop_link(struct device_link *link) {}
|
||||
static inline void pm_runtime_release_supplier(struct device_link *link,
|
||||
bool check_idle) {}
|
||||
|
||||
#endif /* !CONFIG_PM */
|
||||
|
||||
|
@ -266,10 +266,10 @@ unsigned long arch_scale_thermal_pressure(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_set_thermal_pressure
|
||||
#ifndef arch_update_thermal_pressure
|
||||
static __always_inline
|
||||
void arch_set_thermal_pressure(const struct cpumask *cpus,
|
||||
unsigned long th_pressure)
|
||||
void arch_update_thermal_pressure(const struct cpumask *cpus,
|
||||
unsigned long capped_frequency)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
|
@ -446,6 +446,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
|
||||
extern asmlinkage int swsusp_arch_suspend(void);
|
||||
extern asmlinkage int swsusp_arch_resume(void);
|
||||
|
||||
extern u32 swsusp_hardware_signature;
|
||||
extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
|
||||
extern int hibernate(void);
|
||||
extern bool system_entering_hibernation(void);
|
||||
|
@ -550,7 +550,7 @@ config SCHED_THERMAL_PRESSURE
|
||||
i.e. put less load on throttled CPUs than on non/less throttled ones.
|
||||
|
||||
This requires the architecture to implement
|
||||
arch_set_thermal_pressure() and arch_scale_thermal_pressure().
|
||||
arch_update_thermal_pressure() and arch_scale_thermal_pressure().
|
||||
|
||||
config BSD_PROCESS_ACCT
|
||||
bool "BSD Process Accounting"
|
||||
|
@ -170,6 +170,7 @@ extern int swsusp_swap_in_use(void);
|
||||
#define SF_PLATFORM_MODE 1
|
||||
#define SF_NOCOMPRESS_MODE 2
|
||||
#define SF_CRC32_MODE 4
|
||||
#define SF_HW_SIG 8
|
||||
|
||||
/* kernel/power/hibernate.c */
|
||||
extern int swsusp_check(void);
|
||||
|
@ -36,6 +36,8 @@
|
||||
|
||||
#define HIBERNATE_SIG "S1SUSPEND"
|
||||
|
||||
u32 swsusp_hardware_signature;
|
||||
|
||||
/*
|
||||
* When reading an {un,}compressed image, we may restore pages in place,
|
||||
* in which case some architectures need these pages cleaning before they
|
||||
@ -104,7 +106,8 @@ struct swap_map_handle {
|
||||
|
||||
struct swsusp_header {
|
||||
char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
|
||||
sizeof(u32)];
|
||||
sizeof(u32) - sizeof(u32)];
|
||||
u32 hw_sig;
|
||||
u32 crc32;
|
||||
sector_t image;
|
||||
unsigned int flags; /* Flags to pass to the "boot" kernel */
|
||||
@ -312,7 +315,6 @@ static int hib_wait_io(struct hib_bio_batch *hb)
|
||||
/*
|
||||
* Saving part
|
||||
*/
|
||||
|
||||
static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
|
||||
{
|
||||
int error;
|
||||
@ -324,6 +326,10 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
|
||||
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
|
||||
memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
|
||||
swsusp_header->image = handle->first_sector;
|
||||
if (swsusp_hardware_signature) {
|
||||
swsusp_header->hw_sig = swsusp_hardware_signature;
|
||||
flags |= SF_HW_SIG;
|
||||
}
|
||||
swsusp_header->flags = flags;
|
||||
if (flags & SF_CRC32_MODE)
|
||||
swsusp_header->crc32 = handle->crc32;
|
||||
@ -1537,6 +1543,12 @@ int swsusp_check(void)
|
||||
} else {
|
||||
error = -EINVAL;
|
||||
}
|
||||
if (!error && swsusp_header->flags & SF_HW_SIG &&
|
||||
swsusp_header->hw_sig != swsusp_hardware_signature) {
|
||||
pr_info("Suspend image hardware signature mismatch (%08x now %08x); aborting resume.\n",
|
||||
swsusp_header->hw_sig, swsusp_hardware_signature);
|
||||
error = -EINVAL;
|
||||
}
|
||||
|
||||
put:
|
||||
if (error)
|
||||
|
Loading…
Reference in New Issue
Block a user