arm64 updates for 6.8
* for-next/cpufeature
  - Remove ARM64_HAS_NO_HW_PREFETCH copy_page() optimisation for ye olde Thunder-X machines.
  - Avoid mapping KPTI trampoline when it is not required.
  - Make CPU capability API more robust during early initialisation.

* for-next/early-idreg-overrides
  - Remove dependencies on core kernel helpers from the early command-line parsing logic in preparation for moving this code before the kernel is mapped.

* for-next/fpsimd
  - Restore kernel-mode fpsimd context lazily, allowing us to run fpsimd code sequences in the kernel with pre-emption enabled.

* for-next/kbuild
  - Install 'vmlinuz.efi' when CONFIG_EFI_ZBOOT=y.
  - Makefile cleanups.

* for-next/lpa2-prep
  - Preparatory work for enabling the 'LPA2' extension, which will introduce 52-bit virtual and physical addressing even with 4KiB pages (including for KVM guests).

* for-next/misc
  - Remove dead code and fix a typo.

* for-next/mm
  - Pass NUMA node information for IRQ stack allocations.

* for-next/perf
  - Add perf support for the Synopsys DesignWare PCIe PMU.
  - Add support for event counting thresholds (FEAT_PMUv3_TH) introduced in Armv8.8.
  - Add support for i.MX8DXL SoCs to the IMX DDR PMU driver.
  - Minor PMU driver fixes and optimisations.

* for-next/rip-vpipt
  - Remove what support we had for the obsolete VPIPT I-cache policy.

* for-next/selftests
  - Improvements to the SVE and SME selftests.

* for-next/stacktrace
  - Refactor kernel unwind logic so that it can be used by BPF unwinding and, eventually, reliable backtracing.

* for-next/sysregs
  - Update a bunch of register definitions based on the latest XML drop from Arm.

-----BEGIN PGP SIGNATURE-----

iQFEBAABCgAuFiEEPxTL6PPUbjXGY88ct6xw3ITBYzQFAmWWvKYQHHdpbGxAa2Vy
bmVsLm9yZwAKCRC3rHDchMFjNIiTB/9agZBkEhZjP2sNDGyE4UFwawweWHkt2r8h
WyvdwP91Z/AIsYSsGYu36J0l4pOnMKp/i6t+rt031SK4j+Q8hJYhSfDt3RvVbc0/
Pz9D18V6cLrfq+Yxycqq9ufVdjs+m+CQ5WeLaRGmNIyEzJ/Jv/qrAN+2r603EeLP
nq08qMZhDIQd2ZzbigCnGaNrTsVSafFfBFv1GsgDvnMZAjs1G6457A6zu+NatNUc
+TMSG+3EawutHZZ2noXl0Ra7VOfIbVZFiUssxRPenKQByHHHR+QB2c/O1blri+dm
XLMutvqO2/WvYGIfXO5koqZqvpVeR3zXxPwmGi5hQBsmOjtXzKd+
=U4mo
-----END PGP SIGNATURE-----

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Will Deacon:
 "CPU features:
   - Remove ARM64_HAS_NO_HW_PREFETCH copy_page() optimisation for ye olde Thunder-X machines
   - Avoid mapping KPTI trampoline when it is not required
   - Make CPU capability API more robust during early initialisation

  Early idreg overrides:
   - Remove dependencies on core kernel helpers from the early command-line parsing logic in preparation for moving this code before the kernel is mapped

  FPsimd:
   - Restore kernel-mode fpsimd context lazily, allowing us to run fpsimd code sequences in the kernel with pre-emption enabled

  KBuild:
   - Install 'vmlinuz.efi' when CONFIG_EFI_ZBOOT=y
   - Makefile cleanups

  LPA2 prep:
   - Preparatory work for enabling the 'LPA2' extension, which will introduce 52-bit virtual and physical addressing even with 4KiB pages (including for KVM guests)

  Misc:
   - Remove dead code and fix a typo

  MM:
   - Pass NUMA node information for IRQ stack allocations

  Perf:
   - Add perf support for the Synopsys DesignWare PCIe PMU
   - Add support for event counting thresholds (FEAT_PMUv3_TH) introduced in Armv8.8
   - Add support for i.MX8DXL SoCs to the IMX DDR PMU driver
   - Minor PMU driver fixes and optimisations

  RIP VPIPT:
   - Remove what support we had for the obsolete VPIPT I-cache policy

  Selftests:
   - Improvements to the SVE and SME selftests

  Stacktrace:
   - Refactor kernel unwind logic so that it can be used by BPF unwinding and, eventually, reliable backtracing

  Sysregs:
   - Update a bunch of register definitions based on the latest XML drop from Arm"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (87 commits)
  kselftest/arm64: Don't probe the current VL for unsupported vector types
  efi/libstub: zboot: do not use $(shell ...) in cmd_copy_and_pad
  arm64: properly install vmlinuz.efi
  arm64/sysreg: Add missing system instruction definitions for FGT
  arm64/sysreg: Add missing system register definitions for FGT
  arm64/sysreg: Add missing ExtTrcBuff field definition to ID_AA64DFR0_EL1
  arm64/sysreg: Add missing Pauth_LR field definitions to ID_AA64ISAR1_EL1
  arm64: memory: remove duplicated include
  arm: perf: Fix ARCH=arm build with GCC
  arm64: Align boot cpucap handling with system cpucap handling
  arm64: Cleanup system cpucap handling
  MAINTAINERS: add maintainers for DesignWare PCIe PMU driver
  drivers/perf: add DesignWare PCIe PMU driver
  PCI: Move pci_clear_and_set_dword() helper to PCI header
  PCI: Add Alibaba Vendor ID to linux/pci_ids.h
  docs: perf: Add description for Synopsys DesignWare PCIe PMU driver
  arm64: irq: set the correct node for shadow call stack
  Revert "perf/arm_dmc620: Remove duplicate format attribute #defines"
  arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD
  arm64: fpsimd: Preserve/restore kernel mode NEON at context switch
  ...
commit ab5f3fcb7c

Documentation/admin-guide/perf/dwc_pcie_pmu.rst (new file, 94 lines)
@@ -0,0 +1,94 @@
======================================================================
Synopsys DesignWare Cores (DWC) PCIe Performance Monitoring Unit (PMU)
======================================================================

DesignWare Cores (DWC) PCIe PMU
===============================

The PMU is a PCIe configuration space register block provided by each PCIe Root
Port in a Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error
injection, and Statistics).

As the name indicates, the RAS DES capability supports system-level debugging,
AER error injection, and collection of statistics. To facilitate collection of
statistics, the Synopsys DesignWare Cores PCIe controller provides the
following two features:

- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
  time spent in each low-power LTSSM state) and
- one 32-bit counter for Event Counting (error and non-error events for
  a specified lane)

Note: There is no interrupt for counter overflow.

Time Based Analysis
-------------------

Using this feature you can obtain information regarding RX/TX data throughput
and the time spent in each low-power LTSSM state by the controller. The PMU
measures data in two categories:

- Group#0: Percentage of time the controller stays in LTSSM states.
- Group#1: Amount of data processed (units of 16 bytes).

Lane Event Counters
-------------------

Using this feature you can obtain error and non-error information for a
specific lane of the controller. The PMU event is selected by all of:

- Group i
- Event j within the Group i
- Lane k

Some of the events only exist for specific configurations.

DesignWare Cores (DWC) PCIe PMU Driver
======================================

This driver adds a PMU device for each PCIe Root Port, named after the BDF of
the Root Port. For example, given

    30:03.0 PCI bridge: Device 1ded:8000 (rev 01)

the PMU device name for this Root Port is dwc_rootport_3018.
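(Editorial aside: the 3018 suffix is the Root Port's bus/devfn packed into 16
bits — bus << 8 | devfn, with devfn = device << 3 | function — the same packing
as the kernel's PCI_DEVID() convention. A minimal sketch of that derivation,
assuming this naming scheme::

    #include <stdio.h>

    /* Bus 0x30, device 0x03, function 0 -> devfn = (0x03 << 3) | 0 = 0x18,
     * so the packed BDF is (0x30 << 8) | 0x18 = 0x3018. */
    int main(void)
    {
            unsigned int bus = 0x30, dev = 0x03, fn = 0x0;
            unsigned int devfn = (dev << 3) | fn;

            printf("dwc_rootport_%x\n", (bus << 8) | devfn); /* dwc_rootport_3018 */
            return 0;
    }

The driver's actual naming code is not shown in this commit excerpt.)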
The DWC PCIe PMU driver registers a perf PMU driver, which provides a
description of the available events and configuration options in sysfs; see
/sys/bus/event_source/devices/dwc_rootport_{bdf}.

The "format" directory describes the format of the config fields of the
perf_event_attr structure. The "events" directory provides configuration
templates for all documented events. For example,
"Rx_PCIe_TLP_Data_Payload" is an equivalent of "eventid=0x22,type=0x1".

The "perf list" command shall list the available events from sysfs, e.g.::

    $# perf list | grep dwc_rootport
    <...>
    dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/        [Kernel PMU event]
    <...>
    dwc_rootport_3018/rx_memory_read,lane=?/           [Kernel PMU event]

Time Based Analysis Event Usage
-------------------------------

Example usage of counting PCIe RX TLP data payload (units of bytes)::

    $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/

The average RX/TX bandwidth can be calculated using the following formulas:

    PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
    PCIe TX Bandwidth = Tx_PCIe_TLP_Data_Payload / Measure_Time_Window
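(As a worked illustration of these formulas, a small user-space sketch —
an editorial example with hypothetical counter readings, not part of the
patch::

    #include <stdio.h>

    /* Average bandwidth from two samples of Rx_PCIe_TLP_Data_Payload,
     * assuming the event reports bytes as in the example above. */
    int main(void)
    {
            unsigned long long bytes_start = 0;            /* hypothetical */
            unsigned long long bytes_end = 3200000000ULL;  /* hypothetical */
            double window_s = 1.0;                         /* measurement window */

            printf("PCIe RX bandwidth: %.2f MB/s\n",
                   (double)(bytes_end - bytes_start) / window_s / 1e6);
            return 0;
    }

The counter values and one-second window are placeholders for whatever a real
measurement session observes.)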
Lane Event Usage
----------------

Each lane has the same event set, and to avoid generating a list of hundreds
of events the user needs to specify the lane ID explicitly, e.g.::

    $# perf stat -a -e dwc_rootport_3018/rx_memory_read,lane=4/

The driver does not support sampling, therefore "perf record" will not
work. Per-task (without "-a") perf sessions are not supported.
@@ -13,8 +13,8 @@ is one register for each counter. Counter 0 is special in that it always counts
 interrupt is raised. If any other counter overflows, it continues counting, and
 no interrupt is raised.
 
-The "format" directory describes format of the config (event ID) and config1
-(AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/
+The "format" directory describes format of the config (event ID) and config1/2
+(AXI filter setting) fields of the perf_event_attr structure, see /sys/bus/event_source/
 devices/imx8_ddr0/format/. The "events" directory describes the events types
 hardware supported that can be used with perf tool, see /sys/bus/event_source/
 devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented
@@ -28,12 +28,11 @@ in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/.
 AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
 to count reading or writing matches filter setting. Filter setting is various
 from different DRAM controller implementations, which is distinguished by quirks
-in the driver. You also can dump info from userspace, filter in "caps" directory
-indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates
-whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and
-value 1 for supported.
+in the driver. You also can dump info from userspace, "caps" directory show the
+type of AXI filter (filter, enhanced_filter and super_filter). Value 0 for
+un-supported, and value 1 for supported.
 
-* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0).
+* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0, super_filter: 0).
   Filter is defined with two configuration parts:
   --AXI_ID defines AxID matching value.
   --AXI_MASKING defines which bits of AxID are meaningful for the matching.
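(Editorial aside: the matching rule implied by the AXI_ID/AXI_MASKING pair
above can be modelled in a few lines of C — an illustrative sketch, not the
driver's implementation:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* AXI_MASKING selects which AxID bits are compared against AXI_ID. */
    static bool axid_matches(uint16_t axid, uint16_t axi_id, uint16_t axi_masking)
    {
            return (axid & axi_masking) == (axi_id & axi_masking);
    }

    int main(void)
    {
            /* Only the low byte is meaningful here: 0x1234 matches 0xab34. */
            printf("%d\n", axid_matches(0x1234, 0xab34, 0x00ff)); /* 1 */
            return 0;
    }

The field widths are hypothetical; the documentation above only specifies the
masked-comparison semantics.)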
@@ -65,7 +64,37 @@ value 1 for supported.
 
   perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
 
-* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1).
+* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1, super_filter: 0).
   This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits
   counting the number of bytes (as opposed to the number of bursts) from DDR
   read and write transactions concurrently with another set of data counters.
 
+* With DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER quirk(filter: 0, enhanced_filter: 0, super_filter: 1).
+  The previous AXI filter has a limitation: it cannot filter different IDs at
+  the same time because the filter is shared between counters. This quirk
+  extends the AXI ID filter. One improvement is that counters 1-3 each have
+  their own filter, which means various IDs can be filtered concurrently.
+  Another improvement is that counters 1-3 support AXI PORT and CHANNEL
+  selection, i.e. selecting the address channel or the data channel.
+
+  Filter is defined with 2 configuration registers per counter 1-3.
+  --Counter N MASK COMP register - including AXI_ID and AXI_MASKING.
+  --Counter N MUX CNTL register - including AXI CHANNEL and AXI PORT.
+
+    - 0: address channel
+    - 1: data channel
+
+  The PMU is in the DDR subsystem, where only a single port0 exists, so
+  axi_port is reserved and should be 0.
+
+  .. code-block:: bash
+
+      perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD,axi_channel=0xH/ cmd
+      perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD,axi_channel=0xH/ cmd
+
+  .. note::
+
+      axi_channel is inverted in userspace and will be inverted back by the
+      driver automatically, so users do not need to specify axi_channel when
+      they want to monitor the data channel of DDR transactions, since the
+      data channel is more meaningful.
@@ -19,6 +19,7 @@ Performance monitor support
    arm_dsu_pmu
    thunderx2-pmu
    alibaba_pmu
+   dwc_pcie_pmu
    nvidia-pmu
    meson-ddr-pmu
    cxl
@@ -130,7 +130,7 @@ When an Arm system boots, it can either have DT information, ACPI tables,
 or in some very unusual cases, both. If no command line parameters are used,
 the kernel will try to use DT for device enumeration; if there is no DT
 present, the kernel will try to use ACPI tables, but only if they are present.
-In neither is available, the kernel will not boot. If acpi=force is used
+If neither is available, the kernel will not boot. If acpi=force is used
 on the command line, the kernel will attempt to use ACPI tables first, but
 fall back to DT if there are no ACPI tables present. The basic idea is that
 the kernel will not fail to boot unless it absolutely has no other choice.
@@ -164,3 +164,75 @@ and should be used to mask the upper bits as needed.
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/arch/arm64/tests/user-events.c
 .. _tools/lib/perf/tests/test-evsel.c:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/perf/tests/test-evsel.c
+
+Event Counting Threshold
+========================
+
+Overview
+--------
+
+FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
+events whose count meets a specified threshold condition. For example if
+threshold_compare is set to 2 ('Greater than or equal'), and the
+threshold is set to 2, then the PMU counter will now only increment
+when an event would have previously incremented the PMU counter by 2 or
+more on a single processor cycle.
+
+To increment by 1 after passing the threshold condition instead of the
+number of events on that cycle, add the 'threshold_count' option to the
+command line.
+
+How-to
+------
+
+These are the parameters for controlling the feature:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Parameter
+     - Description
+   * - threshold
+     - Value to threshold the event by. A value of 0 means that
+       thresholding is disabled and the other parameters have no effect.
+   * - threshold_compare
+     - | Comparison function to use, with the following values supported:
+       |
+       | 0: Not-equal
+       | 1: Equals
+       | 2: Greater-than-or-equal
+       | 3: Less-than
+   * - threshold_count
+     - If this is set, count by 1 after passing the threshold condition
+       instead of the value of the event on this cycle.
+
+The threshold, threshold_compare and threshold_count values can be
+provided per event, for example:
+
+.. code-block:: sh
+
+  perf stat -e stall_slot/threshold=2,threshold_compare=2/ \
+            -e dtlb_walk/threshold=10,threshold_compare=3,threshold_count/
+
+In this example the stall_slot event will count by 2 or more on every
+cycle where 2 or more stalls happen. And dtlb_walk will count by 1 on
+every cycle where the number of dtlb walks was less than 10.
+
+The maximum supported threshold value can be read from the caps of each
+PMU, for example:
+
+.. code-block:: sh
+
+  cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
+
+  0x000000ff
+
+If a value higher than this is given, then opening the event will result
+in an error. The highest possible maximum is 4095, as the config field
+for threshold is limited to 12 bits, and the Perf tool will refuse to
+parse higher values.
+
+If the PMU doesn't support FEAT_PMUv3_TH, then threshold_max will read
+0, and attempting to set a threshold value will also result in an error.
+threshold_max will also read as 0 on aarch32 guests, even if the host
+is running on hardware with the feature.
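(Editorial aside: the increment semantics described in the threshold
documentation above can be pinned down with a small illustrative C model —
not kernel or perf code; 'events' is assumed to be the number of qualifying
events observed on one processor cycle:

    #include <stdbool.h>
    #include <stdio.h>

    enum th_compare { TH_NOT_EQUAL = 0, TH_EQUALS = 1, TH_GE = 2, TH_LESS_THAN = 3 };

    /* Model of a FEAT_PMUv3_TH counter increment for one cycle, following
     * the parameter table in the documentation above. */
    static unsigned int th_increment(unsigned int events, unsigned int threshold,
                                     enum th_compare cmp, bool threshold_count)
    {
            bool pass = false;

            if (threshold == 0)     /* thresholding disabled */
                    return events;

            switch (cmp) {
            case TH_NOT_EQUAL: pass = events != threshold; break;
            case TH_EQUALS:    pass = events == threshold; break;
            case TH_GE:        pass = events >= threshold; break;
            case TH_LESS_THAN: pass = events <  threshold; break;
            }

            return pass ? (threshold_count ? 1 : events) : 0;
    }

    int main(void)
    {
            /* stall_slot/threshold=2,threshold_compare=2/: counts by 2+ on
             * cycles with 2 or more stalls. */
            printf("%u\n", th_increment(3, 2, TH_GE, false));        /* 3 */
            /* dtlb_walk/threshold=10,threshold_compare=3,threshold_count/:
             * counts by 1 on cycles with fewer than 10 walks. */
            printf("%u\n", th_increment(4, 10, TH_LESS_THAN, true)); /* 1 */
            return 0;
    }

This is only a behavioural sketch of the documented semantics.)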
@@ -27,6 +27,9 @@ properties:
           - fsl,imx8mq-ddr-pmu
           - fsl,imx8mp-ddr-pmu
       - const: fsl,imx8m-ddr-pmu
+  - items:
+      - const: fsl,imx8dxl-ddr-pmu
+      - const: fsl,imx8-ddr-pmu
 
   reg:
     maxItems: 1
@@ -21028,6 +21028,13 @@ L:	linux-mmc@vger.kernel.org
 S:	Maintained
 F:	drivers/mmc/host/dw_mmc*
 
+SYNOPSYS DESIGNWARE PCIE PMU DRIVER
+M:	Shuai Xue <xueshuai@linux.alibaba.com>
+M:	Jing Zhang <renyu.zj@linux.alibaba.com>
+S:	Supported
+F:	Documentation/admin-guide/perf/dwc_pcie_pmu.rst
+F:	drivers/perf/dwc_pcie_pmu.c
+
 SYNOPSYS HSDK RESET CONTROLLER DRIVER
 M:	Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
 S:	Supported
@@ -268,10 +268,8 @@ static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
 
 static void armv6pmu_enable_event(struct perf_event *event)
 {
-	unsigned long val, mask, evt, flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long val, mask, evt;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
@@ -294,12 +292,10 @@ static void armv6pmu_enable_event(struct perf_event *event)
 	 * Mask out the current event and set the counter to count the event
 	 * that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t
@@ -362,26 +358,20 @@ armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
 
 static void armv6pmu_start(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags, val;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	unsigned long val;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val |= ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags, val;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	unsigned long val;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
@@ -419,10 +409,8 @@ static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
 
 static void armv6pmu_disable_event(struct perf_event *event)
 {
-	unsigned long val, mask, evt, flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long val, mask, evt;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
@@ -444,20 +432,16 @@ static void armv6pmu_disable_event(struct perf_event *event)
 	 * of ETM bus signal assertion cycles. The external reporting should
 	 * be disabled and so this should never increment.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv6mpcore_pmu_disable_event(struct perf_event *event)
 {
-	unsigned long val, mask, flags, evt = 0;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long val, mask, evt = 0;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
@@ -475,12 +459,10 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
 	 * simply disable the interrupt reporting.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int armv6_map_event(struct perf_event *event)
@@ -870,10 +870,8 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
 
 static void armv7pmu_enable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -886,7 +884,6 @@ static void armv7pmu_enable_event(struct perf_event *event)
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -910,16 +907,12 @@ static void armv7pmu_enable_event(struct perf_event *event)
 	 * Enable counter
 	 */
 	armv7_pmnc_enable_counter(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_disable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -931,7 +924,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
 	/*
 	 * Disable counter and interrupt
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -942,8 +934,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
 	 * Disable interrupt for this counter
 	 */
 	armv7_pmnc_disable_intens(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
@@ -1009,24 +999,14 @@ static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
 
 static void armv7pmu_start(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
@@ -1072,8 +1052,10 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
 {
 	unsigned long config_base = 0;
 
-	if (attr->exclude_idle)
-		return -EPERM;
+	if (attr->exclude_idle) {
+		pr_debug("ARM performance counters do not support mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
 	if (attr->exclude_user)
 		config_base |= ARMV7_EXCLUDE_USER;
 	if (attr->exclude_kernel)
@@ -1492,14 +1474,10 @@ static void krait_clearpmu(u32 config_base)
 
 static void krait_pmu_disable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	/* Disable counter and interrupt */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/* Disable counter */
 	armv7_pmnc_disable_counter(idx);
@@ -1512,23 +1490,17 @@ static void krait_pmu_disable_event(struct perf_event *event)
 
 	/* Disable interrupt for this counter */
 	armv7_pmnc_disable_intens(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void krait_pmu_enable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/* Disable counter */
 	armv7_pmnc_disable_counter(idx);
@@ -1548,8 +1520,6 @@ static void krait_pmu_enable_event(struct perf_event *event)
 
 	/* Enable counter */
 	armv7_pmnc_enable_counter(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void krait_pmu_reset(void *info)
@@ -1825,14 +1795,10 @@ static void scorpion_clearpmu(u32 config_base)
 
 static void scorpion_pmu_disable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	/* Disable counter and interrupt */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/* Disable counter */
 	armv7_pmnc_disable_counter(idx);
@@ -1845,23 +1811,17 @@ static void scorpion_pmu_disable_event(struct perf_event *event)
 
 	/* Disable interrupt for this counter */
 	armv7_pmnc_disable_intens(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void scorpion_pmu_enable_event(struct perf_event *event)
 {
-	unsigned long flags;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/* Disable counter */
 	armv7_pmnc_disable_counter(idx);
@@ -1881,8 +1841,6 @@ static void scorpion_pmu_enable_event(struct perf_event *event)
 
 	/* Enable counter */
 	armv7_pmnc_enable_counter(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void scorpion_pmu_reset(void *info)
@@ -203,10 +203,8 @@ xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
 
 static void xscale1pmu_enable_event(struct perf_event *event)
 {
-	unsigned long val, mask, evt, flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long val, mask, evt;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	switch (idx) {
@@ -229,20 +227,16 @@ static void xscale1pmu_enable_event(struct perf_event *event)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void xscale1pmu_disable_event(struct perf_event *event)
 {
-	unsigned long val, mask, evt, flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long val, mask, evt;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	switch (idx) {
@@ -263,12 +257,10 @@ static void xscale1pmu_disable_event(struct perf_event *event)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
@@ -300,26 +292,20 @@ static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
 
 static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags, val;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	unsigned long val;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val |= XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags, val;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	unsigned long val;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u64 xscale1pmu_read_counter(struct perf_event *event)
@@ -549,10 +535,8 @@ xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
 
 static void xscale2pmu_enable_event(struct perf_event *event)
 {
-	unsigned long flags, ien, evtsel;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long ien, evtsel;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
@@ -587,18 +571,14 @@ static void xscale2pmu_enable_event(struct perf_event *event)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void xscale2pmu_disable_event(struct perf_event *event)
 {
-	unsigned long flags, ien, evtsel, of_flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	unsigned long ien, evtsel, of_flags;
 	struct hw_perf_event *hwc = &event->hw;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 	int idx = hwc->idx;
 
 	ien = xscale2pmu_read_int_enable();
@@ -638,11 +618,9 @@ static void xscale2pmu_disable_event(struct perf_event *event)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
 	xscale2pmu_write_overflow_flags(of_flags);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
@@ -663,26 +641,20 @@ xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
 
 static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags, val;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	unsigned long val;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
 	val |= XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags, val;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	unsigned long val;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u64 xscale2pmu_read_counter(struct perf_event *event)
@@ -1549,7 +1549,7 @@ config ARCH_FORCE_MAX_ORDER
 	  Don't change if unsure.
 
 config UNMAP_KERNEL_AT_EL0
-	bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
+	bool "Unmap kernel when running in userspace (KPTI)" if EXPERT
 	default y
 	help
 	  Speculation attacks against some high-performance processors can
@@ -200,7 +200,7 @@ endif
 endif
 
 vdso-install-y				+= arch/arm64/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT_VDSO)	+= arch/arm64/kernel/vdso32/vdso.so.dbg:vdso32.so
+vdso-install-$(CONFIG_COMPAT_VDSO)	+= arch/arm64/kernel/vdso32/vdso32.so.dbg
 
 include $(srctree)/scripts/Makefile.defconf
 
@@ -44,7 +44,7 @@ EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64
 EFI_ZBOOT_MACH_TYPE	:= ARM64
 EFI_ZBOOT_FORWARD_CFI	:= $(CONFIG_ARM64_BTI_KERNEL)
 
-EFI_ZBOOT_OBJCOPY_FLAGS	= --add-symbol zboot_code_size=0x$(shell \
+EFI_ZBOOT_OBJCOPY_FLAGS	= --add-symbol zboot_code_size=0x$$( \
 				$(NM) vmlinux|grep _kernel_codesize|cut -d' ' -f1)
 
 include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot
@@ -17,7 +17,8 @@
 # $3 - kernel map file
 # $4 - default install path (blank if root directory)
 
-if [ "$(basename $2)" = "Image.gz" ]; then
+if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ]
+then
 	# Compressed install
 	echo "Installing compressed kernel"
 	base=vmlinuz
@@ -12,7 +12,7 @@
 #ifndef __ASM_ASSEMBLER_H
 #define __ASM_ASSEMBLER_H
 
-#include <asm-generic/export.h>
+#include <linux/export.h>
 
 #include <asm/alternative.h>
 #include <asm/asm-bug.h>
@@ -58,7 +58,6 @@ static inline unsigned int arch_slab_minalign(void)
 #define CTR_L1IP(ctr)		SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
 
 #define ICACHEF_ALIASING	0
-#define ICACHEF_VPIPT		1
 extern unsigned long __icache_flags;
 
 /*
@@ -70,11 +69,6 @@ static inline int icache_is_aliasing(void)
 	return test_bit(ICACHEF_ALIASING, &__icache_flags);
 }
 
-static __always_inline int icache_is_vpipt(void)
-{
-	return test_bit(ICACHEF_VPIPT, &__icache_flags);
-}
-
 static inline u32 cache_type_cwg(void)
 {
 	return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
@@ -617,6 +617,7 @@ static inline bool id_aa64pfr1_mte(u64 pfr1)
 	return val >= ID_AA64PFR1_EL1_MTE_MTE2;
 }
 
+void __init setup_boot_cpu_features(void);
 void __init setup_system_features(void);
 void __init setup_user_features(void);
 
@@ -819,6 +820,11 @@ static inline bool system_supports_tlb_range(void)
 	return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
 }
 
+static inline bool system_supports_lpa2(void)
+{
+	return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
+
 int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
 bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
@@ -242,14 +242,6 @@
 		| (\nx << 5)
 	.endm
 
-/*
- * Zero the entire ZA array
- *	ZERO ZA
- */
-.macro zero_za
-	.inst 0xc00800ff
-.endm
-
 .macro __for from:req, to:req
 	.if (\from) == (\to)
 		_for__body %\from
@@ -37,27 +37,12 @@
 
 
 /*
- * If KASLR is enabled, then an offset K is added to the kernel address
- * space. The bottom 21 bits of this offset are zero to guarantee 2MB
- * alignment for PA and VA.
- *
- * For each pagetable level of the swapper, we know that the shift will
- * be larger than 21 (for the 4KB granule case we use section maps thus
- * the smallest shift is actually 30) thus there is the possibility that
- * KASLR can increase the number of pagetable entries by 1, so we make
- * room for this extra entry.
- *
- * Note KASLR cannot increase the number of required entries for a level
- * by more than one because it increments both the virtual start and end
- * addresses equally (the extra entry comes from the case where the end
- * address is just pushed over a boundary and the start address isn't).
+ * A relocatable kernel may execute from an address that differs from the one at
+ * which it was linked. In the worst case, its runtime placement may intersect
+ * with two adjacent PGDIR entries, which means that an additional page table
+ * may be needed at each subordinate level.
  */
-
-#ifdef CONFIG_RANDOMIZE_BASE
-#define EARLY_KASLR	(1)
-#else
-#define EARLY_KASLR	(0)
-#endif
+#define EXTRA_PAGE	__is_defined(CONFIG_RELOCATABLE)
 
 #define SPAN_NR_ENTRIES(vstart, vend, shift) \
 	((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
@@ -83,7 +68,7 @@
 	+ EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */	\
 	+ EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */	\
 	+ EARLY_PMDS((vstart), (vend), add))	/* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR))
+#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
 
 /* the initial ID map may need two extra pages if it needs to be extended */
 #if VA_BITS < 48
@@ -243,13 +243,6 @@ static inline size_t __invalidate_icache_max_range(void)
 
 static inline void __invalidate_icache_guest_page(void *va, size_t size)
 {
-	/*
-	 * VPIPT I-cache maintenance must be done from EL2. See comment in the
-	 * nVHE flavor of __kvm_tlb_flush_vmid_ipa().
-	 */
-	if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2)
-		return;
-
 	/*
 	 * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
 	 * invalidation range exceeds our arbitrary limit on invadations by
@@ -25,6 +25,8 @@
 #define KVM_PGTABLE_MIN_BLOCK_LEVEL	2U
 #endif
 
+#define kvm_lpa2_is_enabled()		false
+
 static inline u64 kvm_get_parange(u64 mmfr0)
 {
 	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
@@ -182,6 +182,7 @@
 #include <linux/types.h>
 #include <asm/boot.h>
 #include <asm/bug.h>
+#include <asm/sections.h>
 
 #if VA_BITS > 48
 extern u64 vabits_actual;
@@ -193,15 +194,12 @@ extern s64 memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
-/* the virtual base of the kernel image */
-extern u64			kimage_vaddr;
-
 /* the offset between the kernel virtual and physical mappings */
 extern u64			kimage_voffset;
 
 static inline unsigned long kaslr_offset(void)
 {
-	return kimage_vaddr - KIMAGE_VADDR;
+	return (u64)&_text - KIMAGE_VADDR;
 }
 
 #ifdef CONFIG_RANDOMIZE_BASE
@@ -407,6 +405,5 @@ void dump_mem_limit(void);
 #define INIT_MEMBLOCK_MEMORY_REGIONS	(INIT_MEMBLOCK_REGIONS * 8)
 #endif
 
-#include <asm-generic/memory_model.h>
 
 #endif /* __ASM_MEMORY_H */
@@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings;
 #define PTE_MAYBE_NG		(arm64_use_ng_mappings ? PTE_NG : 0)
 #define PMD_MAYBE_NG		(arm64_use_ng_mappings ? PMD_SECT_NG : 0)
 
+#define lpa2_is_enabled()	false
+
 /*
  * If we have userspace only BTI we don't want to mark kernel pages
  * guarded even if the system does support BTI.
@@ -167,6 +167,9 @@ struct thread_struct {
 	unsigned long		fault_address;	/* fault info */
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
+
+	struct user_fpsimd_state	kernel_fpsimd_state;
+	unsigned int			kernel_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
 #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
@@ -12,8 +12,6 @@
 #include <linux/preempt.h>
 #include <linux/types.h>
 
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
 	/*
 	 * We must make sure that the SVE has been initialized properly
 	 * before using the SIMD in kernel.
-	 * fpsimd_context_busy is only set while preemption is disabled,
-	 * and is clear whenever preemption is enabled. Since
-	 * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
-	 * cannot change under our feet -- if it's set we cannot be
-	 * migrated, and if it's clear we cannot be migrated to a CPU
-	 * where it is set.
 	 */
 	return !WARN_ON(!system_capabilities_finalized()) &&
 	       system_supports_fpsimd() &&
-	       !in_hardirq() && !irqs_disabled() && !in_nmi() &&
-	       !this_cpu_read(fpsimd_context_busy);
+	       !in_hardirq() && !irqs_disabled() && !in_nmi();
 }
 
 #else /* ! CONFIG_KERNEL_MODE_NEON */
@@ -9,7 +9,6 @@
 #ifndef __ASM_STACKTRACE_COMMON_H
 #define __ASM_STACKTRACE_COMMON_H
 
-#include <linux/kprobes.h>
 #include <linux/types.h>
 
 struct stack_info {
@@ -23,12 +22,6 @@ struct stack_info {
  * @fp:          The fp value in the frame record (or the real fp)
 * @pc:          The lr value in the frame record (or the real lr)
 *
- * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
- *               associated with the most recently encountered replacement lr
- *               value.
- *
- * @task:        The task being unwound.
- *
 * @stack:       The stack currently being unwound.
 * @stacks:      An array of stacks which can be unwound.
 * @nr_stacks:   The number of stacks in @stacks.
@@ -36,10 +29,6 @@ struct stack_info {
 struct unwind_state {
 	unsigned long fp;
 	unsigned long pc;
-#ifdef CONFIG_KRETPROBES
-	struct llist_node *kr_cur;
-#endif
-	struct task_struct *task;
 
 	struct stack_info stack;
 	struct stack_info *stacks;
@@ -66,14 +55,8 @@ static inline bool stackinfo_on_stack(const struct stack_info *info,
 	return true;
 }
 
-static inline void unwind_init_common(struct unwind_state *state,
-				      struct task_struct *task)
+static inline void unwind_init_common(struct unwind_state *state)
 {
-	state->task = task;
-#ifdef CONFIG_KRETPROBES
-	state->kr_cur = NULL;
-#endif
-
 	state->stack = stackinfo_get_unknown();
 }
 
@@ -31,7 +31,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
 					 unsigned long fp,
 					 unsigned long pc)
 {
-	unwind_init_common(state, NULL);
+	unwind_init_common(state);
 
 	state->fp = fp;
 	state->pc = pc;
@@ -645,6 +645,7 @@
 #define OP_AT_S1E0W	sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
 #define OP_AT_S1E1RP	sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
 #define OP_AT_S1E1WP	sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A	sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
 #define OP_AT_S1E2R	sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
 #define OP_AT_S1E2W	sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
 #define OP_AT_S12E1R	sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
@@ -781,10 +782,16 @@
 #define OP_TLBI_VMALLS12E1NXS		sys_insn(1, 4, 9, 7, 6)
 
 /* Misc instructions */
+#define OP_GCSPUSHX			sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX			sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX			sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM			sys_insn(1, 3, 7, 7, 0)
+
 #define OP_BRB_IALL			sys_insn(1, 1, 7, 2, 4)
 #define OP_BRB_INJ			sys_insn(1, 1, 7, 2, 5)
 #define OP_CFP_RCTX			sys_insn(1, 3, 7, 3, 4)
 #define OP_DVP_RCTX			sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX			sys_insn(1, 3, 7, 3, 6)
 #define OP_CPP_RCTX			sys_insn(1, 3, 7, 3, 7)
 
 /* Common SCTLR_ELx flags. */
@@ -871,10 +878,12 @@
 
 /* id_aa64mmfr0 */
 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN	0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2		ID_AA64MMFR0_EL1_TGRAN4_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX	0x7
 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN	0x0
 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX	0x7
 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN	0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2		ID_AA64MMFR0_EL1_TGRAN16_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX	0xf
 
 #define ARM64_MIN_PARANGE_BITS		32
@@ -882,6 +891,7 @@
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT	0x0
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE		0x1
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN		0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2		0x3
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX		0x7
 
 #ifdef CONFIG_ARM64_PA_BITS_52
@@ -892,11 +902,13 @@
 
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2		ID_AA64MMFR0_EL1_TGRAN4_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
 #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
 #elif defined(CONFIG_ARM64_16K_PAGES)
 #define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2		ID_AA64MMFR0_EL1_TGRAN16_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
 #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -1039,6 +1051,19 @@
 
 #define PIRx_ELx_PERM(idx, perm)	((perm) << ((idx) * 4))
 
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE	UL(0x0)
+#define POE_R		UL(0x1)
+#define POE_X		UL(0x2)
+#define POE_RX		UL(0x3)
+#define POE_W		UL(0x4)
+#define POE_RW		UL(0x5)
+#define POE_XW		UL(0x6)
+#define POE_RXW		UL(0x7)
+#define POE_MASK	UL(0xf)
+
 #define ARM64_FEATURE_FIELD_BITS	4
 
 /* Defined for compatibility only, do not add new users. */
@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
 #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
 #define TIF_SME			27	/* SME in use */
 #define TIF_SME_VL_INHERIT	28	/* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE	29	/* Task is in a kernel mode FPSIMD section */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb);
 #include <asm-generic/tlb.h>
 
 /*
- * get the tlbi levels in arm64.  Default value is 0 if more than one
- * of cleared_* is set or neither is set.
- * Arm64 doesn't support p4ds now.
+ * get the tlbi levels in arm64.  Default value is TLBI_TTL_UNKNOWN if more than
+ * one of cleared_* is set or neither is set - this elides the level hinting to
+ * the hardware.
  */
 static inline int tlb_get_level(struct mmu_gather *tlb)
 {
 	/* The TTL field is only valid for the leaf entry. */
 	if (tlb->freed_tables)
-		return 0;
+		return TLBI_TTL_UNKNOWN;
 
 	if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
 				   tlb->cleared_puds ||
@@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
 				   tlb->cleared_p4ds))
 		return 1;
 
-	return 0;
+	if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+				   tlb->cleared_pmds ||
+				   tlb->cleared_puds))
+		return 0;
+
+	return TLBI_TTL_UNKNOWN;
 }
 
 static inline void tlb_flush(struct mmu_gather *tlb)
@@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void)
 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
 * the level at which the invalidation must take place. If the level is
 * wrong, no invalidation may take place. In the case where the level
- * cannot be easily determined, a 0 value for the level parameter will
- * perform a non-hinted invalidation.
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
 *
 * For Stage-2 invalidation, use the level values provided to that effect
 * in asm/stage2_pgtable.h.
 */
 #define TLBI_TTL_MASK		GENMASK_ULL(47, 44)
 
+#define TLBI_TTL_UNKNOWN	INT_MAX
+
 #define __tlbi_level(op, addr, level) do {				\
 	u64 arg = addr;							\
 									\
 	if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) &&	\
-	    level) {							\
+	    level >= 0 && level <= 3) {					\
 		u64 ttl = level & 3;					\
 		ttl |= get_trans_granule() << 2;			\
 		arg &= ~TLBI_TTL_MASK;					\
@@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void)
 } while (0)
 
 /*
- * This macro creates a properly formatted VA operand for the TLB RANGE.
- * The value bit assignments are:
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
 *
 *	+----------+------+-------+-------+-------+----------------------+
 *	|   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 *	+-----------------+-------+-------+-------+----------------------+
 *	|63      48|47  46|45   44|43   39|38   37|36                   0|
 *
- * The address range is determined by below formula:
- * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) *
+ * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR
+ * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
 *
 */
-#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)			\
-	({								\
-		unsigned long __ta = (addr) >> PAGE_SHIFT;		\
-		__ta &= GENMASK_ULL(36, 0);				\
-		__ta |= (unsigned long)(ttl) << 37;			\
-		__ta |= (unsigned long)(num) << 39;			\
-		__ta |= (unsigned long)(scale) << 44;			\
-		__ta |= get_trans_granule() << 46;			\
-		__ta |= (unsigned long)(asid) << 48;			\
-		__ta;							\
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl)		\
+	({								\
+		unsigned long __ta = (baddr);				\
+		unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0;	\
+		__ta &= GENMASK_ULL(36, 0);				\
+		__ta |= __ttl << 37;					\
+		__ta |= (unsigned long)(num) << 39;			\
+		__ta |= (unsigned long)(scale) << 44;			\
+		__ta |= get_trans_granule() << 46;			\
+		__ta |= (unsigned long)(asid) << 48;			\
+		__ta;							\
 	})
 
 /* These macros are used by the TLBI RANGE feature. */
@@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void)
 *	CPUs, ensuring that any walk-cache entries associated with the
 *	translation are also invalidated.
 *
- *	__flush_tlb_range(vma, start, end, stride, last_level)
+ *	__flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride' and only affect any walk-cache entries
- *		if 'last_level' is equal to false.
+ *		if 'last_level' is equal to false. tlb_level is the level at
+ *		which the invalidation must take place. If the level is wrong,
+ *		no invalidation may take place. In the case where the level
+ *		cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ *		perform a non-hinted invalidation.
 *
 *
 * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
@@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 * @tlb_level:	Translation Table level hint, if known
 * @tlbi_user:	If 'true', call an additional __tlbi_user()
 *		(typically for user ASIDs). 'flase' for IPA instructions
+ * @lpa2:	If 'true', the lpa2 scheme is used as set out below
 *
 * When the CPU does not support TLB range operations, flush the TLB
 * entries one by one at the granularity of 'stride'. If the TLB
 * range ops are supported, then:
 *
- *	1. If 'pages' is odd, flush the first page through non-range
- *	   operations;
+ *	1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ *	   64KB aligned, so flush pages one by one until the alignment is reached
+ *	   using the non-range operations. This step is skipped if LPA2 is not in
+ *	   use.
 *
- *	2. For remaining pages: the minimum range granularity is decided
- *	   by 'scale', so multiple range TLBI operations may be required.
- *	   Start from scale = 0, flush the corresponding number of pages
- *	   ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- *	   until no pages left.
+ *	2. The minimum range granularity is decided by 'scale', so multiple range
+ *	   TLBI operations may be required. Start from scale = 3, flush the largest
+ *	   possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ *	   requested range, then decrement scale and continue until one or zero pages
+ *	   are left. We must start from highest scale to ensure 64KB start alignment
+ *	   is maintained in the LPA2 case.
+ *
+ *	3. If there is 1 page remaining, flush it through non-range operations. Range
+ *	   operations can only span an even number of pages. We save this for last to
+ *	   ensure 64KB start alignment is maintained for the LPA2 case.
 *
 * Note that certain ranges can be represented by either num = 31 and
 * scale or num = 0 and scale + 1. The loop below favours the latter
 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
 */
 #define __flush_tlb_range_op(op, start, pages, stride,			\
-				asid, tlb_level, tlbi_user)		\
+				asid, tlb_level, tlbi_user, lpa2)	\
 do {									\
 	int num = 0;							\
-	int scale = 0;							\
+	int scale = 3;							\
+	int shift = lpa2 ? 16 : PAGE_SHIFT;				\
 	unsigned long addr;						\
 									\
 	while (pages > 0) {						\
 		if (!system_supports_tlb_range() ||			\
-		    pages % 2 == 1) {					\
+		    pages == 1 ||					\
+		    (lpa2 && start != ALIGN(start, SZ_64K))) {		\
 			addr = __TLBI_VADDR(start, asid);		\
 			__tlbi_level(op, addr, tlb_level);		\
 			if (tlbi_user)					\
@@ -384,20 +407,20 @@ do {									\
 									\
 		num = __TLBI_RANGE_NUM(pages, scale);			\
 		if (num >= 0) {						\
-			addr = __TLBI_VADDR_RANGE(start, asid, scale,	\
-						  num, tlb_level);	\
+			addr = __TLBI_VADDR_RANGE(start >> shift, asid,	\
+						scale, num, tlb_level);	\
 			__tlbi(r##op, addr);				\
 			if (tlbi_user)					\
 				__tlbi_user(r##op, addr);		\
 			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
 			pages -= __TLBI_RANGE_PAGES(num, scale);	\
 		}							\
-		scale++;						\
+		scale--;						\
 	}								\
 } while (0)
 
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level)	\
-	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
asid = ASID(vma->vm_mm);
|
||||
|
||||
if (last_level)
|
||||
__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
|
||||
__flush_tlb_range_op(vale1is, start, pages, stride, asid,
|
||||
tlb_level, true, lpa2_is_enabled());
|
||||
else
|
||||
__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
|
||||
__flush_tlb_range_op(vae1is, start, pages, stride, asid,
|
||||
tlb_level, true, lpa2_is_enabled());
|
||||
|
||||
dsb(ish);
|
||||
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
|
||||
@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
|
||||
/*
|
||||
* We cannot use leaf-only invalidation here, since we may be invalidating
|
||||
* table entries as part of collapsing hugepages or moving page tables.
|
||||
* Set the tlb_level to 0 because we can not get enough information here.
|
||||
* Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
|
||||
* information here.
|
||||
*/
|
||||
__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
|
||||
__flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
|
||||
}
|
||||
|
||||
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
|
@ -1081,25 +1081,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
|
||||
|
||||
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
|
||||
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
|
||||
|
||||
/*
|
||||
* Initialize the indirect array of CPU capabilities pointers before we
|
||||
* handle the boot CPU below.
|
||||
*/
|
||||
init_cpucap_indirect_list();
|
||||
|
||||
/*
|
||||
* Detect broken pseudo-NMI. Must be called _before_ the call to
|
||||
* setup_boot_cpu_capabilities() since it interacts with
|
||||
* can_use_gic_priorities().
|
||||
*/
|
||||
detect_system_supports_pseudo_nmi();
|
||||
|
||||
/*
|
||||
* Detect and enable early CPU capabilities based on the boot CPU,
|
||||
* after we have initialised the CPU feature infrastructure.
|
||||
*/
|
||||
setup_boot_cpu_capabilities();
|
||||
}
|
||||
|
||||
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
|
||||
@ -1584,16 +1565,6 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
|
||||
return has_sre;
|
||||
}
|
||||
|
||||
static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
|
||||
{
|
||||
u32 midr = read_cpuid_id();
|
||||
|
||||
/* Cavium ThunderX pass 1.x and 2.x */
|
||||
return midr_is_cpu_model_range(midr, MIDR_THUNDERX,
|
||||
MIDR_CPU_VAR_REV(0, 0),
|
||||
MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
|
||||
}
|
||||
|
||||
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
|
||||
int scope)
|
||||
{
|
||||
@ -1768,6 +1739,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
|
||||
return !meltdown_safe;
|
||||
}
|
||||
|
||||
#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
|
||||
static bool has_lpa2_at_stage1(u64 mmfr0)
|
||||
{
|
||||
unsigned int tgran;
|
||||
|
||||
tgran = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_TGRAN_SHIFT);
|
||||
return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
|
||||
}
|
||||
|
||||
static bool has_lpa2_at_stage2(u64 mmfr0)
|
||||
{
|
||||
unsigned int tgran;
|
||||
|
||||
tgran = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
|
||||
return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
|
||||
}
|
||||
|
||||
static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
|
||||
{
|
||||
u64 mmfr0;
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
|
||||
}
|
||||
#else
|
||||
static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
||||
#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
|
||||
|
||||
@ -1840,7 +1844,7 @@ static int __init __kpti_install_ng_mappings(void *__unused)
|
||||
static void __init kpti_install_ng_mappings(void)
|
||||
{
|
||||
/* Check whether KPTI is going to be used */
|
||||
if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
|
||||
if (!arm64_kernel_unmapped_at_el0())
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -2325,12 +2329,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
|
||||
},
|
||||
#endif /* CONFIG_ARM64_LSE_ATOMICS */
|
||||
{
|
||||
.desc = "Software prefetching using PRFM",
|
||||
.capability = ARM64_HAS_NO_HW_PREFETCH,
|
||||
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
|
||||
.matches = has_no_hw_prefetch,
|
||||
},
|
||||
{
|
||||
.desc = "Virtualization Host Extensions",
|
||||
.capability = ARM64_HAS_VIRT_HOST_EXTN,
|
||||
@ -2735,6 +2733,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.matches = has_cpuid_feature,
|
||||
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
|
||||
},
|
||||
{
|
||||
.desc = "52-bit Virtual Addressing for KVM (LPA2)",
|
||||
.capability = ARM64_HAS_LPA2,
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.matches = has_lpa2,
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
@ -3275,14 +3279,6 @@ void check_local_cpu_capabilities(void)
|
||||
verify_local_cpu_capabilities();
|
||||
}
|
||||
|
||||
static void __init setup_boot_cpu_capabilities(void)
|
||||
{
|
||||
/* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */
|
||||
update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
|
||||
/* Enable the SCOPE_BOOT_CPU capabilities alone right away */
|
||||
enable_cpu_capabilities(SCOPE_BOOT_CPU);
|
||||
}
|
||||
|
||||
bool this_cpu_has_cap(unsigned int n)
|
||||
{
|
||||
if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
|
||||
@ -3338,23 +3334,70 @@ unsigned long cpu_get_elf_hwcap2(void)
|
||||
return elf_hwcap[1];
|
||||
}
|
||||
|
||||
void __init setup_system_features(void)
|
||||
static void __init setup_boot_cpu_capabilities(void)
|
||||
{
|
||||
int i;
|
||||
/*
|
||||
* The system-wide safe feature feature register values have been
|
||||
* finalized. Finalize and log the available system capabilities.
|
||||
* The boot CPU's feature register values have been recorded. Detect
|
||||
* boot cpucaps and local cpucaps for the boot CPU, then enable and
|
||||
* patch alternatives for the available boot cpucaps.
|
||||
*/
|
||||
update_cpu_capabilities(SCOPE_SYSTEM);
|
||||
if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
|
||||
!cpus_have_cap(ARM64_HAS_PAN))
|
||||
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
|
||||
update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
|
||||
enable_cpu_capabilities(SCOPE_BOOT_CPU);
|
||||
apply_boot_alternatives();
|
||||
}
|
||||
|
||||
void __init setup_boot_cpu_features(void)
|
||||
{
|
||||
/*
|
||||
* Initialize the indirect array of CPU capabilities pointers before we
|
||||
* handle the boot CPU.
|
||||
*/
|
||||
init_cpucap_indirect_list();
|
||||
|
||||
/*
|
||||
* Enable all the available capabilities which have not been enabled
|
||||
* already.
|
||||
* Detect broken pseudo-NMI. Must be called _before_ the call to
|
||||
* setup_boot_cpu_capabilities() since it interacts with
|
||||
* can_use_gic_priorities().
|
||||
*/
|
||||
detect_system_supports_pseudo_nmi();
|
||||
|
||||
setup_boot_cpu_capabilities();
|
||||
}
|
||||
|
||||
static void __init setup_system_capabilities(void)
|
||||
{
|
||||
/*
|
||||
* The system-wide safe feature register values have been finalized.
|
||||
* Detect, enable, and patch alternatives for the available system
|
||||
* cpucaps.
|
||||
*/
|
||||
update_cpu_capabilities(SCOPE_SYSTEM);
|
||||
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
|
||||
apply_alternatives_all();
|
||||
|
||||
/*
|
||||
* Log any cpucaps with a cpumask as these aren't logged by
|
||||
* update_cpu_capabilities().
|
||||
*/
|
||||
for (int i = 0; i < ARM64_NCAPS; i++) {
|
||||
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
|
||||
|
||||
if (caps && caps->cpus && caps->desc &&
|
||||
cpumask_any(caps->cpus) < nr_cpu_ids)
|
||||
pr_info("detected: %s on CPU%*pbl\n",
|
||||
caps->desc, cpumask_pr_args(caps->cpus));
|
||||
}
|
||||
|
||||
/*
|
||||
* TTBR0 PAN doesn't have its own cpucap, so log it manually.
|
||||
*/
|
||||
if (system_uses_ttbr0_pan())
|
||||
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
|
||||
}
|
||||
|
||||
void __init setup_system_features(void)
|
||||
{
|
||||
setup_system_capabilities();
|
||||
|
||||
kpti_install_ng_mappings();
|
||||
|
||||
@ -3367,15 +3410,6 @@ void __init setup_system_features(void)
|
||||
if (!cache_type_cwg())
|
||||
pr_warn("No Cache Writeback Granule information, assuming %d\n",
|
||||
ARCH_DMA_MINALIGN);
|
||||
|
||||
for (i = 0; i < ARM64_NCAPS; i++) {
|
||||
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
|
||||
|
||||
if (caps && caps->cpus && caps->desc &&
|
||||
cpumask_any(caps->cpus) < nr_cpu_ids)
|
||||
pr_info("detected: %s on CPU%*pbl\n",
|
||||
caps->desc, cpumask_pr_args(caps->cpus));
|
||||
}
|
||||
}
|
||||
|
||||
void __init setup_user_features(void)
|
||||
|
@ -36,8 +36,6 @@ static struct cpuinfo_arm64 boot_cpu_data;
|
||||
static inline const char *icache_policy_str(int l1ip)
|
||||
{
|
||||
switch (l1ip) {
|
||||
case CTR_EL0_L1Ip_VPIPT:
|
||||
return "VPIPT";
|
||||
case CTR_EL0_L1Ip_VIPT:
|
||||
return "VIPT";
|
||||
case CTR_EL0_L1Ip_PIPT:
|
||||
@ -388,9 +386,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
|
||||
switch (l1ip) {
|
||||
case CTR_EL0_L1Ip_PIPT:
|
||||
break;
|
||||
case CTR_EL0_L1Ip_VPIPT:
|
||||
set_bit(ICACHEF_VPIPT, &__icache_flags);
|
||||
break;
|
||||
case CTR_EL0_L1Ip_VIPT:
|
||||
default:
|
||||
/* Assume aliasing */
|
||||
|
@ -85,13 +85,13 @@
|
||||
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
|
||||
* flag the register state as invalid.
|
||||
*
|
||||
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
|
||||
* save the task's FPSIMD context back to task_struct from softirq context.
|
||||
* To prevent this from racing with the manipulation of the task's FPSIMD state
|
||||
* from task context and thereby corrupting the state, it is necessary to
|
||||
* protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
|
||||
* flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
|
||||
* run but prevent them to use FPSIMD.
|
||||
* In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
|
||||
* called from softirq context, which will save the task's FPSIMD context back
|
||||
* to task_struct. To prevent this from racing with the manipulation of the
|
||||
* task's FPSIMD state from task context and thereby corrupting the state, it
|
||||
* is necessary to protect any manipulation of a task's fpsimd_state or
|
||||
* TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
|
||||
* softirq servicing entirely until put_cpu_fpsimd_context() is called.
|
||||
*
|
||||
* For a certain task, the sequence may look something like this:
|
||||
* - the task gets scheduled in; if both the task's fpsimd_cpu field
|
||||
@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
|
||||
|
||||
#endif
|
||||
|
||||
DEFINE_PER_CPU(bool, fpsimd_context_busy);
|
||||
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
|
||||
|
||||
static void fpsimd_bind_task_to_cpu(void);
|
||||
|
||||
static void __get_cpu_fpsimd_context(void)
|
||||
{
|
||||
bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
|
||||
|
||||
WARN_ON(busy);
|
||||
}
|
||||
|
||||
/*
|
||||
* Claim ownership of the CPU FPSIMD context for use by the calling context.
|
||||
*
|
||||
* The caller may freely manipulate the FPSIMD context metadata until
|
||||
* put_cpu_fpsimd_context() is called.
|
||||
*
|
||||
* The double-underscore version must only be called if you know the task
|
||||
* can't be preempted.
|
||||
*
|
||||
* On RT kernels local_bh_disable() is not sufficient because it only
|
||||
* serializes soft interrupt related sections via a local lock, but stays
|
||||
* preemptible. Disabling preemption is the right choice here as bottom
|
||||
@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
|
||||
local_bh_disable();
|
||||
else
|
||||
preempt_disable();
|
||||
__get_cpu_fpsimd_context();
|
||||
}
|
||||
|
||||
static void __put_cpu_fpsimd_context(void)
|
||||
{
|
||||
bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
|
||||
|
||||
WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
|
||||
}
|
||||
|
||||
/*
|
||||
@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
|
||||
*/
|
||||
static void put_cpu_fpsimd_context(void)
|
||||
{
|
||||
__put_cpu_fpsimd_context();
|
||||
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
local_bh_enable();
|
||||
else
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static bool have_cpu_fpsimd_context(void)
|
||||
{
|
||||
return !preemptible() && __this_cpu_read(fpsimd_context_busy);
|
||||
}
|
||||
|
||||
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
|
||||
{
|
||||
return task->thread.vl[type];
|
||||
@ -383,7 +356,8 @@ static void task_fpsimd_load(void)
|
||||
bool restore_ffr;
|
||||
|
||||
WARN_ON(!system_supports_fpsimd());
|
||||
WARN_ON(!have_cpu_fpsimd_context());
|
||||
WARN_ON(preemptible());
|
||||
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
|
||||
|
||||
if (system_supports_sve() || system_supports_sme()) {
|
||||
switch (current->thread.fp_type) {
|
||||
@ -406,7 +380,7 @@ static void task_fpsimd_load(void)
|
||||
default:
|
||||
/*
|
||||
* This indicates either a bug in
|
||||
* fpsimd_save() or memory corruption, we
|
||||
* fpsimd_save_user_state() or memory corruption, we
|
||||
* should always record an explicit format
|
||||
* when we save. We always at least have the
|
||||
* memory allocated for FPSMID registers so
|
||||
@ -457,7 +431,7 @@ static void task_fpsimd_load(void)
|
||||
* than via current, if we are saving KVM state then it will have
|
||||
* ensured that the type of registers to save is set in last->to_save.
|
||||
*/
|
||||
static void fpsimd_save(void)
|
||||
static void fpsimd_save_user_state(void)
|
||||
{
|
||||
struct cpu_fp_state const *last =
|
||||
this_cpu_ptr(&fpsimd_last_state);
|
||||
@ -467,7 +441,7 @@ static void fpsimd_save(void)
|
||||
unsigned int vl;
|
||||
|
||||
WARN_ON(!system_supports_fpsimd());
|
||||
WARN_ON(!have_cpu_fpsimd_context());
|
||||
WARN_ON(preemptible());
|
||||
|
||||
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
return;
|
||||
@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
|
||||
if (task == current) {
|
||||
get_cpu_fpsimd_context();
|
||||
|
||||
fpsimd_save();
|
||||
fpsimd_save_user_state();
|
||||
}
|
||||
|
||||
fpsimd_flush_task_state(task);
|
||||
@ -1171,7 +1145,7 @@ void __init sve_setup(void)
|
||||
unsigned long b;
|
||||
int max_bit;
|
||||
|
||||
if (!cpus_have_cap(ARM64_SVE))
|
||||
if (!system_supports_sve())
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -1301,7 +1275,7 @@ void __init sme_setup(void)
|
||||
struct vl_info *info = &vl_info[ARM64_VEC_SME];
|
||||
int min_bit, max_bit;
|
||||
|
||||
if (!cpus_have_cap(ARM64_SME))
|
||||
if (!system_supports_sme())
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -1500,6 +1474,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
|
||||
current);
|
||||
}
|
||||
|
||||
static void fpsimd_load_kernel_state(struct task_struct *task)
|
||||
{
|
||||
struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
|
||||
|
||||
/*
|
||||
* Elide the load if this CPU holds the most recent kernel mode
|
||||
* FPSIMD context of the current task.
|
||||
*/
|
||||
if (last->st == &task->thread.kernel_fpsimd_state &&
|
||||
task->thread.kernel_fpsimd_cpu == smp_processor_id())
|
||||
return;
|
||||
|
||||
fpsimd_load_state(&task->thread.kernel_fpsimd_state);
|
||||
}
|
||||
|
||||
static void fpsimd_save_kernel_state(struct task_struct *task)
|
||||
{
|
||||
struct cpu_fp_state cpu_fp_state = {
|
||||
.st = &task->thread.kernel_fpsimd_state,
|
||||
.to_save = FP_STATE_FPSIMD,
|
||||
};
|
||||
|
||||
fpsimd_save_state(&task->thread.kernel_fpsimd_state);
|
||||
fpsimd_bind_state_to_cpu(&cpu_fp_state);
|
||||
|
||||
task->thread.kernel_fpsimd_cpu = smp_processor_id();
|
||||
}
|
||||
|
||||
void fpsimd_thread_switch(struct task_struct *next)
|
||||
{
|
||||
bool wrong_task, wrong_cpu;
|
||||
@ -1507,24 +1509,31 @@ void fpsimd_thread_switch(struct task_struct *next)
|
||||
if (!system_supports_fpsimd())
|
||||
return;
|
||||
|
||||
__get_cpu_fpsimd_context();
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
/* Save unsaved fpsimd state, if any: */
|
||||
fpsimd_save();
|
||||
if (test_thread_flag(TIF_KERNEL_FPSTATE))
|
||||
fpsimd_save_kernel_state(current);
|
||||
else
|
||||
fpsimd_save_user_state();
|
||||
|
||||
/*
|
||||
* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
|
||||
* state. For kernel threads, FPSIMD registers are never loaded
|
||||
* and wrong_task and wrong_cpu will always be true.
|
||||
*/
|
||||
wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
|
||||
&next->thread.uw.fpsimd_state;
|
||||
wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
|
||||
if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
|
||||
fpsimd_load_kernel_state(next);
|
||||
set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
|
||||
} else {
|
||||
/*
|
||||
* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
|
||||
* state. For kernel threads, FPSIMD registers are never
|
||||
* loaded with user mode FPSIMD state and so wrong_task and
|
||||
* wrong_cpu will always be true.
|
||||
*/
|
||||
wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
|
||||
&next->thread.uw.fpsimd_state;
|
||||
wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
|
||||
|
||||
update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
|
||||
wrong_task || wrong_cpu);
|
||||
|
||||
__put_cpu_fpsimd_context();
|
||||
update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
|
||||
wrong_task || wrong_cpu);
|
||||
}
|
||||
}
|
||||
|
||||
static void fpsimd_flush_thread_vl(enum vec_type type)
|
||||
@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
|
||||
return;
|
||||
|
||||
get_cpu_fpsimd_context();
|
||||
fpsimd_save();
|
||||
fpsimd_save_user_state();
|
||||
put_cpu_fpsimd_context();
|
||||
}
|
||||
|
||||
@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
|
||||
*/
|
||||
void fpsimd_save_and_flush_cpu_state(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!system_supports_fpsimd())
|
||||
return;
|
||||
WARN_ON(preemptible());
|
||||
__get_cpu_fpsimd_context();
|
||||
fpsimd_save();
|
||||
local_irq_save(flags);
|
||||
fpsimd_save_user_state();
|
||||
fpsimd_flush_cpu_state();
|
||||
__put_cpu_fpsimd_context();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
@ -1864,10 +1875,37 @@ void kernel_neon_begin(void)
|
||||
get_cpu_fpsimd_context();
|
||||
|
||||
/* Save unsaved fpsimd state, if any: */
|
||||
fpsimd_save();
|
||||
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
|
||||
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
|
||||
fpsimd_save_kernel_state(current);
|
||||
} else {
|
||||
fpsimd_save_user_state();
|
||||
|
||||
/*
|
||||
* Set the thread flag so that the kernel mode FPSIMD state
|
||||
* will be context switched along with the rest of the task
|
||||
* state.
|
||||
*
|
||||
* On non-PREEMPT_RT, softirqs may interrupt task level kernel
|
||||
* mode FPSIMD, but the task will not be preemptible so setting
|
||||
* TIF_KERNEL_FPSTATE for those would be both wrong (as it
|
||||
* would mark the task context FPSIMD state as requiring a
|
||||
* context switch) and unnecessary.
|
||||
*
|
||||
* On PREEMPT_RT, softirqs are serviced from a separate thread,
|
||||
* which is scheduled as usual, and this guarantees that these
|
||||
* softirqs are not interrupting use of the FPSIMD in kernel
|
||||
* mode in task context. So in this case, setting the flag here
|
||||
* is always appropriate.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
|
||||
set_thread_flag(TIF_KERNEL_FPSTATE);
|
||||
}
|
||||
|
||||
/* Invalidate any task state remaining in the fpsimd regs: */
|
||||
fpsimd_flush_cpu_state();
|
||||
|
||||
put_cpu_fpsimd_context();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_neon_begin);
|
||||
|
||||
@ -1885,7 +1923,16 @@ void kernel_neon_end(void)
|
||||
if (!system_supports_fpsimd())
|
||||
return;
|
||||
|
||||
put_cpu_fpsimd_context();
|
||||
/*
|
||||
* If we are returning from a nested use of kernel mode FPSIMD, restore
|
||||
* the task context kernel mode FPSIMD state. This can only happen when
|
||||
* running in softirq context on non-PREEMPT_RT.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
|
||||
test_thread_flag(TIF_KERNEL_FPSTATE))
|
||||
fpsimd_load_kernel_state(current);
|
||||
else
|
||||
clear_thread_flag(TIF_KERNEL_FPSTATE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_neon_end);
|
||||
|
||||
|
@ -482,7 +482,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
|
||||
|
||||
str_l x21, __fdt_pointer, x5 // Save FDT pointer
|
||||
|
||||
ldr_l x4, kimage_vaddr // Save the offset between
|
||||
adrp x4, _text // Save the offset between
|
||||
sub x4, x4, x0 // the kernel virtual and
|
||||
str_l x4, kimage_voffset, x5 // physical mappings
|
||||
|
||||
|
@ -21,14 +21,25 @@
|
||||
|
||||
static u64 __boot_status __initdata;
|
||||
|
||||
// temporary __prel64 related definitions
|
||||
// to be removed when this code is moved under pi/
|
||||
|
||||
#define __prel64_initconst __initconst
|
||||
|
||||
#define PREL64(type, name) union { type *name; }
|
||||
|
||||
#define prel64_pointer(__d) (__d)
|
||||
|
||||
typedef bool filter_t(u64 val);
|
||||
|
||||
struct ftr_set_desc {
|
||||
char name[FTR_DESC_NAME_LEN];
|
||||
struct arm64_ftr_override *override;
|
||||
PREL64(struct arm64_ftr_override, override);
|
||||
struct {
|
||||
char name[FTR_DESC_FIELD_LEN];
|
||||
u8 shift;
|
||||
u8 width;
|
||||
bool (*filter)(u64 val);
|
||||
PREL64(filter_t, filter);
|
||||
} fields[];
|
||||
};
|
||||
|
||||
@ -46,7 +57,7 @@ static bool __init mmfr1_vh_filter(u64 val)
|
||||
val == 0);
|
||||
}
|
||||
|
||||
static const struct ftr_set_desc mmfr1 __initconst = {
|
||||
static const struct ftr_set_desc mmfr1 __prel64_initconst = {
|
||||
.name = "id_aa64mmfr1",
|
||||
.override = &id_aa64mmfr1_override,
|
||||
.fields = {
|
||||
@ -70,7 +81,7 @@ static bool __init pfr0_sve_filter(u64 val)
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct ftr_set_desc pfr0 __initconst = {
|
||||
static const struct ftr_set_desc pfr0 __prel64_initconst = {
|
||||
.name = "id_aa64pfr0",
|
||||
.override = &id_aa64pfr0_override,
|
||||
.fields = {
|
||||
@ -94,7 +105,7 @@ static bool __init pfr1_sme_filter(u64 val)
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct ftr_set_desc pfr1 __initconst = {
|
||||
static const struct ftr_set_desc pfr1 __prel64_initconst = {
|
||||
.name = "id_aa64pfr1",
|
||||
.override = &id_aa64pfr1_override,
|
||||
.fields = {
|
||||
@ -105,7 +116,7 @@ static const struct ftr_set_desc pfr1 __initconst = {
|
||||
},
|
||||
};
|
||||
|
||||
static const struct ftr_set_desc isar1 __initconst = {
|
||||
static const struct ftr_set_desc isar1 __prel64_initconst = {
|
||||
.name = "id_aa64isar1",
|
||||
.override = &id_aa64isar1_override,
|
||||
.fields = {
|
||||
@ -117,7 +128,7 @@ static const struct ftr_set_desc isar1 __initconst = {
|
||||
},
|
||||
};
|
||||
|
||||
static const struct ftr_set_desc isar2 __initconst = {
|
||||
static const struct ftr_set_desc isar2 __prel64_initconst = {
|
||||
.name = "id_aa64isar2",
|
||||
.override = &id_aa64isar2_override,
|
||||
.fields = {
|
||||
@ -128,7 +139,7 @@ static const struct ftr_set_desc isar2 __initconst = {
|
||||
},
|
||||
};
|
||||
|
||||
static const struct ftr_set_desc smfr0 __initconst = {
|
||||
static const struct ftr_set_desc smfr0 __prel64_initconst = {
|
||||
.name = "id_aa64smfr0",
|
||||
.override = &id_aa64smfr0_override,
|
||||
.fields = {
|
||||
@ -149,7 +160,7 @@ static bool __init hvhe_filter(u64 val)
|
||||
ID_AA64MMFR1_EL1_VH_SHIFT));
|
||||
}
|
||||
|
||||
static const struct ftr_set_desc sw_features __initconst = {
|
||||
static const struct ftr_set_desc sw_features __prel64_initconst = {
|
||||
.name = "arm64_sw",
|
||||
.override = &arm64_sw_feature_override,
|
||||
.fields = {
|
||||
@ -159,22 +170,23 @@ static const struct ftr_set_desc sw_features __initconst = {
|
||||
},
|
||||
};
|
||||
|
||||
static const struct ftr_set_desc * const regs[] __initconst = {
|
||||
&mmfr1,
|
||||
&pfr0,
|
||||
&pfr1,
|
||||
&isar1,
|
||||
&isar2,
|
||||
&smfr0,
|
||||
&sw_features,
|
||||
static const
|
||||
PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
|
||||
{ &mmfr1 },
|
||||
{ &pfr0 },
|
||||
{ &pfr1 },
|
||||
{ &isar1 },
|
||||
{ &isar2 },
|
||||
{ &smfr0 },
|
||||
{ &sw_features },
|
||||
};
|
||||
|
||||
static const struct {
|
||||
char alias[FTR_ALIAS_NAME_LEN];
|
||||
char feature[FTR_ALIAS_OPTION_LEN];
|
||||
} aliases[] __initconst = {
|
||||
{ "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
|
||||
{ "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
|
||||
{ "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
|
||||
{ "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
|
||||
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
|
||||
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
|
||||
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
|
||||
@ -187,45 +199,61 @@ static const struct {
|
||||
{ "nokaslr", "arm64_sw.nokaslr=1" },
|
||||
};
|
||||
|
||||
static int __init parse_nokaslr(char *unused)
|
||||
static int __init parse_hexdigit(const char *p, u64 *v)
|
||||
{
|
||||
/* nokaslr param handling is done by early cpufeature code */
|
||||
// skip "0x" if it comes next
|
||||
if (p[0] == '0' && tolower(p[1]) == 'x')
|
||||
p += 2;
|
||||
|
||||
// check whether the RHS is a single hex digit
|
||||
if (!isxdigit(p[0]) || (p[1] && !isspace(p[1])))
|
||||
return -EINVAL;
|
||||
|
||||
*v = tolower(*p) - (isdigit(*p) ? '0' : 'a' - 10);
|
||||
return 0;
|
||||
}
|
||||
early_param("nokaslr", parse_nokaslr);
|
||||
|
||||
static int __init find_field(const char *cmdline,
|
||||
static int __init find_field(const char *cmdline, char *opt, int len,
|
||||
const struct ftr_set_desc *reg, int f, u64 *v)
|
||||
{
|
||||
char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
|
||||
int len;
|
||||
int flen = strlen(reg->fields[f].name);
|
||||
|
||||
len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
|
||||
reg->name, reg->fields[f].name);
|
||||
// append '<fieldname>=' to obtain '<name>.<fieldname>='
|
||||
memcpy(opt + len, reg->fields[f].name, flen);
|
||||
len += flen;
|
||||
opt[len++] = '=';
|
||||
|
||||
if (!parameqn(cmdline, opt, len))
|
||||
if (memcmp(cmdline, opt, len))
|
||||
return -1;
|
||||
|
||||
return kstrtou64(cmdline + len, 0, v);
|
||||
return parse_hexdigit(cmdline + len, v);
|
||||
}
|
||||
|
||||
static void __init match_options(const char *cmdline)
|
||||
{
|
||||
char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
||||
const struct ftr_set_desc *reg = prel64_pointer(regs[i].reg);
|
||||
struct arm64_ftr_override *override;
|
||||
int len = strlen(reg->name);
|
||||
int f;
|
||||
|
||||
if (!regs[i]->override)
|
||||
continue;
|
||||
override = prel64_pointer(reg->override);
|
||||
|
||||
for (f = 0; strlen(regs[i]->fields[f].name); f++) {
|
||||
u64 shift = regs[i]->fields[f].shift;
|
||||
u64 width = regs[i]->fields[f].width ?: 4;
|
||||
// set opt[] to '<name>.'
|
||||
memcpy(opt, reg->name, len);
|
||||
opt[len++] = '.';
|
||||
|
||||
for (f = 0; reg->fields[f].name[0] != '\0'; f++) {
|
||||
u64 shift = reg->fields[f].shift;
|
||||
u64 width = reg->fields[f].width ?: 4;
|
||||
u64 mask = GENMASK_ULL(shift + width - 1, shift);
|
||||
bool (*filter)(u64 val);
|
||||
u64 v;
|
||||
|
||||
if (find_field(cmdline, regs[i], f, &v))
|
||||
if (find_field(cmdline, opt, len, reg, f, &v))
|
||||
continue;
|
||||
|
||||
/*
|
||||
@ -233,16 +261,16 @@ static void __init match_options(const char *cmdline)
|
||||
* it by setting the value to the all-ones while
|
||||
* clearing the mask... Yes, this is fragile.
|
||||
*/
|
||||
if (regs[i]->fields[f].filter &&
|
||||
!regs[i]->fields[f].filter(v)) {
|
||||
regs[i]->override->val |= mask;
|
||||
regs[i]->override->mask &= ~mask;
|
||||
filter = prel64_pointer(reg->fields[f].filter);
|
||||
if (filter && !filter(v)) {
|
||||
override->val |= mask;
|
||||
override->mask &= ~mask;
|
||||
continue;
|
||||
}
|
||||
|
||||
regs[i]->override->val &= ~mask;
|
||||
regs[i]->override->val |= (v << shift) & mask;
|
||||
regs[i]->override->mask |= mask;
|
||||
override->val &= ~mask;
|
||||
override->val |= (v << shift) & mask;
|
||||
override->mask |= mask;
|
||||
|
||||
return;
|
||||
}
|
||||
@ -258,23 +286,29 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
|
||||
|
||||
cmdline = skip_spaces(cmdline);
|
||||
|
||||
for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
|
||||
/* terminate on "--" appearing on the command line by itself */
|
||||
if (cmdline[0] == '-' && cmdline[1] == '-' && isspace(cmdline[2]))
|
||||
return;
|
||||
|
||||
for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++) {
|
||||
if (len >= sizeof(buf) - 1)
|
||||
break;
|
||||
if (cmdline[len] == '-')
|
||||
buf[len] = '_';
|
||||
else
|
||||
buf[len] = cmdline[len];
|
||||
}
|
||||
if (!len)
|
||||
return;
|
||||
|
||||
len = min(len, ARRAY_SIZE(buf) - 1);
|
||||
memcpy(buf, cmdline, len);
|
||||
buf[len] = '\0';
|
||||
|
||||
if (strcmp(buf, "--") == 0)
|
||||
return;
|
||||
buf[len] = 0;
|
||||
|
||||
cmdline += len;
|
||||
|
||||
match_options(buf);
|
||||
|
||||
for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
|
||||
if (parameq(buf, aliases[i].alias))
|
||||
if (!memcmp(buf, aliases[i].alias, len + 1))
|
||||
__parse_cmdline(aliases[i].feature, false);
|
||||
} while (1);
|
||||
}
|
||||
@ -316,13 +350,16 @@ void init_feature_override(u64 boot_status);
|
||||
|
||||
asmlinkage void __init init_feature_override(u64 boot_status)
|
||||
{
|
||||
struct arm64_ftr_override *override;
|
||||
const struct ftr_set_desc *reg;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
||||
if (regs[i]->override) {
|
||||
regs[i]->override->val = 0;
|
||||
regs[i]->override->mask = 0;
|
||||
}
|
||||
reg = prel64_pointer(regs[i].reg);
|
||||
override = prel64_pointer(reg->override);
|
||||
|
||||
override->val = 0;
|
||||
override->mask = 0;
|
||||
}
|
||||
|
||||
__boot_status = boot_status;
|
||||
@ -330,9 +367,9 @@ asmlinkage void __init init_feature_override(u64 boot_status)
|
||||
parse_cmdline();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
||||
if (regs[i]->override)
|
||||
dcache_clean_inval_poc((unsigned long)regs[i]->override,
|
||||
(unsigned long)regs[i]->override +
|
||||
sizeof(*regs[i]->override));
|
||||
reg = prel64_pointer(regs[i].reg);
|
||||
override = prel64_pointer(reg->override);
|
||||
dcache_clean_inval_poc((unsigned long)override,
|
||||
(unsigned long)(override + 1));
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <asm/daifflags.h>
|
||||
#include <asm/exception.h>
|
||||
#include <asm/numa.h>
|
||||
#include <asm/softirq_stack.h>
|
||||
#include <asm/stacktrace.h>
|
||||
#include <asm/vmap_stack.h>
|
||||
@ -47,17 +48,17 @@ static void init_irq_scs(void)
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu(irq_shadow_call_stack_ptr, cpu) =
|
||||
scs_alloc(cpu_to_node(cpu));
|
||||
scs_alloc(early_cpu_to_node(cpu));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
static void init_irq_stacks(void)
|
||||
static void __init init_irq_stacks(void)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long *p;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
|
||||
p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu));
|
||||
per_cpu(irq_stack_ptr, cpu) = p;
|
||||
}
|
||||
}
|
||||
|
@ -36,3 +36,10 @@ void __init kaslr_init(void)
|
||||
pr_info("KASLR enabled\n");
|
||||
__kaslr_is_enabled = true;
|
||||
}
|
||||
|
||||
static int __init parse_nokaslr(char *unused)
|
||||
{
|
||||
/* nokaslr param handling is done by early cpufeature code */
|
||||
return 0;
|
||||
}
|
||||
early_param("nokaslr", parse_nokaslr);
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
|
||||
-Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
|
||||
$(DISABLE_LATENT_ENTROPY_PLUGIN) \
|
||||
$(call cc-option,-mbranch-protection=none) \
|
||||
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
|
||||
-include $(srctree)/include/linux/hidden.h \
|
||||
|
@ -439,9 +439,8 @@ static void __init hyp_mode_check(void)
|
||||
void __init smp_cpus_done(unsigned int max_cpus)
|
||||
{
|
||||
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
|
||||
setup_system_features();
|
||||
hyp_mode_check();
|
||||
apply_alternatives_all();
|
||||
setup_system_features();
|
||||
setup_user_features();
|
||||
mark_linear_text_alias_ro();
|
||||
}
|
||||
@ -454,14 +453,9 @@ void __init smp_prepare_boot_cpu(void)
|
||||
* freed shortly, so we must move over to the runtime per-cpu area.
|
||||
*/
|
||||
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
|
||||
cpuinfo_store_boot_cpu();
|
||||
|
||||
/*
|
||||
* We now know enough about the boot CPU to apply the
|
||||
* alternatives that cannot wait until interrupt handling
|
||||
* and/or scheduling is enabled.
|
||||
*/
|
||||
apply_boot_alternatives();
|
||||
cpuinfo_store_boot_cpu();
|
||||
setup_boot_cpu_features();
|
||||
|
||||
/* Conditionally switch to GIC PMR for interrupt masking */
|
||||
if (system_uses_irq_prio_masking())
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/efi.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
@ -18,6 +19,31 @@
|
||||
#include <asm/stack_pointer.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
/*
|
||||
* Kernel unwind state
|
||||
*
|
||||
* @common: Common unwind state.
|
||||
* @task: The task being unwound.
|
||||
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
|
||||
* associated with the most recently encountered replacement lr
|
||||
* value.
|
||||
*/
|
||||
struct kunwind_state {
|
||||
struct unwind_state common;
|
||||
struct task_struct *task;
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
struct llist_node *kr_cur;
|
||||
#endif
|
||||
};
|
||||
|
||||
static __always_inline void
|
||||
kunwind_init(struct kunwind_state *state,
|
||||
struct task_struct *task)
|
||||
{
|
||||
unwind_init_common(&state->common);
|
||||
state->task = task;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start an unwind from a pt_regs.
|
||||
*
|
||||
@ -26,13 +52,13 @@
|
||||
* The regs must be on a stack currently owned by the calling task.
|
||||
*/
|
||||
static __always_inline void
|
||||
unwind_init_from_regs(struct unwind_state *state,
|
||||
struct pt_regs *regs)
|
||||
kunwind_init_from_regs(struct kunwind_state *state,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
unwind_init_common(state, current);
|
||||
kunwind_init(state, current);
|
||||
|
||||
state->fp = regs->regs[29];
|
||||
state->pc = regs->pc;
|
||||
state->common.fp = regs->regs[29];
|
||||
state->common.pc = regs->pc;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -44,12 +70,12 @@ unwind_init_from_regs(struct unwind_state *state,
|
||||
* The function which invokes this must be noinline.
|
||||
*/
|
||||
static __always_inline void
|
||||
unwind_init_from_caller(struct unwind_state *state)
|
||||
kunwind_init_from_caller(struct kunwind_state *state)
|
||||
{
|
||||
unwind_init_common(state, current);
|
||||
kunwind_init(state, current);
|
||||
|
||||
state->fp = (unsigned long)__builtin_frame_address(1);
|
||||
state->pc = (unsigned long)__builtin_return_address(0);
|
||||
state->common.fp = (unsigned long)__builtin_frame_address(1);
|
||||
state->common.pc = (unsigned long)__builtin_return_address(0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -63,35 +89,38 @@ unwind_init_from_caller(struct unwind_state *state)
|
||||
* call this for the current task.
|
||||
*/
|
||||
static __always_inline void
|
||||
unwind_init_from_task(struct unwind_state *state,
|
||||
struct task_struct *task)
|
||||
kunwind_init_from_task(struct kunwind_state *state,
|
||||
struct task_struct *task)
|
||||
{
|
||||
unwind_init_common(state, task);
|
||||
kunwind_init(state, task);
|
||||
|
||||
state->fp = thread_saved_fp(task);
|
||||
state->pc = thread_saved_pc(task);
|
||||
state->common.fp = thread_saved_fp(task);
|
||||
state->common.pc = thread_saved_pc(task);
|
||||
}
|
||||
|
||||
static __always_inline int
|
||||
unwind_recover_return_address(struct unwind_state *state)
|
||||
kunwind_recover_return_address(struct kunwind_state *state)
|
||||
{
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
if (state->task->ret_stack &&
|
||||
(state->pc == (unsigned long)return_to_handler)) {
|
||||
(state->common.pc == (unsigned long)return_to_handler)) {
|
||||
unsigned long orig_pc;
|
||||
orig_pc = ftrace_graph_ret_addr(state->task, NULL, state->pc,
|
||||
(void *)state->fp);
|
||||
if (WARN_ON_ONCE(state->pc == orig_pc))
|
||||
orig_pc = ftrace_graph_ret_addr(state->task, NULL,
|
||||
state->common.pc,
|
||||
(void *)state->common.fp);
|
||||
if (WARN_ON_ONCE(state->common.pc == orig_pc))
|
||||
return -EINVAL;
|
||||
state->pc = orig_pc;
|
||||
state->common.pc = orig_pc;
|
||||
}
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
if (is_kretprobe_trampoline(state->pc)) {
|
||||
state->pc = kretprobe_find_ret_addr(state->task,
|
||||
(void *)state->fp,
|
||||
&state->kr_cur);
|
||||
if (is_kretprobe_trampoline(state->common.pc)) {
|
||||
unsigned long orig_pc;
|
||||
orig_pc = kretprobe_find_ret_addr(state->task,
|
||||
(void *)state->common.fp,
|
||||
&state->kr_cur);
|
||||
state->common.pc = orig_pc;
|
||||
}
|
||||
#endif /* CONFIG_KRETPROBES */
|
||||
|
||||
@ -106,38 +135,40 @@ unwind_recover_return_address(struct unwind_state *state)
|
||||
* and the location (but not the fp value) of B.
|
||||
*/
|
||||
static __always_inline int
|
||||
unwind_next(struct unwind_state *state)
|
||||
kunwind_next(struct kunwind_state *state)
|
||||
{
|
||||
struct task_struct *tsk = state->task;
|
||||
unsigned long fp = state->fp;
|
||||
unsigned long fp = state->common.fp;
|
||||
int err;
|
||||
|
||||
/* Final frame; nothing to unwind */
|
||||
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
|
||||
return -ENOENT;
|
||||
|
||||
err = unwind_next_frame_record(state);
|
||||
err = unwind_next_frame_record(&state->common);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
state->pc = ptrauth_strip_kernel_insn_pac(state->pc);
|
||||
state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
|
||||
|
||||
return unwind_recover_return_address(state);
|
||||
return kunwind_recover_return_address(state);
|
||||
}
|
||||
|
||||
typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
|
||||
|
||||
static __always_inline void
|
||||
unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
|
||||
void *cookie)
|
||||
do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
|
||||
void *cookie)
|
||||
{
|
||||
if (unwind_recover_return_address(state))
|
||||
if (kunwind_recover_return_address(state))
|
||||
return;
|
||||
|
||||
while (1) {
|
||||
int ret;
|
||||
|
||||
if (!consume_entry(cookie, state->pc))
|
||||
if (!consume_state(state, cookie))
|
||||
break;
|
||||
ret = unwind_next(state);
|
||||
ret = kunwind_next(state);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
@ -172,9 +203,10 @@ unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
|
||||
: stackinfo_get_unknown(); \
|
||||
})
|
||||
|
||||
noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
|
||||
void *cookie, struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
static __always_inline void
|
||||
kunwind_stack_walk(kunwind_consume_fn consume_state,
|
||||
void *cookie, struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct stack_info stacks[] = {
|
||||
stackinfo_get_task(task),
|
||||
@ -190,22 +222,48 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
|
||||
STACKINFO_EFI,
|
||||
#endif
|
||||
};
|
||||
struct unwind_state state = {
|
||||
.stacks = stacks,
|
||||
.nr_stacks = ARRAY_SIZE(stacks),
|
||||
struct kunwind_state state = {
|
||||
.common = {
|
||||
.stacks = stacks,
|
||||
.nr_stacks = ARRAY_SIZE(stacks),
|
||||
},
|
||||
};
|
||||
|
||||
if (regs) {
|
||||
if (task != current)
|
||||
return;
|
||||
unwind_init_from_regs(&state, regs);
|
||||
kunwind_init_from_regs(&state, regs);
|
||||
} else if (task == current) {
|
||||
unwind_init_from_caller(&state);
|
||||
kunwind_init_from_caller(&state);
|
||||
} else {
|
||||
unwind_init_from_task(&state, task);
|
||||
kunwind_init_from_task(&state, task);
|
||||
}
|
||||
|
||||
unwind(&state, consume_entry, cookie);
|
||||
do_kunwind(&state, consume_state, cookie);
|
||||
}
|
||||
|
||||
struct kunwind_consume_entry_data {
|
||||
stack_trace_consume_fn consume_entry;
|
||||
void *cookie;
|
||||
};
|
||||
|
||||
static bool
|
||||
arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
|
||||
{
|
||||
struct kunwind_consume_entry_data *data = cookie;
|
||||
return data->consume_entry(data->cookie, state->common.pc);
|
||||
}
|
||||
|
||||
noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
|
||||
void *cookie, struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct kunwind_consume_entry_data data = {
|
||||
.consume_entry = consume_entry,
|
||||
.cookie = cookie,
|
||||
};
|
||||
|
||||
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
|
||||
}
|
||||
|
||||
static bool dump_backtrace_entry(void *arg, unsigned long where)
|
||||
|
@ -118,7 +118,7 @@ endif
|
||||
VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
|
||||
|
||||
# Build rules
|
||||
targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
|
||||
targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso32.so.dbg vdso.so.raw
|
||||
c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
|
||||
c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
|
||||
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
|
||||
@ -127,15 +127,15 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
|
||||
targets += vdso.lds
|
||||
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
|
||||
|
||||
include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
|
||||
include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
|
||||
$(call if_changed,vdsosym)
|
||||
|
||||
# Strip rule for vdso.so
|
||||
$(obj)/vdso.so: OBJCOPYFLAGS := -S
|
||||
$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
|
||||
$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
|
||||
$(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
|
||||
$(call if_changed,vdsomunge)
|
||||
|
||||
# Link rule for the .so file, .lds has to be first
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
/* Used by icache_is_vpipt(). */
|
||||
/* Used by icache_is_aliasing(). */
|
||||
unsigned long __icache_flags;
|
||||
|
||||
/* Used by kvm_get_vttbr(). */
|
||||
|
@ -105,28 +105,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
|
||||
dsb(ish);
|
||||
isb();
|
||||
|
||||
/*
|
||||
* If the host is running at EL1 and we have a VPIPT I-cache,
|
||||
* then we must perform I-cache maintenance at EL2 in order for
|
||||
* it to have an effect on the guest. Since the guest cannot hit
|
||||
* I-cache lines allocated with a different VMID, we don't need
|
||||
* to worry about junk out of guest reset (we nuke the I-cache on
|
||||
* VMID rollover), but we do need to be careful when remapping
|
||||
* executable pages for the same guest. This can happen when KSM
|
||||
* takes a CoW fault on an executable page, copies the page into
|
||||
* a page that was previously mapped in the guest and then needs
|
||||
* to invalidate the guest view of the I-cache for that page
|
||||
* from EL1. To solve this, we invalidate the entire I-cache when
|
||||
* unmapping a page from a guest if we have a VPIPT I-cache but
|
||||
* the host is running at EL1. As above, we could do better if
|
||||
* we had the VA.
|
||||
*
|
||||
* The moral of this story is: if you have a VPIPT I-cache, then
|
||||
* you should be running with VHE enabled.
|
||||
*/
|
||||
if (icache_is_vpipt())
|
||||
icache_inval_all_pou();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
@ -157,28 +135,6 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
dsb(nsh);
|
||||
isb();
|
||||
|
||||
/*
|
||||
* If the host is running at EL1 and we have a VPIPT I-cache,
|
||||
* then we must perform I-cache maintenance at EL2 in order for
|
||||
* it to have an effect on the guest. Since the guest cannot hit
|
||||
* I-cache lines allocated with a different VMID, we don't need
|
||||
* to worry about junk out of guest reset (we nuke the I-cache on
|
||||
* VMID rollover), but we do need to be careful when remapping
|
||||
* executable pages for the same guest. This can happen when KSM
|
||||
* takes a CoW fault on an executable page, copies the page into
|
||||
* a page that was previously mapped in the guest and then needs
|
||||
* to invalidate the guest view of the I-cache for that page
|
||||
* from EL1. To solve this, we invalidate the entire I-cache when
|
||||
* unmapping a page from a guest if we have a VPIPT I-cache but
|
||||
* the host is running at EL1. As above, we could do better if
|
||||
* we had the VA.
|
||||
*
|
||||
* The moral of this story is: if you have a VPIPT I-cache, then
|
||||
* you should be running with VHE enabled.
|
||||
*/
|
||||
if (icache_is_vpipt())
|
||||
icache_inval_all_pou();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
@ -205,10 +161,6 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
|
||||
dsb(ish);
|
||||
isb();
|
||||
|
||||
/* See the comment in __kvm_tlb_flush_vmid_ipa() */
|
||||
if (icache_is_vpipt())
|
||||
icache_inval_all_pou();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
@ -246,18 +198,5 @@ void __kvm_flush_vm_context(void)
|
||||
/* Same remark as in __tlb_switch_to_guest() */
|
||||
dsb(ish);
|
||||
__tlbi(alle1is);
|
||||
|
||||
/*
|
||||
* VIPT and PIPT caches are not affected by VMID, so no maintenance
|
||||
* is necessary across a VMID rollover.
|
||||
*
|
||||
* VPIPT caches constrain lookup and maintenance to the active VMID,
|
||||
* so we need to invalidate lines with a stale VMID to avoid an ABA
|
||||
* race after multiple rollovers.
|
||||
*
|
||||
*/
|
||||
if (icache_is_vpipt())
|
||||
asm volatile("ic ialluis");
|
||||
|
||||
dsb(ish);
|
||||
}
|
||||
|
@ -216,18 +216,5 @@ void __kvm_flush_vm_context(void)
|
||||
{
|
||||
dsb(ishst);
|
||||
__tlbi(alle1is);
|
||||
|
||||
/*
|
||||
* VIPT and PIPT caches are not affected by VMID, so no maintenance
|
||||
* is necessary across a VMID rollover.
|
||||
*
|
||||
* VPIPT caches constrain lookup and maintenance to the active VMID,
|
||||
* so we need to invalidate lines with a stale VMID to avoid an ABA
|
||||
* race after multiple rollovers.
|
||||
*
|
||||
*/
|
||||
if (icache_is_vpipt())
|
||||
asm volatile("ic ialluis");
|
||||
|
||||
dsb(ish);
|
||||
}
|
||||
|
@ -267,9 +267,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
||||
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val = kvm_vcpu_read_pmcr(vcpu) >> ARMV8_PMU_PMCR_N_SHIFT;
|
||||
u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));
|
||||
|
||||
val &= ARMV8_PMU_PMCR_N_MASK;
|
||||
if (val == 0)
|
||||
return BIT(ARMV8_PMU_CYCLE_IDX);
|
||||
else
|
||||
@ -1136,8 +1135,7 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
|
||||
*/
|
||||
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0) &
|
||||
~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
|
||||
u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
|
||||
|
||||
return pmcr | ((u64)vcpu->kvm->arch.pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
|
||||
return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
|
||||
}
|
||||
|
@ -877,7 +877,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
|
||||
u64 pmcr, val;
|
||||
|
||||
pmcr = kvm_vcpu_read_pmcr(vcpu);
|
||||
val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
|
||||
val = FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
|
||||
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
|
||||
kvm_inject_undefined(vcpu);
|
||||
return false;
|
||||
@ -1143,7 +1143,7 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
u8 new_n = (val >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
|
||||
u8 new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
|
@ -18,13 +18,6 @@
|
||||
* x1 - src
|
||||
*/
|
||||
SYM_FUNC_START(__pi_copy_page)
|
||||
alternative_if ARM64_HAS_NO_HW_PREFETCH
|
||||
// Prefetch three cache lines ahead.
|
||||
prfm pldl1strm, [x1, #128]
|
||||
prfm pldl1strm, [x1, #256]
|
||||
prfm pldl1strm, [x1, #384]
|
||||
alternative_else_nop_endif
|
||||
|
||||
ldp x2, x3, [x1]
|
||||
ldp x4, x5, [x1, #16]
|
||||
ldp x6, x7, [x1, #32]
|
||||
@ -39,10 +32,6 @@ alternative_else_nop_endif
|
||||
1:
|
||||
tst x0, #(PAGE_SIZE - 1)
|
||||
|
||||
alternative_if ARM64_HAS_NO_HW_PREFETCH
|
||||
prfm pldl1strm, [x1, #384]
|
||||
alternative_else_nop_endif
|
||||
|
||||
stnp x2, x3, [x0, #-256]
|
||||
ldp x2, x3, [x1]
|
||||
stnp x4, x5, [x0, #16 - 256]
|
||||
|
@ -52,9 +52,6 @@ u64 vabits_actual __ro_after_init = VA_BITS_MIN;
|
||||
EXPORT_SYMBOL(vabits_actual);
|
||||
#endif
|
||||
|
||||
u64 kimage_vaddr __ro_after_init = (u64)&_text;
|
||||
EXPORT_SYMBOL(kimage_vaddr);
|
||||
|
||||
u64 kimage_voffset __ro_after_init;
|
||||
EXPORT_SYMBOL(kimage_voffset);
|
||||
|
||||
@ -674,6 +671,9 @@ static int __init map_entry_trampoline(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!arm64_kernel_unmapped_at_el0())
|
||||
return 0;
|
||||
|
||||
pgprot_t prot = kernel_exec_prot();
|
||||
phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
|
||||
|
||||
|
@ -37,10 +37,10 @@ HAS_GIC_PRIO_MASKING
|
||||
HAS_GIC_PRIO_RELAXED_SYNC
|
||||
HAS_HCX
|
||||
HAS_LDAPR
|
||||
HAS_LPA2
|
||||
HAS_LSE_ATOMICS
|
||||
HAS_MOPS
|
||||
HAS_NESTED_VIRT
|
||||
HAS_NO_HW_PREFETCH
|
||||
HAS_PAN
|
||||
HAS_S1PIE
|
||||
HAS_RAS_EXTN
|
||||
|
@ -1002,6 +1002,27 @@ UnsignedEnum 3:0 BT
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64PFR2_EL1 3 0 0 4 2
|
||||
Res0 63:36
|
||||
UnsignedEnum 35:32 FPMR
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Res0 31:12
|
||||
UnsignedEnum 11:8 MTEFAR
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 7:4 MTESTOREONLY
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 3:0 MTEPERM
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
|
||||
Res0 63:60
|
||||
UnsignedEnum 59:56 F64MM
|
||||
@ -1058,7 +1079,11 @@ UnsignedEnum 63 FA64
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 62:60
|
||||
Res0 62:61
|
||||
UnsignedEnum 60 LUTv2
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 59:56 SMEver
|
||||
0b0000 SME
|
||||
0b0001 SME2
|
||||
@ -1086,7 +1111,14 @@ UnsignedEnum 42 F16F16
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 41:40
|
||||
UnsignedEnum 41 F8F16
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 40 F8F32
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 39:36 I8I32
|
||||
0b0000 NI
|
||||
0b1111 IMP
|
||||
@ -1107,7 +1139,49 @@ UnsignedEnum 32 F32F32
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 31:0
|
||||
Res0 31
|
||||
UnsignedEnum 30 SF8FMA
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 29 SF8DP4
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 28 SF8DP2
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 27:0
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7
|
||||
Res0 63:32
|
||||
UnsignedEnum 31 F8CVT
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 30 F8FMA
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 29 F8DP4
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 28 F8DP2
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 27:2
|
||||
UnsignedEnum 1 F8E4M3
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 0 F8E5M2
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64DFR0_EL1 3 0 0 5 0
|
||||
@ -1115,7 +1189,10 @@ Enum 63:60 HPMN0
|
||||
0b0000 UNPREDICTABLE
|
||||
0b0001 DEF
|
||||
EndEnum
|
||||
Res0 59:56
|
||||
UnsignedEnum 59:56 ExtTrcBuff
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 55:52 BRBE
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
@ -1327,6 +1404,7 @@ UnsignedEnum 11:8 API
|
||||
0b0011 PAuth2
|
||||
0b0100 FPAC
|
||||
0b0101 FPACCOMBINE
|
||||
0b0110 PAuth_LR
|
||||
EndEnum
|
||||
UnsignedEnum 7:4 APA
|
||||
0b0000 NI
|
||||
@ -1335,6 +1413,7 @@ UnsignedEnum 7:4 APA
|
||||
0b0011 PAuth2
|
||||
0b0100 FPAC
|
||||
0b0101 FPACCOMBINE
|
||||
0b0110 PAuth_LR
|
||||
EndEnum
|
||||
UnsignedEnum 3:0 DPB
|
||||
0b0000 NI
|
||||
@ -1344,7 +1423,14 @@ EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
|
||||
Res0 63:56
|
||||
UnsignedEnum 63:60 ATS1A
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 59:56 LUT
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 55:52 CSSC
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
@ -1353,7 +1439,19 @@ UnsignedEnum 51:48 RPRFM
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Res0 47:32
|
||||
Res0 47:44
|
||||
UnsignedEnum 43:40 PRFMSLC
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 39:36 SYSINSTR_128
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 35:32 SYSREG_128
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 31:28 CLRBHB
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
@ -1377,6 +1475,7 @@ UnsignedEnum 15:12 APA3
|
||||
0b0011 PAuth2
|
||||
0b0100 FPAC
|
||||
0b0101 FPACCOMBINE
|
||||
0b0110 PAuth_LR
|
||||
EndEnum
|
||||
UnsignedEnum 11:8 GPA3
|
||||
0b0000 NI
|
||||
@ -1392,6 +1491,23 @@ UnsignedEnum 3:0 WFxT
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3
|
||||
Res0 63:12
|
||||
UnsignedEnum 11:8 TLBIW
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 7:4 FAMINMAX
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
UnsignedEnum 3:0 CPA
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
0b0010 CPA2
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0
|
||||
UnsignedEnum 63:60 ECV
|
||||
0b0000 NI
|
||||
@ -1680,7 +1796,8 @@ Field 63 TIDCP
|
||||
Field 62 SPINTMASK
|
||||
Field 61 NMI
|
||||
Field 60 EnTP2
|
||||
Res0 59:58
|
||||
Field 59 TCSO
|
||||
Field 58 TCSO0
|
||||
Field 57 EPAN
|
||||
Field 56 EnALS
|
||||
Field 55 EnAS0
|
||||
@ -1709,7 +1826,7 @@ EndEnum
|
||||
Field 37 ITFSB
|
||||
Field 36 BT1
|
||||
Field 35 BT0
|
||||
Res0 34
|
||||
Field 34 EnFPM
|
||||
Field 33 MSCEn
|
||||
Field 32 CMOW
|
||||
Field 31 EnIA
|
||||
@ -1747,7 +1864,8 @@ Field 0 M
|
||||
EndSysreg
|
||||
|
||||
SysregFields CPACR_ELx
|
||||
Res0 63:29
|
||||
Res0 63:30
|
||||
Field 29 E0POE
|
||||
Field 28 TTA
|
||||
Res0 27:26
|
||||
Field 25:24 SMEN
|
||||
@ -1790,6 +1908,41 @@ Sysreg SMCR_EL1 3 0 1 2 6
|
||||
Fields SMCR_ELx
|
||||
EndSysreg
|
||||
|
||||
SysregFields GCSCR_ELx
|
||||
Res0 63:10
|
||||
Field 9 STREn
|
||||
Field 8 PUSHMEn
|
||||
Res0 7
|
||||
Field 6 EXLOCKEN
|
||||
Field 5 RVCHKEN
|
||||
Res0 4:1
|
||||
Field 0 PCRSEL
|
||||
EndSysregFields
|
||||
|
||||
Sysreg GCSCR_EL1 3 0 2 5 0
|
||||
Fields GCSCR_ELx
|
||||
EndSysreg
|
||||
|
||||
SysregFields GCSPR_ELx
|
||||
Field 63:3 PTR
|
||||
Res0 2:0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg GCSPR_EL1 3 0 2 5 1
|
||||
Fields GCSPR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg GCSCRE0_EL1 3 0 2 5 2
|
||||
Res0 63:11
|
||||
Field 10 nTR
|
||||
Field 9 STREn
|
||||
Field 8 PUSHMEn
|
||||
Res0 7:6
|
||||
Field 5 RVCHKEN
|
||||
Res0 4:1
|
||||
Field 0 PCRSEL
|
||||
EndSysreg
|
||||
|
||||
Sysreg ALLINT 3 0 4 3 0
|
||||
Res0 63:14
|
||||
Field 13 ALLINT
|
||||
@ -1933,10 +2086,18 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1
|
||||
Fields CONTEXTIDR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg RCWSMASK_EL1 3 0 13 0 3
|
||||
Field 63:0 RCWSMASK
|
||||
EndSysreg
|
||||
|
||||
Sysreg TPIDR_EL1 3 0 13 0 4
|
||||
Field 63:0 ThreadID
|
||||
EndSysreg
|
||||
|
||||
Sysreg RCWMASK_EL1 3 0 13 0 6
|
||||
Field 63:0 RCWMASK
|
||||
EndSysreg
|
||||
|
||||
Sysreg SCXTNUM_EL1 3 0 13 0 7
|
||||
Field 63:0 SoftwareContextNumber
|
||||
EndSysreg
|
||||
@ -2004,9 +2165,10 @@ Field 27:24 CWG
|
||||
Field 23:20 ERG
|
||||
Field 19:16 DminLine
|
||||
Enum 15:14 L1Ip
|
||||
0b00 VPIPT
|
||||
# This was named as VPIPT in the ARM but now documented as reserved
|
||||
0b00 RESERVED_VPIPT
|
||||
# This is named as AIVIVT in the ARM but documented as reserved
|
||||
0b01 RESERVED
|
||||
0b01 RESERVED_AIVIVT
|
||||
0b10 VIPT
|
||||
0b11 PIPT
|
||||
EndEnum
|
||||
@ -2020,12 +2182,39 @@ Field 4 DZP
|
||||
Field 3:0 BS
|
||||
EndSysreg
|
||||
|
||||
Sysreg GCSPR_EL0 3 3 2 5 1
|
||||
Fields GCSPR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg SVCR 3 3 4 2 2
|
||||
Res0 63:2
|
||||
Field 1 ZA
|
||||
Field 0 SM
|
||||
EndSysreg
|
||||
|
||||
Sysreg FPMR 3 3 4 4 2
|
||||
Res0 63:38
|
||||
Field 37:32 LSCALE2
|
||||
Field 31:24 NSCALE
|
||||
Res0 23
|
||||
Field 22:16 LSCALE
|
||||
Field 15 OSC
|
||||
Field 14 OSM
|
||||
Res0 13:9
|
||||
UnsignedEnum 8:6 F8D
|
||||
0b000 E5M2
|
||||
0b001 E4M3
|
||||
EndEnum
|
||||
UnsignedEnum 5:3 F8S2
|
||||
0b000 E5M2
|
||||
0b001 E4M3
|
||||
EndEnum
|
||||
UnsignedEnum 2:0 F8S1
|
||||
0b000 E5M2
|
||||
0b001 E4M3
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
SysregFields HFGxTR_EL2
|
||||
Field 63 nAMAIR2_EL1
|
||||
Field 62 nMAIR2_EL1
|
||||
@ -2102,7 +2291,9 @@ Fields HFGxTR_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg HFGITR_EL2 3 4 1 1 6
|
||||
Res0 63:61
|
||||
Res0 63
|
||||
Field 62 ATS1E1A
|
||||
Res0 61
|
||||
Field 60 COSPRCTX
|
||||
Field 59 nGCSEPP
|
||||
Field 58 nGCSSTR_EL1
|
||||
@ -2295,12 +2486,57 @@ Field 1 DBGBVRn_EL1
|
||||
Field 0 DBGBCRn_EL1
|
||||
EndSysreg
|
||||
|
||||
Sysreg HAFGRTR_EL2 3 4 3 1 6
|
||||
Res0 63:50
|
||||
Field 49 AMEVTYPER115_EL0
|
||||
Field 48 AMEVCNTR115_EL0
|
||||
Field 47 AMEVTYPER114_EL0
|
||||
Field 46 AMEVCNTR114_EL0
|
||||
Field 45 AMEVTYPER113_EL0
|
||||
Field 44 AMEVCNTR113_EL0
|
||||
Field 43 AMEVTYPER112_EL0
|
||||
Field 42 AMEVCNTR112_EL0
|
||||
Field 41 AMEVTYPER111_EL0
|
||||
Field 40 AMEVCNTR111_EL0
|
||||
Field 39 AMEVTYPER110_EL0
|
||||
Field 38 AMEVCNTR110_EL0
|
||||
Field 37 AMEVTYPER19_EL0
|
||||
Field 36 AMEVCNTR19_EL0
|
||||
Field 35 AMEVTYPER18_EL0
|
||||
Field 34 AMEVCNTR18_EL0
|
||||
Field 33 AMEVTYPER17_EL0
|
||||
Field 32 AMEVCNTR17_EL0
|
||||
Field 31 AMEVTYPER16_EL0
|
||||
Field 30 AMEVCNTR16_EL0
|
||||
Field 29 AMEVTYPER15_EL0
|
||||
Field 28 AMEVCNTR15_EL0
|
||||
Field 27 AMEVTYPER14_EL0
|
||||
Field 26 AMEVCNTR14_EL0
|
||||
Field 25 AMEVTYPER13_EL0
|
||||
Field 24 AMEVCNTR13_EL0
|
||||
Field 23 AMEVTYPER12_EL0
|
||||
Field 22 AMEVCNTR12_EL0
|
||||
Field 21 AMEVTYPER11_EL0
|
||||
Field 20 AMEVCNTR11_EL0
|
||||
Field 19 AMEVTYPER10_EL0
|
||||
Field 18 AMEVCNTR10_EL0
|
||||
Field 17 AMCNTEN1
|
||||
Res0 16:5
|
||||
Field 4 AMEVCNTR03_EL0
|
||||
Field 3 AMEVCNTR02_EL0
|
||||
Field 2 AMEVCNTR01_EL0
|
||||
Field 1 AMEVCNTR00_EL0
|
||||
Field 0 AMCNTEN0
|
||||
EndSysreg
|
||||
|
||||
Sysreg ZCR_EL2 3 4 1 2 0
|
||||
Fields ZCR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg HCRX_EL2 3 4 1 2 2
|
||||
Res0 63:23
|
||||
Res0 63:25
|
||||
Field 24 PACMEn
|
||||
Field 23 EnFPM
|
||||
Field 22 GCSEn
|
||||
Field 21 EnIDCP128
|
||||
Field 20 EnSDERR
|
||||
@ -2348,6 +2584,14 @@ Sysreg SMCR_EL2 3 4 1 2 6
|
||||
Fields SMCR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg GCSCR_EL2 3 4 2 5 0
|
||||
Fields GCSCR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg GCSPR_EL2 3 4 2 5 1
|
||||
Fields GCSPR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg DACR32_EL2 3 4 3 0 0
|
||||
Res0 63:32
|
||||
Field 31:30 D15
|
||||
@ -2407,6 +2651,14 @@ Sysreg SMCR_EL12 3 5 1 2 6
|
||||
Fields SMCR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg GCSCR_EL12 3 5 2 5 0
|
||||
Fields GCSCR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg GCSPR_EL12 3 5 2 5 1
|
||||
Fields GCSPR_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg FAR_EL12 3 5 6 0 0
|
||||
Field 63:0 ADDR
|
||||
EndSysreg
|
||||
@ -2471,6 +2723,33 @@ Field 1 PIE
|
||||
Field 0 PnCH
|
||||
EndSysreg
|
||||
|
||||
SysregFields MAIR2_ELx
|
||||
Field 63:56 Attr7
|
||||
Field 55:48 Attr6
|
||||
Field 47:40 Attr5
|
||||
Field 39:32 Attr4
|
||||
Field 31:24 Attr3
|
||||
Field 23:16 Attr2
|
||||
Field 15:8 Attr1
|
||||
Field 7:0 Attr0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg MAIR2_EL1 3 0 10 2 1
|
||||
Fields MAIR2_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg MAIR2_EL2 3 4 10 1 1
|
||||
Fields MAIR2_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg AMAIR2_EL1 3 0 10 3 1
|
||||
Field 63:0 ImpDef
|
||||
EndSysreg
|
||||
|
||||
Sysreg AMAIR2_EL2 3 4 10 3 1
|
||||
Field 63:0 ImpDef
|
||||
EndSysreg
|
||||
|
||||
SysregFields PIRx_ELx
|
||||
Field 63:60 Perm15
|
||||
Field 59:56 Perm14
|
||||
@ -2510,6 +2789,26 @@ Sysreg PIR_EL2 3 4 10 2 3
|
||||
Fields PIRx_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg POR_EL0 3 3 10 2 4
|
||||
Fields PIRx_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg POR_EL1 3 0 10 2 4
|
||||
Fields PIRx_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg POR_EL12 3 5 10 2 4
|
||||
Fields PIRx_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg S2POR_EL1 3 0 10 2 5
|
||||
Fields PIRx_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg S2PIR_EL2 3 4 10 2 5
|
||||
Fields PIRx_ELx
|
||||
EndSysreg
|
||||
|
||||
Sysreg LORSA_EL1 3 0 10 4 0
|
||||
Res0 63:52
|
||||
Field 51:16 SA
|
||||
|
@ -144,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
|
||||
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
|
||||
EXPORT_SYMBOL(__per_cpu_offset);
|
||||
|
||||
static int __init early_cpu_to_node(int cpu)
|
||||
int __init early_cpu_to_node(int cpu)
|
||||
{
|
||||
return cpu_to_node_map[cpu];
|
||||
}
|
||||
|
@ -5,8 +5,8 @@
|
||||
# EFI_ZBOOT_FORWARD_CFI
|
||||
|
||||
quiet_cmd_copy_and_pad = PAD $@
|
||||
cmd_copy_and_pad = cp $< $@ && \
|
||||
truncate -s $(shell hexdump -s16 -n4 -e '"%u"' $<) $@
|
||||
cmd_copy_and_pad = cp $< $@; \
|
||||
truncate -s $$(hexdump -s16 -n4 -e '"%u"' $<) $@
|
||||
|
||||
# Pad the file to the size of the uncompressed image in memory, including BSS
|
||||
$(obj)/vmlinux.bin: $(obj)/$(EFI_ZBOOT_PAYLOAD) FORCE
|
||||
|
@ -11,8 +11,6 @@
|
||||
#include <linux/types.h>
|
||||
|
||||
/* PCIe device related definition. */
|
||||
#define PCI_VENDOR_ID_ALIBABA 0x1ded
|
||||
|
||||
#define ERDMA_PCI_WIDTH 64
|
||||
#define ERDMA_FUNC_BAR 0
|
||||
#define ERDMA_MISX_BAR 2
|
||||
|
@ -598,3 +598,15 @@ int pci_write_config_dword(const struct pci_dev *dev, int where,
|
||||
return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
|
||||
}
|
||||
EXPORT_SYMBOL(pci_write_config_dword);
|
||||
|
||||
void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos,
|
||||
u32 clear, u32 set)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
pci_read_config_dword(dev, pos, &val);
|
||||
val &= ~clear;
|
||||
val |= set;
|
||||
pci_write_config_dword(dev, pos, val);
|
||||
}
|
||||
EXPORT_SYMBOL(pci_clear_and_set_config_dword);
|
||||
|
@ -426,17 +426,6 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
|
||||
u32 clear, u32 set)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
pci_read_config_dword(pdev, pos, &val);
|
||||
val &= ~clear;
|
||||
val |= set;
|
||||
pci_write_config_dword(pdev, pos, val);
|
||||
}
|
||||
|
||||
/* Calculate L1.2 PM substate timing parameters */
|
||||
static void aspm_calc_l12_info(struct pcie_link_state *link,
|
||||
u32 parent_l1ss_cap, u32 child_l1ss_cap)
|
||||
@ -501,10 +490,12 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
|
||||
cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK;
|
||||
|
||||
if (pl1_2_enables || cl1_2_enables) {
|
||||
pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1_2_MASK, 0);
|
||||
pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1_2_MASK, 0);
|
||||
pci_clear_and_set_config_dword(child,
|
||||
child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1_2_MASK, 0);
|
||||
pci_clear_and_set_config_dword(parent,
|
||||
parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1_2_MASK, 0);
|
||||
}
|
||||
|
||||
/* Program T_POWER_ON times in both ports */
|
||||
@ -512,22 +503,26 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
|
||||
pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
|
||||
|
||||
/* Program Common_Mode_Restore_Time in upstream device */
|
||||
pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
|
||||
pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
|
||||
|
||||
/* Program LTR_L1.2_THRESHOLD time in both ports */
|
||||
pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
|
||||
pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
|
||||
pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
|
||||
ctl1);
|
||||
pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
|
||||
PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
|
||||
ctl1);
|
||||
|
||||
if (pl1_2_enables || cl1_2_enables) {
|
||||
pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0,
|
||||
pl1_2_enables);
|
||||
pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0,
|
||||
cl1_2_enables);
|
||||
pci_clear_and_set_config_dword(parent,
|
||||
parent->l1ss + PCI_L1SS_CTL1, 0,
|
||||
pl1_2_enables);
|
||||
pci_clear_and_set_config_dword(child,
|
||||
child->l1ss + PCI_L1SS_CTL1, 0,
|
||||
cl1_2_enables);
|
||||
}
|
||||
}
|
||||
|
||||
@ -687,10 +682,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
|
||||
*/
|
||||
|
||||
/* Disable all L1 substates */
|
||||
pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, 0);
|
||||
pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, 0);
|
||||
pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, 0);
|
||||
pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, 0);
|
||||
/*
|
||||
* If needed, disable L1, and it gets enabled later
|
||||
* in pcie_config_aspm_link().
|
||||
@ -713,10 +708,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
|
||||
val |= PCI_L1SS_CTL1_PCIPM_L1_2;
|
||||
|
||||
/* Enable what we need to enable */
|
||||
pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, val);
|
||||
pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, val);
|
||||
pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, val);
|
||||
pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
|
||||
PCI_L1SS_CTL1_L1SS_MASK, val);
|
||||
}
|
||||
|
||||
static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val)
|
||||
|
@ -217,6 +217,13 @@ config MARVELL_CN10K_DDR_PMU
|
||||
Enable perf support for Marvell DDR Performance monitoring
|
||||
event on CN10K platform.
|
||||
|
||||
config DWC_PCIE_PMU
|
||||
tristate "Synopsys DesignWare PCIe PMU"
|
||||
depends on PCI
|
||||
help
|
||||
Enable perf support for Synopsys DesignWare PCIe PMU Performance
|
||||
monitoring event on platform including the Alibaba Yitian 710.
|
||||
|
||||
source "drivers/perf/arm_cspmu/Kconfig"
|
||||
|
||||
source "drivers/perf/amlogic/Kconfig"
|
||||
|
@ -23,6 +23,7 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
|
||||
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
|
||||
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
|
||||
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
|
||||
obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o
|
||||
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
|
||||
obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
|
||||
obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
|
||||
|
@ -524,8 +524,10 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
|
||||
{
|
||||
unsigned long config_base = 0;
|
||||
|
||||
if (!attr->exclude_guest)
|
||||
return -EINVAL;
|
||||
if (!attr->exclude_guest) {
|
||||
pr_debug("ARM performance counters do not support mode exclusion\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
if (!attr->exclude_kernel)
|
||||
config_base |= M1_PMU_CFG_COUNT_KERNEL;
|
||||
if (!attr->exclude_user)
|
||||
|
@ -811,7 +811,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
|
||||
#define CMN_EVENT_HNF_OCC(_model, _name, _event) \
|
||||
CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
|
||||
#define CMN_EVENT_HNF_CLS(_model, _name, _event) \
|
||||
CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNS, _event)
|
||||
CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNF, _event)
|
||||
#define CMN_EVENT_HNF_SNT(_model, _name, _event) \
|
||||
CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
|
||||
|
||||
|
@ -371,7 +371,7 @@ static inline u32 dsu_pmu_get_reset_overflow(void)
|
||||
return __dsu_pmu_get_reset_overflow();
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* dsu_pmu_set_event_period: Set the period for the counter.
|
||||
*
|
||||
* All DSU PMU event counters, except the cycle counter are 32bit
|
||||
@ -602,7 +602,7 @@ static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
|
||||
return dsu_pmu;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster
|
||||
* from device tree.
|
||||
*/
|
||||
@ -632,7 +632,7 @@ static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster
|
||||
* from ACPI.
|
||||
*/
|
||||
|
@ -445,7 +445,7 @@ __hw_perf_event_init(struct perf_event *event)
|
||||
{
|
||||
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int mapping;
|
||||
int mapping, ret;
|
||||
|
||||
hwc->flags = 0;
|
||||
mapping = armpmu->map_event(event);
|
||||
@ -470,11 +470,10 @@ __hw_perf_event_init(struct perf_event *event)
|
||||
/*
|
||||
* Check whether we need to exclude the counter from certain modes.
|
||||
*/
|
||||
if (armpmu->set_event_filter &&
|
||||
armpmu->set_event_filter(hwc, &event->attr)) {
|
||||
pr_debug("ARM performance counters do not support "
|
||||
"mode exclusion\n");
|
||||
return -EOPNOTSUPP;
|
||||
if (armpmu->set_event_filter) {
|
||||
ret = armpmu->set_event_filter(hwc, &event->attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -893,7 +892,6 @@ struct arm_pmu *armpmu_alloc(void)
|
||||
struct pmu_hw_events *events;
|
||||
|
||||
events = per_cpu_ptr(pmu->hw_events, cpu);
|
||||
raw_spin_lock_init(&events->pmu_lock);
|
||||
events->percpu_pmu = pmu;
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <clocksource/arm_arch_timer.h>
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/perf/arm_pmu.h>
|
||||
@ -169,7 +170,11 @@ armv8pmu_events_sysfs_show(struct device *dev,
|
||||
PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
|
||||
|
||||
static struct attribute *armv8_pmuv3_event_attrs[] = {
|
||||
ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
|
||||
/*
|
||||
* Don't expose the sw_incr event in /sys. It's not usable as writes to
|
||||
* PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation
|
||||
* means we don't have a fixed event<->counter relationship regardless.
|
||||
*/
|
||||
ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
|
||||
ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
|
||||
ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
|
||||
@ -294,26 +299,66 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
|
||||
.is_visible = armv8pmu_event_attr_is_visible,
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-15");
|
||||
PMU_FORMAT_ATTR(long, "config1:0");
|
||||
PMU_FORMAT_ATTR(rdpmc, "config1:1");
|
||||
/* User ABI */
|
||||
#define ATTR_CFG_FLD_event_CFG config
|
||||
#define ATTR_CFG_FLD_event_LO 0
|
||||
#define ATTR_CFG_FLD_event_HI 15
|
||||
#define ATTR_CFG_FLD_long_CFG config1
|
||||
#define ATTR_CFG_FLD_long_LO 0
|
||||
#define ATTR_CFG_FLD_long_HI 0
|
||||
#define ATTR_CFG_FLD_rdpmc_CFG config1
|
||||
#define ATTR_CFG_FLD_rdpmc_LO 1
|
||||
#define ATTR_CFG_FLD_rdpmc_HI 1
|
||||
#define ATTR_CFG_FLD_threshold_count_CFG config1 /* PMEVTYPER.TC[0] */
|
||||
#define ATTR_CFG_FLD_threshold_count_LO 2
|
||||
#define ATTR_CFG_FLD_threshold_count_HI 2
|
||||
#define ATTR_CFG_FLD_threshold_compare_CFG config1 /* PMEVTYPER.TC[2:1] */
|
||||
#define ATTR_CFG_FLD_threshold_compare_LO 3
|
||||
#define ATTR_CFG_FLD_threshold_compare_HI 4
|
||||
#define ATTR_CFG_FLD_threshold_CFG config1 /* PMEVTYPER.TH */
|
||||
#define ATTR_CFG_FLD_threshold_LO 5
|
||||
#define ATTR_CFG_FLD_threshold_HI 16
|
||||
|
||||
GEN_PMU_FORMAT_ATTR(event);
|
||||
GEN_PMU_FORMAT_ATTR(long);
|
||||
GEN_PMU_FORMAT_ATTR(rdpmc);
|
||||
GEN_PMU_FORMAT_ATTR(threshold_count);
|
||||
GEN_PMU_FORMAT_ATTR(threshold_compare);
|
||||
GEN_PMU_FORMAT_ATTR(threshold);
|
||||
|
||||
static int sysctl_perf_user_access __read_mostly;
|
||||
|
||||
static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
|
||||
static bool armv8pmu_event_is_64bit(struct perf_event *event)
|
||||
{
|
||||
return event->attr.config1 & 0x1;
|
||||
return ATTR_CFG_GET_FLD(&event->attr, long);
|
||||
}
|
||||
|
||||
static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
|
||||
static bool armv8pmu_event_want_user_access(struct perf_event *event)
|
||||
{
|
||||
return event->attr.config1 & 0x2;
|
||||
return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
|
||||
}
|
||||
|
||||
static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
|
||||
{
|
||||
u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
|
||||
u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count);
|
||||
|
||||
/*
|
||||
* The count bit is always the bottom bit of the full control field, and
|
||||
* the comparison is the upper two bits, but it's not explicitly
|
||||
* labelled in the Arm ARM. For the Perf interface we split it into two
|
||||
* fields, so reconstruct it here.
|
||||
*/
|
||||
return (th_compare << 1) | th_count;
|
||||
}
|
||||
|
||||
static struct attribute *armv8_pmuv3_format_attrs[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_long.attr,
|
||||
&format_attr_rdpmc.attr,
|
||||
&format_attr_threshold.attr,
|
||||
&format_attr_threshold_compare.attr,
|
||||
&format_attr_threshold_count.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -327,7 +372,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
|
||||
u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
|
||||
u32 slots = FIELD_GET(ARMV8_PMU_SLOTS, cpu_pmu->reg_pmmir);
|
||||
|
||||
return sysfs_emit(page, "0x%08x\n", slots);
|
||||
}
|
||||
@ -339,8 +384,7 @@ static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
|
||||
u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT)
|
||||
& ARMV8_PMU_BUS_SLOTS_MASK;
|
||||
u32 bus_slots = FIELD_GET(ARMV8_PMU_BUS_SLOTS, cpu_pmu->reg_pmmir);
|
||||
|
||||
return sysfs_emit(page, "0x%08x\n", bus_slots);
|
||||
}
|
||||
@ -352,8 +396,7 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
|
||||
u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT)
|
||||
& ARMV8_PMU_BUS_WIDTH_MASK;
|
||||
u32 bus_width = FIELD_GET(ARMV8_PMU_BUS_WIDTH, cpu_pmu->reg_pmmir);
|
||||
u32 val = 0;
|
||||
|
||||
/* Encoded as Log2(number of bytes), plus one */
|
||||
@ -365,10 +408,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
|
||||
|
||||
static DEVICE_ATTR_RO(bus_width);
|
||||
|
||||
static u32 threshold_max(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
/*
|
||||
* PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be
|
||||
* impossible to write the threshold in the upper 32 bits of PMEVTYPER.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_ARM))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* The largest value that can be written to PMEVTYPER<n>_EL0.TH is
|
||||
* (2 ^ PMMIR.THWIDTH) - 1.
|
||||
*/
|
||||
return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
|
||||
}
|
||||
|
||||
static ssize_t threshold_max_show(struct device *dev,
|
||||
struct device_attribute *attr, char *page)
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
|
||||
|
||||
return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RO(threshold_max);
|
||||
|
||||
static struct attribute *armv8_pmuv3_caps_attrs[] = {
|
||||
&dev_attr_slots.attr,
|
||||
&dev_attr_bus_slots.attr,
|
||||
&dev_attr_bus_width.attr,
|
||||
&dev_attr_threshold_max.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -397,7 +468,7 @@ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
|
||||
return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver));
|
||||
}
|
||||
|
||||
static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
|
||||
static bool armv8pmu_event_has_user_read(struct perf_event *event)
|
||||
{
|
||||
return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT;
|
||||
}
|
||||
@ -407,7 +478,7 @@ static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
|
||||
* except when we have allocated the 64bit cycle counter (for CPU
|
||||
* cycles event) or when user space counter access is enabled.
|
||||
*/
|
||||
static inline bool armv8pmu_event_is_chained(struct perf_event *event)
|
||||
static bool armv8pmu_event_is_chained(struct perf_event *event)
|
||||
{
|
||||
int idx = event->hw.idx;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
@ -428,36 +499,36 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event)
|
||||
#define ARMV8_IDX_TO_COUNTER(x) \
|
||||
(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
|
||||
|
||||
static inline u64 armv8pmu_pmcr_read(void)
|
||||
static u64 armv8pmu_pmcr_read(void)
|
||||
{
|
||||
return read_pmcr();
|
||||
}
|
||||
|
||||
static inline void armv8pmu_pmcr_write(u64 val)
|
||||
static void armv8pmu_pmcr_write(u64 val)
|
||||
{
|
||||
val &= ARMV8_PMU_PMCR_MASK;
|
||||
isb();
|
||||
write_pmcr(val);
|
||||
}
|
||||
|
||||
static inline int armv8pmu_has_overflowed(u32 pmovsr)
|
||||
static int armv8pmu_has_overflowed(u32 pmovsr)
|
||||
{
|
||||
return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
|
||||
}
|
||||
|
||||
static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
|
||||
static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
|
||||
{
|
||||
return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
|
||||
}
|
||||
|
||||
static inline u64 armv8pmu_read_evcntr(int idx)
|
||||
static u64 armv8pmu_read_evcntr(int idx)
|
||||
{
|
||||
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
|
||||
|
||||
return read_pmevcntrn(counter);
|
||||
}
|
||||
|
||||
static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
|
||||
static u64 armv8pmu_read_hw_counter(struct perf_event *event)
|
||||
{
|
||||
int idx = event->hw.idx;
|
||||
u64 val = armv8pmu_read_evcntr(idx);
|
||||
@ -519,14 +590,14 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
|
||||
return armv8pmu_unbias_long_counter(event, value);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_write_evcntr(int idx, u64 value)
|
||||
static void armv8pmu_write_evcntr(int idx, u64 value)
|
||||
{
|
||||
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
|
||||
|
||||
write_pmevcntrn(counter, value);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_write_hw_counter(struct perf_event *event,
|
||||
static void armv8pmu_write_hw_counter(struct perf_event *event,
|
||||
u64 value)
|
||||
{
|
||||
int idx = event->hw.idx;
|
||||
@ -552,15 +623,22 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
|
||||
armv8pmu_write_hw_counter(event, value);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_write_evtype(int idx, u32 val)
|
||||
static void armv8pmu_write_evtype(int idx, unsigned long val)
|
||||
{
|
||||
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
|
||||
unsigned long mask = ARMV8_PMU_EVTYPE_EVENT |
|
||||
ARMV8_PMU_INCLUDE_EL2 |
|
||||
ARMV8_PMU_EXCLUDE_EL0 |
|
||||
ARMV8_PMU_EXCLUDE_EL1;
|
||||
|
||||
val &= ARMV8_PMU_EVTYPE_MASK;
|
||||
if (IS_ENABLED(CONFIG_ARM64))
|
||||
mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH;
|
||||
|
||||
val &= mask;
|
||||
write_pmevtypern(counter, val);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_write_event_type(struct perf_event *event)
|
||||
static void armv8pmu_write_event_type(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
@ -594,7 +672,7 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
|
||||
return mask;
|
||||
}
|
||||
|
||||
static inline void armv8pmu_enable_counter(u32 mask)
|
||||
static void armv8pmu_enable_counter(u32 mask)
|
||||
{
|
||||
/*
|
||||
* Make sure event configuration register writes are visible before we
|
||||
@ -604,7 +682,7 @@ static inline void armv8pmu_enable_counter(u32 mask)
|
||||
write_pmcntenset(mask);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_enable_event_counter(struct perf_event *event)
|
||||
static void armv8pmu_enable_event_counter(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_attr *attr = &event->attr;
|
||||
u32 mask = armv8pmu_event_cnten_mask(event);
|
||||
@ -616,7 +694,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event)
|
||||
armv8pmu_enable_counter(mask);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_disable_counter(u32 mask)
|
||||
static void armv8pmu_disable_counter(u32 mask)
|
||||
{
|
||||
write_pmcntenclr(mask);
|
||||
/*
|
||||
@ -626,7 +704,7 @@ static inline void armv8pmu_disable_counter(u32 mask)
|
||||
isb();
|
||||
}
|
||||
|
||||
static inline void armv8pmu_disable_event_counter(struct perf_event *event)
|
||||
static void armv8pmu_disable_event_counter(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_attr *attr = &event->attr;
|
||||
u32 mask = armv8pmu_event_cnten_mask(event);
|
||||
@ -638,18 +716,18 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event)
|
||||
armv8pmu_disable_counter(mask);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_enable_intens(u32 mask)
|
||||
static void armv8pmu_enable_intens(u32 mask)
|
||||
{
|
||||
write_pmintenset(mask);
|
||||
}
|
||||
|
||||
static inline void armv8pmu_enable_event_irq(struct perf_event *event)
|
||||
static void armv8pmu_enable_event_irq(struct perf_event *event)
|
||||
{
|
||||
u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
|
||||
armv8pmu_enable_intens(BIT(counter));
|
||||
}
|
||||
|
||||
static inline void armv8pmu_disable_intens(u32 mask)
|
||||
static void armv8pmu_disable_intens(u32 mask)
|
||||
{
|
||||
write_pmintenclr(mask);
|
||||
isb();
|
||||
@ -658,13 +736,13 @@ static inline void armv8pmu_disable_intens(u32 mask)
|
||||
isb();
|
||||
}
|
||||
|
||||
static inline void armv8pmu_disable_event_irq(struct perf_event *event)
|
||||
static void armv8pmu_disable_event_irq(struct perf_event *event)
|
||||
{
|
||||
u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
|
||||
armv8pmu_disable_intens(BIT(counter));
|
||||
}
|
||||
|
||||
static inline u32 armv8pmu_getreset_flags(void)
|
||||
static u32 armv8pmu_getreset_flags(void)
|
||||
{
|
||||
u32 value;
|
||||
|
||||
@ -672,7 +750,7 @@ static inline u32 armv8pmu_getreset_flags(void)
|
||||
value = read_pmovsclr();
|
||||
|
||||
/* Write to clear flags */
|
||||
value &= ARMV8_PMU_OVSR_MASK;
|
||||
value &= ARMV8_PMU_OVERFLOWED_MASK;
|
||||
write_pmovsclr(value);
|
||||
|
||||
return value;
|
||||
@ -914,9 +992,15 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
|
||||
struct perf_event_attr *attr)
|
||||
{
|
||||
unsigned long config_base = 0;
|
||||
struct perf_event *perf_event = container_of(attr, struct perf_event,
|
||||
attr);
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
|
||||
u32 th;
|
||||
|
||||
if (attr->exclude_idle)
|
||||
return -EPERM;
|
||||
if (attr->exclude_idle) {
|
||||
pr_debug("ARM performance counters do not support mode exclusion\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're running in hyp mode, then we *are* the hypervisor.
|
||||
@ -945,6 +1029,22 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
|
||||
if (attr->exclude_user)
|
||||
config_base |= ARMV8_PMU_EXCLUDE_EL0;
|
||||
|
||||
/*
|
||||
* If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
|
||||
* be 0 and will also trigger this check, preventing it from being used.
|
||||
*/
|
||||
th = ATTR_CFG_GET_FLD(attr, threshold);
|
||||
if (th > threshold_max(cpu_pmu)) {
|
||||
pr_debug("PMU event threshold exceeds max value\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64) && th) {
|
||||
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
|
||||
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
|
||||
armv8pmu_event_threshold_control(attr));
|
||||
}
|
||||
|
||||
/*
|
||||
* Install the filter into config_base as this is used to
|
||||
* construct the event type.
|
||||
@ -1107,8 +1207,7 @@ static void __armv8pmu_probe_pmu(void *info)
|
||||
probe->present = true;
|
||||
|
||||
/* Read the nb of CNTx counters supported from PMNC */
|
||||
cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
|
||||
& ARMV8_PMU_PMCR_N_MASK;
|
||||
cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
|
||||
|
||||
/* Add the CPU cycles counter */
|
||||
cpu_pmu->num_events += 1;
|
||||
@ -1221,6 +1320,12 @@ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
|
||||
return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \
|
||||
}
|
||||
|
||||
#define PMUV3_INIT_MAP_EVENT(name, map_event) \
|
||||
static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
|
||||
{ \
|
||||
return armv8_pmu_init(cpu_pmu, #name, map_event); \
|
||||
}
|
||||
|
||||
PMUV3_INIT_SIMPLE(armv8_pmuv3)
|
||||
|
||||
PMUV3_INIT_SIMPLE(armv8_cortex_a34)
|
||||
@ -1247,51 +1352,24 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
|
||||
PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
|
||||
PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
|
||||
|
||||
static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event);
|
||||
}
|
||||
|
||||
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event);
|
||||
}
|
||||
|
||||
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event);
|
||||
}
|
||||
|
||||
static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event);
|
||||
}
|
||||
|
||||
static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event);
|
||||
}
|
||||
|
||||
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event);
|
||||
}
|
||||
|
||||
static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event);
|
||||
}
|
||||
PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event)
|
||||
PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event)
|
||||
PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event)
|
||||
PMUV3_INIT_MAP_EVENT(armv8_cortex_a72, armv8_a57_map_event)
|
||||
PMUV3_INIT_MAP_EVENT(armv8_cortex_a73, armv8_a73_map_event)
|
||||
PMUV3_INIT_MAP_EVENT(armv8_cavium_thunder, armv8_thunder_map_event)
|
||||
PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event)
|
||||
|
||||
static const struct of_device_id armv8_pmu_of_device_ids[] = {
|
||||
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init},
|
||||
{.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init},
|
||||
{.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init},
|
||||
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
|
||||
{.compatible = "arm,cortex-a35-pmu", .data = armv8_cortex_a35_pmu_init},
|
||||
{.compatible = "arm,cortex-a53-pmu", .data = armv8_cortex_a53_pmu_init},
|
||||
{.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init},
|
||||
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
|
||||
{.compatible = "arm,cortex-a57-pmu", .data = armv8_cortex_a57_pmu_init},
|
||||
{.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init},
|
||||
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
|
||||
{.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init},
|
||||
{.compatible = "arm,cortex-a72-pmu", .data = armv8_cortex_a72_pmu_init},
|
||||
{.compatible = "arm,cortex-a73-pmu", .data = armv8_cortex_a73_pmu_init},
|
||||
{.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init},
|
||||
{.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init},
|
||||
{.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init},
|
||||
@ -1309,8 +1387,8 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
|
||||
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init},
|
||||
{.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init},
|
||||
{.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init},
|
||||
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
|
||||
{.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
|
||||
{.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
|
||||
{.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
|
||||
{.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},
|
||||
{.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init},
|
||||
{},
|
||||
|
@ -206,28 +206,6 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
|
||||
#define ATTR_CFG_FLD_inv_event_filter_LO 0
|
||||
#define ATTR_CFG_FLD_inv_event_filter_HI 63
|
||||
|
||||
/* Why does everything I do descend into this? */
|
||||
#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
|
||||
(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
|
||||
|
||||
#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
|
||||
__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
|
||||
|
||||
#define GEN_PMU_FORMAT_ATTR(name) \
|
||||
PMU_FORMAT_ATTR(name, \
|
||||
_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \
|
||||
ATTR_CFG_FLD_##name##_LO, \
|
||||
ATTR_CFG_FLD_##name##_HI))
|
||||
|
||||
#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \
|
||||
((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))
|
||||
|
||||
#define ATTR_CFG_GET_FLD(attr, name) \
|
||||
_ATTR_CFG_GET_FLD(attr, \
|
||||
ATTR_CFG_FLD_##name##_CFG, \
|
||||
ATTR_CFG_FLD_##name##_LO, \
|
||||
ATTR_CFG_FLD_##name##_HI)
|
||||
|
||||
GEN_PMU_FORMAT_ATTR(ts_enable);
|
||||
GEN_PMU_FORMAT_ATTR(pa_enable);
|
||||
GEN_PMU_FORMAT_ATTR(pct_enable);
|
||||
|
792
drivers/perf/dwc_pcie_pmu.c
Normal file
792
drivers/perf/dwc_pcie_pmu.c
Normal file
@ -0,0 +1,792 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Synopsys DesignWare PCIe PMU driver
|
||||
*
|
||||
* Copyright (C) 2021-2023 Alibaba Inc.
|
||||
*/
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/cpuhotplug.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define DWC_PCIE_VSEC_RAS_DES_ID 0x02
|
||||
#define DWC_PCIE_EVENT_CNT_CTL 0x8
|
||||
|
||||
/*
|
||||
* Event Counter Data Select includes two parts:
|
||||
* - 27-24: Group number(4-bit: 0..0x7)
|
||||
* - 23-16: Event number(8-bit: 0..0x13) within the Group
|
||||
*
|
||||
* Put them together as in TRM.
|
||||
*/
|
||||
#define DWC_PCIE_CNT_EVENT_SEL GENMASK(27, 16)
|
||||
#define DWC_PCIE_CNT_LANE_SEL GENMASK(11, 8)
|
||||
#define DWC_PCIE_CNT_STATUS BIT(7)
|
||||
#define DWC_PCIE_CNT_ENABLE GENMASK(4, 2)
|
||||
#define DWC_PCIE_PER_EVENT_OFF 0x1
|
||||
#define DWC_PCIE_PER_EVENT_ON 0x3
|
||||
#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0)
|
||||
#define DWC_PCIE_EVENT_PER_CLEAR 0x1
|
||||
|
||||
#define DWC_PCIE_EVENT_CNT_DATA 0xC
|
||||
|
||||
#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10
|
||||
#define DWC_PCIE_TIME_BASED_REPORT_SEL GENMASK(31, 24)
|
||||
#define DWC_PCIE_TIME_BASED_DURATION_SEL GENMASK(15, 8)
|
||||
#define DWC_PCIE_DURATION_MANUAL_CTL 0x0
|
||||
#define DWC_PCIE_DURATION_1MS 0x1
|
||||
#define DWC_PCIE_DURATION_10MS 0x2
|
||||
#define DWC_PCIE_DURATION_100MS 0x3
|
||||
#define DWC_PCIE_DURATION_1S 0x4
|
||||
#define DWC_PCIE_DURATION_2S 0x5
|
||||
#define DWC_PCIE_DURATION_4S 0x6
|
||||
#define DWC_PCIE_DURATION_4US 0xFF
|
||||
#define DWC_PCIE_TIME_BASED_TIMER_START BIT(0)
|
||||
#define DWC_PCIE_TIME_BASED_CNT_ENABLE 0x1
|
||||
|
||||
#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW 0x14
|
||||
#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH 0x18
|
||||
|
||||
/* Event attributes */
|
||||
#define DWC_PCIE_CONFIG_EVENTID GENMASK(15, 0)
|
||||
#define DWC_PCIE_CONFIG_TYPE GENMASK(19, 16)
|
||||
#define DWC_PCIE_CONFIG_LANE GENMASK(27, 20)
|
||||
|
||||
#define DWC_PCIE_EVENT_ID(event) FIELD_GET(DWC_PCIE_CONFIG_EVENTID, (event)->attr.config)
|
||||
#define DWC_PCIE_EVENT_TYPE(event) FIELD_GET(DWC_PCIE_CONFIG_TYPE, (event)->attr.config)
|
||||
#define DWC_PCIE_EVENT_LANE(event) FIELD_GET(DWC_PCIE_CONFIG_LANE, (event)->attr.config)
|
||||
|
||||
enum dwc_pcie_event_type {
|
||||
DWC_PCIE_TIME_BASE_EVENT,
|
||||
DWC_PCIE_LANE_EVENT,
|
||||
DWC_PCIE_EVENT_TYPE_MAX,
|
||||
};
|
||||
|
||||
#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
|
||||
#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
|
||||
|
||||
struct dwc_pcie_pmu {
|
||||
struct pmu pmu;
|
||||
struct pci_dev *pdev; /* Root Port device */
|
||||
u16 ras_des_offset;
|
||||
u32 nr_lanes;
|
||||
|
||||
struct list_head pmu_node;
|
||||
struct hlist_node cpuhp_node;
|
||||
struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX];
|
||||
int on_cpu;
|
||||
};
|
||||
|
||||
#define to_dwc_pcie_pmu(p) (container_of(p, struct dwc_pcie_pmu, pmu))
|
||||
|
||||
static int dwc_pcie_pmu_hp_state;
|
||||
static struct list_head dwc_pcie_dev_info_head =
|
||||
LIST_HEAD_INIT(dwc_pcie_dev_info_head);
|
||||
static bool notify;
|
||||
|
||||
struct dwc_pcie_dev_info {
|
||||
struct platform_device *plat_dev;
|
||||
struct pci_dev *pdev;
|
||||
struct list_head dev_node;
|
||||
};
|
||||
|
||||
struct dwc_pcie_vendor_id {
|
||||
int vendor_id;
|
||||
};
|
||||
|
||||
static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = {
|
||||
{.vendor_id = PCI_VENDOR_ID_ALIBABA },
|
||||
{} /* terminator */
|
||||
};
|
||||
|
||||
static ssize_t cpumask_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(dev_get_drvdata(dev));
|
||||
|
||||
return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu));
|
||||
}
|
||||
static DEVICE_ATTR_RO(cpumask);
|
||||
|
||||
static struct attribute *dwc_pcie_pmu_cpumask_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group dwc_pcie_cpumask_attr_group = {
|
||||
.attrs = dwc_pcie_pmu_cpumask_attrs,
|
||||
};
|
||||
|
||||
struct dwc_pcie_format_attr {
|
||||
struct device_attribute attr;
|
||||
u64 field;
|
||||
int config;
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(eventid, "config:0-15");
|
||||
PMU_FORMAT_ATTR(type, "config:16-19");
|
||||
PMU_FORMAT_ATTR(lane, "config:20-27");
|
||||
|
||||
static struct attribute *dwc_pcie_format_attrs[] = {
|
||||
&format_attr_type.attr,
|
||||
&format_attr_eventid.attr,
|
||||
&format_attr_lane.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group dwc_pcie_format_attrs_group = {
|
||||
.name = "format",
|
||||
.attrs = dwc_pcie_format_attrs,
|
||||
};
|
||||
|
||||
struct dwc_pcie_event_attr {
|
||||
struct device_attribute attr;
|
||||
enum dwc_pcie_event_type type;
|
||||
u16 eventid;
|
||||
u8 lane;
|
||||
};
|
||||
|
||||
static ssize_t dwc_pcie_event_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct dwc_pcie_event_attr *eattr;
|
||||
|
||||
eattr = container_of(attr, typeof(*eattr), attr);
|
||||
|
||||
if (eattr->type == DWC_PCIE_LANE_EVENT)
|
||||
return sysfs_emit(buf, "eventid=0x%x,type=0x%x,lane=?\n",
|
||||
eattr->eventid, eattr->type);
|
||||
else if (eattr->type == DWC_PCIE_TIME_BASE_EVENT)
|
||||
return sysfs_emit(buf, "eventid=0x%x,type=0x%x\n",
|
||||
eattr->eventid, eattr->type);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define DWC_PCIE_EVENT_ATTR(_name, _type, _eventid, _lane) \
|
||||
(&((struct dwc_pcie_event_attr[]) {{ \
|
||||
.attr = __ATTR(_name, 0444, dwc_pcie_event_show, NULL), \
|
||||
.type = _type, \
|
||||
.eventid = _eventid, \
|
||||
.lane = _lane, \
|
||||
}})[0].attr.attr)
|
||||
|
||||
#define DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(_name, _eventid) \
|
||||
DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_TIME_BASE_EVENT, _eventid, 0)
|
||||
#define DWC_PCIE_PMU_LANE_EVENT_ATTR(_name, _eventid) \
|
||||
DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_LANE_EVENT, _eventid, 0)
|
||||
|
||||
static struct attribute *dwc_pcie_pmu_time_event_attrs[] = {
|
||||
/* Group #0 */
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(one_cycle, 0x00),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_L0S, 0x01),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(RX_L0S, 0x02),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L0, 0x03),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1, 0x04),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09),
|
||||
|
||||
/* Group #1 */
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22),
|
||||
DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23),
|
||||
|
||||
/*
|
||||
* Leave it to the user to specify the lane ID to avoid generating
|
||||
* a list of hundreds of events.
|
||||
*/
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ack_dllp, 0x600),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_read, 0x703),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_write, 0x704),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_read, 0x705),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_without_data, 0x706),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_with_data, 0x707),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_message_tlp, 0x708),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_atomic, 0x709),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_tlp_with_prefix, 0x70A),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_write, 0x70B),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_read, 0x70C),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_write, 0x70F),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_read, 0x710),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_without_data, 0x711),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_with_data, 0x712),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_message_tlp, 0x713),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_atomic, 0x714),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_tlp_with_prefix, 0x715),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ccix_tlp, 0x716),
|
||||
DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ccix_tlp, 0x717),
|
||||
NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group dwc_pcie_event_attrs_group = {
|
||||
.name = "events",
|
||||
.attrs = dwc_pcie_pmu_time_event_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *dwc_pcie_attr_groups[] = {
|
||||
&dwc_pcie_event_attrs_group,
|
||||
&dwc_pcie_format_attrs_group,
|
||||
&dwc_pcie_cpumask_attr_group,
|
||||
NULL
|
||||
};
|
||||
|
||||
static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
|
||||
bool enable)
|
||||
{
|
||||
struct pci_dev *pdev = pcie_pmu->pdev;
|
||||
u16 ras_des_offset = pcie_pmu->ras_des_offset;
|
||||
|
||||
if (enable)
|
||||
pci_clear_and_set_config_dword(pdev,
|
||||
ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
|
||||
DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
|
||||
else
|
||||
pci_clear_and_set_config_dword(pdev,
|
||||
ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
|
||||
DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
|
||||
}
|
||||
|
||||
static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
|
||||
bool enable)
|
||||
{
|
||||
struct pci_dev *pdev = pcie_pmu->pdev;
|
||||
u16 ras_des_offset = pcie_pmu->ras_des_offset;
|
||||
|
||||
pci_clear_and_set_config_dword(pdev,
|
||||
ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
|
||||
DWC_PCIE_TIME_BASED_TIMER_START, enable);
|
||||
}
|
||||
|
||||
static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
|
||||
{
|
||||
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
|
||||
struct pci_dev *pdev = pcie_pmu->pdev;
|
||||
u16 ras_des_offset = pcie_pmu->ras_des_offset;
|
||||
u32 val;
|
||||
|
||||
pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event)
|
||||
{
|
||||
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
|
||||
struct pci_dev *pdev = pcie_pmu->pdev;
|
||||
int event_id = DWC_PCIE_EVENT_ID(event);
|
||||
u16 ras_des_offset = pcie_pmu->ras_des_offset;
|
||||
u32 lo, hi, ss;
|
||||
u64 val;
|
||||
|
||||
/*
|
||||
* The 64-bit value of the data counter is spread across two
|
||||
* registers that are not synchronized. In order to read them
|
||||
* atomically, ensure that the high 32 bits match before and after
|
||||
* reading the low 32 bits.
|
||||
*/
|
||||
pci_read_config_dword(pdev,
|
||||
ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &hi);
|
||||
do {
|
||||
/* snapshot the high 32 bits */
|
||||
ss = hi;
|
||||
|
||||
pci_read_config_dword(
|
||||
pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW,
|
||||
&lo);
|
||||
pci_read_config_dword(
|
||||
pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH,
|
||||
&hi);
|
||||
} while (hi != ss);
|
||||
|
||||
val = ((u64)hi << 32) | lo;
|
||||
/*
|
||||
* The Group#1 event measures the amount of data processed in 16-byte
|
||||
* units. Simplify the end-user interface by multiplying the counter
|
||||
* at the point of read.
|
||||
*/
|
||||
if (event_id >= 0x20 && event_id <= 0x23)
|
||||
val *= 16;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static void dwc_pcie_pmu_event_update(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
|
||||
u64 delta, prev, now = 0;
|
||||
|
||||
do {
|
||||
prev = local64_read(&hwc->prev_count);
|
||||
|
||||
if (type == DWC_PCIE_LANE_EVENT)
|
||||
now = dwc_pcie_pmu_read_lane_event_counter(event);
|
||||
else if (type == DWC_PCIE_TIME_BASE_EVENT)
|
||||
now = dwc_pcie_pmu_read_time_based_counter(event);
|
||||
|
||||
} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
|
||||
|
||||
delta = (now - prev) & DWC_PCIE_MAX_PERIOD;
|
||||
/* 32-bit counter for Lane Event Counting */
|
||||
if (type == DWC_PCIE_LANE_EVENT)
|
||||
delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD;
|
||||
|
||||
local64_add(delta, &event->count);
|
||||
}
|
||||
|
||||
static int dwc_pcie_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
|
||||
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
|
||||
struct perf_event *sibling;
|
||||
u32 lane;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
/* We don't support sampling */
|
||||
if (is_sampling_event(event))
|
||||
return -EINVAL;
|
||||
|
||||
/* We cannot support task bound events */
|
||||
if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
|
||||
return -EINVAL;
|
||||
|
||||
if (event->group_leader != event &&
|
||||
!is_software_event(event->group_leader))
|
||||
return -EINVAL;
|
||||
|
||||
for_each_sibling_event(sibling, event->group_leader) {
|
||||
if (sibling->pmu != event->pmu && !is_software_event(sibling))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (type < 0 || type >= DWC_PCIE_EVENT_TYPE_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (type == DWC_PCIE_LANE_EVENT) {
|
||||
lane = DWC_PCIE_EVENT_LANE(event);
|
||||
if (lane < 0 || lane >= pcie_pmu->nr_lanes)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
event->cpu = pcie_pmu->on_cpu;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
|
||||
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
|
||||
|
||||
hwc->state = 0;
|
||||
local64_set(&hwc->prev_count, 0);
|
||||
|
||||
if (type == DWC_PCIE_LANE_EVENT)
|
||||
dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
|
||||
else if (type == DWC_PCIE_TIME_BASE_EVENT)
|
||||
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
|
||||
}
|
||||
|
||||
static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
|
||||
enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (event->hw.state & PERF_HES_STOPPED)
|
||||
return;
|
||||
|
||||
if (type == DWC_PCIE_LANE_EVENT)
|
||||
dwc_pcie_pmu_lane_event_enable(pcie_pmu, false);
|
||||
else if (type == DWC_PCIE_TIME_BASE_EVENT)
|
||||
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
|
||||
|
||||
dwc_pcie_pmu_event_update(event);
|
||||
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
{
	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
	struct pci_dev *pdev = pcie_pmu->pdev;
	struct hw_perf_event *hwc = &event->hw;
	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
	int event_id = DWC_PCIE_EVENT_ID(event);
	int lane = DWC_PCIE_EVENT_LANE(event);
	u16 ras_des_offset = pcie_pmu->ras_des_offset;
	u32 ctrl;

	/* There is only one counter per type, and it may already be in use */
	if (pcie_pmu->event[type])
		return -ENOSPC;

	pcie_pmu->event[type] = event;
	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (type == DWC_PCIE_LANE_EVENT) {
		/* EVENT_COUNTER_DATA_REG needs to be cleared manually */
		ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
			FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
			FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF) |
			FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
		pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
				       ctrl);
	} else if (type == DWC_PCIE_TIME_BASE_EVENT) {
		/*
		 * TIME_BASED_ANAL_DATA_REG is a 64-bit register, so it can
		 * safely be used with any manually controlled duration. It is
		 * cleared when the next measurement starts.
		 */
		ctrl = FIELD_PREP(DWC_PCIE_TIME_BASED_REPORT_SEL, event_id) |
			FIELD_PREP(DWC_PCIE_TIME_BASED_DURATION_SEL,
				   DWC_PCIE_DURATION_MANUAL_CTL) |
			DWC_PCIE_TIME_BASED_CNT_ENABLE;
		pci_write_config_dword(
			pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, ctrl);
	}

	if (flags & PERF_EF_START)
		dwc_pcie_pmu_event_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);

	return 0;
}

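The ctrl words above are assembled with FIELD_PREP(), which shifts a value into the field described by a bitmask. A self-contained sketch of the same pattern, with illustrative field definitions rather than the driver's:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define DEMO_EVENT_SEL	GENMASK(27, 16)	/* illustrative field */
#define DEMO_LANE_SEL	GENMASK(11, 8)	/* illustrative field */

static u32 demo_pack(u32 event_id, u32 lane)
{
	/* Each FIELD_PREP() shifts its value to the mask's bit offset. */
	return FIELD_PREP(DEMO_EVENT_SEL, event_id) |
	       FIELD_PREP(DEMO_LANE_SEL, lane);
}
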
static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
{
	struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
	enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);

	dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
	perf_event_update_userpage(event);
	pcie_pmu->event[type] = NULL;
}

static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node)
{
	cpuhp_state_remove_instance_nocalls(dwc_pcie_pmu_hp_state, hotplug_node);
}

/*
 * Find the bound DES capability device info of a PCI device.
 * @pdev: The PCI device.
 */
static struct dwc_pcie_dev_info *dwc_pcie_find_dev_info(struct pci_dev *pdev)
{
	struct dwc_pcie_dev_info *dev_info;

	list_for_each_entry(dev_info, &dwc_pcie_dev_info_head, dev_node)
		if (dev_info->pdev == pdev)
			return dev_info;

	return NULL;
}

static void dwc_pcie_unregister_pmu(void *data)
{
	struct dwc_pcie_pmu *pcie_pmu = data;

	perf_pmu_unregister(&pcie_pmu->pmu);
}

static bool dwc_pcie_match_des_cap(struct pci_dev *pdev)
{
	const struct dwc_pcie_vendor_id *vid;
	u16 vsec = 0;
	u32 val;

	if (!pci_is_pcie(pdev) || pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
		return false;

	for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) {
		vsec = pci_find_vsec_capability(pdev, vid->vendor_id,
						DWC_PCIE_VSEC_RAS_DES_ID);
		if (vsec)
			break;
	}
	if (!vsec)
		return false;

	pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
	if (PCI_VNDR_HEADER_REV(val) != 0x04)
		return false;

	pci_dbg(pdev,
		"Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
	return true;
}

static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info)
{
	platform_device_unregister(dev_info->plat_dev);
	list_del(&dev_info->dev_node);
	kfree(dev_info);
}

static int dwc_pcie_register_dev(struct pci_dev *pdev)
{
	struct platform_device *plat_dev;
	struct dwc_pcie_dev_info *dev_info;
	u32 bdf;

	bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
	plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf,
						 pdev, sizeof(*pdev));

	if (IS_ERR(plat_dev))
		return PTR_ERR(plat_dev);

	dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
	if (!dev_info)
		return -ENOMEM;

	/* Cache platform device to handle pci device hotplug */
	dev_info->plat_dev = plat_dev;
	dev_info->pdev = pdev;
	list_add(&dev_info->dev_node, &dwc_pcie_dev_info_head);

	return 0;
}

static int dwc_pcie_pmu_notifier(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dwc_pcie_dev_info *dev_info;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		if (!dwc_pcie_match_des_cap(pdev))
			return NOTIFY_DONE;
		if (dwc_pcie_register_dev(pdev))
			return NOTIFY_BAD;
		break;
	case BUS_NOTIFY_DEL_DEVICE:
		dev_info = dwc_pcie_find_dev_info(pdev);
		if (!dev_info)
			return NOTIFY_DONE;
		dwc_pcie_unregister_dev(dev_info);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block dwc_pcie_pmu_nb = {
	.notifier_call = dwc_pcie_pmu_notifier,
};

static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
{
	struct pci_dev *pdev = plat_dev->dev.platform_data;
	struct dwc_pcie_pmu *pcie_pmu;
	char *name;
	u32 bdf, val;
	u16 vsec;
	int ret;

	vsec = pci_find_vsec_capability(pdev, pdev->vendor,
					DWC_PCIE_VSEC_RAS_DES_ID);
	pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
	bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
	name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf);
	if (!name)
		return -ENOMEM;

	pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
	if (!pcie_pmu)
		return -ENOMEM;

	pcie_pmu->pdev = pdev;
	pcie_pmu->ras_des_offset = vsec;
	pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
	pcie_pmu->on_cpu = -1;
	pcie_pmu->pmu = (struct pmu){
		.name = name,
		.parent = &pdev->dev,
		.module = THIS_MODULE,
		.attr_groups = dwc_pcie_attr_groups,
		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
		.task_ctx_nr = perf_invalid_context,
		.event_init = dwc_pcie_pmu_event_init,
		.add = dwc_pcie_pmu_event_add,
		.del = dwc_pcie_pmu_event_del,
		.start = dwc_pcie_pmu_event_start,
		.stop = dwc_pcie_pmu_event_stop,
		.read = dwc_pcie_pmu_event_update,
	};

	/* Add this instance to the list used by the offline callback */
	ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
				       &pcie_pmu->cpuhp_node);
	if (ret) {
		pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf);
		return ret;
	}

	/* Unwind when the platform driver is removed */
	ret = devm_add_action_or_reset(&plat_dev->dev,
				       dwc_pcie_pmu_remove_cpuhp_instance,
				       &pcie_pmu->cpuhp_node);
	if (ret)
		return ret;

	ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
	if (ret) {
		pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf);
		return ret;
	}
	ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu,
				       pcie_pmu);
	if (ret)
		return ret;

	return 0;
}

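The probe path leans on devm_add_action_or_reset(), which runs the release callback immediately if it cannot be queued and otherwise runs it automatically at device teardown. A condensed sketch of the pattern, with hypothetical names:

#include <linux/device.h>
#include <linux/slab.h>

static void demo_release(void *res)
{
	kfree(res);	/* undo whatever the probe acquired */
}

static int demo_probe(struct device *dev)
{
	void *res = kzalloc(16, GFP_KERNEL);

	if (!res)
		return -ENOMEM;

	/* On failure this calls demo_release(res) before returning. */
	return devm_add_action_or_reset(dev, demo_release, res);
}
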
static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
	struct dwc_pcie_pmu *pcie_pmu;

	pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
	if (pcie_pmu->on_cpu == -1)
		pcie_pmu->on_cpu = cpumask_local_spread(
			0, dev_to_node(&pcie_pmu->pdev->dev));

	return 0;
}

static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
{
	struct dwc_pcie_pmu *pcie_pmu;
	struct pci_dev *pdev;
	int node;
	cpumask_t mask;
	unsigned int target;

	pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
	/* Nothing to do if this CPU doesn't own the PMU */
	if (cpu != pcie_pmu->on_cpu)
		return 0;

	pcie_pmu->on_cpu = -1;
	pdev = pcie_pmu->pdev;
	node = dev_to_node(&pdev->dev);
	if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
	    cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
		target = cpumask_any(&mask);
	else
		target = cpumask_any_but(cpu_online_mask, cpu);

	if (target >= nr_cpu_ids) {
		pci_err(pdev, "There is no CPU to set\n");
		return 0;
	}

	/* This PMU does not support interrupts; just migrate the context. */
	perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
	pcie_pmu->on_cpu = target;

	return 0;
}

static struct platform_driver dwc_pcie_pmu_driver = {
	.probe = dwc_pcie_pmu_probe,
	.driver = {.name = "dwc_pcie_pmu",},
};

static int __init dwc_pcie_pmu_init(void)
{
	struct pci_dev *pdev = NULL;
	bool found = false;
	int ret;

	for_each_pci_dev(pdev) {
		if (!dwc_pcie_match_des_cap(pdev))
			continue;

		ret = dwc_pcie_register_dev(pdev);
		if (ret) {
			pci_dev_put(pdev);
			return ret;
		}

		found = true;
	}
	if (!found)
		return -ENODEV;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/dwc_pcie_pmu:online",
				      dwc_pcie_pmu_online_cpu,
				      dwc_pcie_pmu_offline_cpu);
	if (ret < 0)
		return ret;

	dwc_pcie_pmu_hp_state = ret;

	ret = platform_driver_register(&dwc_pcie_pmu_driver);
	if (ret)
		goto platform_driver_register_err;

	ret = bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
	if (ret)
		goto platform_driver_register_err;
	notify = true;

	return 0;

platform_driver_register_err:
	cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);

	return ret;
}

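cpuhp_setup_state_multi() with CPUHP_AP_ONLINE_DYN allocates a dynamic hotplug state and returns its id, which the later cpuhp_state_add_instance() call in probe keys off. A minimal sketch with hypothetical callbacks:

#include <linux/cpuhotplug.h>

static enum cpuhp_state demo_hp_state;

static int demo_online(unsigned int cpu, struct hlist_node *node)
{
	return 0;	/* pick an owning CPU here */
}

static int demo_offline(unsigned int cpu, struct hlist_node *node)
{
	return 0;	/* migrate the context away from @cpu here */
}

static int __init demo_init(void)
{
	int ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "demo:online",
					  demo_online, demo_offline);
	if (ret < 0)
		return ret;

	demo_hp_state = ret;	/* dynamic state id, one per driver */
	return 0;
}
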
static void __exit dwc_pcie_pmu_exit(void)
{
	struct dwc_pcie_dev_info *dev_info, *tmp;

	if (notify)
		bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
	list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node)
		dwc_pcie_unregister_dev(dev_info);
	platform_driver_unregister(&dwc_pcie_pmu_driver);
	cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
}

module_init(dwc_pcie_pmu_init);
module_exit(dwc_pcie_pmu_exit);

MODULE_DESCRIPTION("PMU driver for DesignWare Cores PCI Express Controller");
MODULE_AUTHOR("Shuai Xue <xueshuai@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");

@@ -19,6 +19,8 @@
 #define COUNTER_READ		0x20

 #define COUNTER_DPCR1		0x30
+#define COUNTER_MUX_CNTL	0x50
+#define COUNTER_MASK_COMP	0x54

 #define CNTL_OVER		0x1
 #define CNTL_CLEAR		0x2

@@ -32,6 +34,13 @@
 #define CNTL_CSV_SHIFT		24
 #define CNTL_CSV_MASK		(0xFFU << CNTL_CSV_SHIFT)

+#define READ_PORT_SHIFT		0
+#define READ_PORT_MASK		(0x7 << READ_PORT_SHIFT)
+#define READ_CHANNEL_REVERT	0x00000008	/* bit 3 for read channel select */
+#define WRITE_PORT_SHIFT	8
+#define WRITE_PORT_MASK		(0x7 << WRITE_PORT_SHIFT)
+#define WRITE_CHANNEL_REVERT	0x00000800	/* bit 11 for write channel select */
+
 #define EVENT_CYCLES_ID		0
 #define EVENT_CYCLES_COUNTER	0
 #define NUM_COUNTERS		4

@@ -50,6 +59,7 @@ static DEFINE_IDA(ddr_ida);
 /* DDR Perf hardware feature */
 #define DDR_CAP_AXI_ID_FILTER			0x1	/* support AXI ID filter */
 #define DDR_CAP_AXI_ID_FILTER_ENHANCED		0x3	/* support enhanced AXI ID filter */
+#define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER	0x4	/* support AXI ID PORT CHANNEL filter */

 struct fsl_ddr_devtype_data {
	unsigned int quirks;	/* quirks needed for different DDR Perf core */

@@ -82,6 +92,11 @@ static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
	.identifier = "i.MX8MP",
 };

+static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+	.identifier = "i.MX8DXL",
+};
+
 static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
	{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
	{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},

@@ -89,6 +104,7 @@ static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
	{ .compatible = "fsl,imx8mm-ddr-pmu", .data = &imx8mm_devtype_data},
	{ .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data},
	{ .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
+	{ .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data},
	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);

@@ -144,6 +160,7 @@ static const struct attribute_group ddr_perf_identifier_attr_group = {
 enum ddr_perf_filter_capabilities {
	PERF_CAP_AXI_ID_FILTER = 0,
	PERF_CAP_AXI_ID_FILTER_ENHANCED,
+	PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER,
	PERF_CAP_AXI_ID_FEAT_MAX,
 };

@@ -157,6 +174,8 @@ static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
	case PERF_CAP_AXI_ID_FILTER_ENHANCED:
		quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
		return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+	case PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER:
+		return !!(quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER);
	default:
		WARN(1, "unknown filter cap %d\n", cap);
	}

@@ -187,6 +206,7 @@ static ssize_t ddr_perf_filter_cap_show(struct device *dev,
 static struct attribute *ddr_perf_filter_cap_attr[] = {
	PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
	PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+	PERF_FILTER_EXT_ATTR_ENTRY(super_filter, PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER),
	NULL,
 };

@@ -272,11 +292,15 @@ static const struct attribute_group ddr_perf_events_attr_group = {
 PMU_FORMAT_ATTR(event, "config:0-7");
 PMU_FORMAT_ATTR(axi_id, "config1:0-15");
 PMU_FORMAT_ATTR(axi_mask, "config1:16-31");
+PMU_FORMAT_ATTR(axi_port, "config2:0-2");
+PMU_FORMAT_ATTR(axi_channel, "config2:3-3");

 static struct attribute *ddr_perf_format_attrs[] = {
	&format_attr_event.attr,
	&format_attr_axi_id.attr,
	&format_attr_axi_mask.attr,
+	&format_attr_axi_port.attr,
+	&format_attr_axi_channel.attr,
	NULL,
 };

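Per the format strings just added, axi_port occupies config2 bits 0-2 and axi_channel bit 3. A small helper showing how a perf_event_attr::config2 value would be packed (illustrative, userspace-side):

#include <stdint.h>

/* axi_port is config2:0-2, axi_channel is config2:3-3 */
static uint64_t demo_config2(uint64_t port, uint64_t channel)
{
	return (port & 0x7) | ((channel & 0x1) << 3);
}
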
@@ -530,6 +554,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
	int counter;
	int cfg = event->attr.config;
	int cfg1 = event->attr.config1;
+	int cfg2 = event->attr.config2;

	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
		int i;

@@ -553,6 +578,26 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
		return -EOPNOTSUPP;
	}

+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER) {
+		if (ddr_perf_is_filtered(event)) {
+			/* revert axi id masking(axi_mask) value */
+			cfg1 ^= AXI_MASKING_REVERT;
+			writel(cfg1, pmu->base + COUNTER_MASK_COMP + ((counter - 1) << 4));
+
+			if (cfg == 0x41) {
+				/* revert axi read channel(axi_channel) value */
+				cfg2 ^= READ_CHANNEL_REVERT;
+				cfg2 |= FIELD_PREP(READ_PORT_MASK, cfg2);
+			} else {
+				/* revert axi write channel(axi_channel) value */
+				cfg2 ^= WRITE_CHANNEL_REVERT;
+				cfg2 |= FIELD_PREP(WRITE_PORT_MASK, cfg2);
+			}
+
+			writel(cfg2, pmu->base + COUNTER_MUX_CNTL + ((counter - 1) << 4));
+		}
+	}
+
	pmu->events[counter] = event;
	hwc->idx = counter;

@@ -617,7 +617,7 @@ static int ddr_perf_probe(struct platform_device *pdev)

	platform_set_drvdata(pdev, pmu);

-	pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+	pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
	if (!name) {
		ret = -ENOMEM;

@@ -674,7 +674,7 @@ static int ddr_perf_probe(struct platform_device *pdev)
	cpuhp_remove_multi_state(pmu->cpuhp_state);
 cpuhp_state_err:
 format_string_err:
-	ida_simple_remove(&ddr_ida, pmu->id);
+	ida_free(&ddr_ida, pmu->id);
	dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret);
	return ret;
 }

@@ -688,7 +688,7 @@ static int ddr_perf_remove(struct platform_device *pdev)

	perf_pmu_unregister(&pmu->pmu);

-	ida_simple_remove(&ddr_ida, pmu->id);
+	ida_free(&ddr_ida, pmu->id);

	return 0;
 }

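The ida_simple_get()/ida_simple_remove() pair is deprecated; ida_alloc()/ida_free() is the direct replacement, since ida_simple_get(ida, 0, 0, gfp) already meant "smallest free id, no upper bound". A minimal sketch:

#include <linux/idr.h>

static DEFINE_IDA(demo_ida);

static int demo_use_id(void)
{
	int id = ida_alloc(&demo_ida, GFP_KERNEL);	/* >= 0 or -errno */

	if (id < 0)
		return id;
	/* ... use id ... */
	ida_free(&demo_ida, id);
	return 0;
}
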
@@ -383,8 +383,8 @@ static struct attribute *hisi_uc_pmu_events_attr[] = {
	HISI_PMU_EVENT_ATTR(cpu_rd,		0x10),
	HISI_PMU_EVENT_ATTR(cpu_rd64,		0x17),
	HISI_PMU_EVENT_ATTR(cpu_rs64,		0x19),
-	HISI_PMU_EVENT_ATTR(cpu_mru,		0x1a),
-	HISI_PMU_EVENT_ATTR(cycles,		0x9c),
+	HISI_PMU_EVENT_ATTR(cpu_mru,		0x1c),
+	HISI_PMU_EVENT_ATTR(cycles,		0x95),
	HISI_PMU_EVENT_ATTR(spipe_hit,		0xb3),
	HISI_PMU_EVENT_ATTR(hpipe_hit,		0xdb),
	HISI_PMU_EVENT_ATTR(cring_rxdat_cnt,	0xfa),

@@ -35,6 +35,7 @@ int __init numa_add_memblk(int nodeid, u64 start, u64 end);
 void __init numa_set_distance(int from, int to, int distance);
 void __init numa_free_distance(void);
 void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+int __init early_cpu_to_node(int cpu);
 void numa_store_cpu_info(unsigned int cpu);
 void numa_add_cpu(unsigned int cpu);
 void numa_remove_cpu(unsigned int cpu);

@@ -46,6 +47,7 @@ static inline void numa_add_cpu(unsigned int cpu) { }
 static inline void numa_remove_cpu(unsigned int cpu) { }
 static inline void arch_numa_init(void) { }
 static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+static inline int early_cpu_to_node(int cpu) { return 0; }

 #endif /* CONFIG_NUMA */

@@ -1239,6 +1239,8 @@ int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val);
 int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val);
 int pci_write_config_word(const struct pci_dev *dev, int where, u16 val);
 int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val);
+void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos,
+				    u32 clear, u32 set);

 int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
 int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val);

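pci_clear_and_set_config_dword() is a read-modify-write over a config-space dword; a roughly equivalent open-coded sketch:

#include <linux/pci.h>

static void demo_clear_and_set(struct pci_dev *dev, int pos, u32 clear, u32 set)
{
	u32 val;

	pci_read_config_dword(dev, pos, &val);
	val &= ~clear;
	val |= set;
	pci_write_config_dword(dev, pos, val);
}
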
@@ -2605,6 +2605,8 @@
 #define PCI_VENDOR_ID_TEKRAM		0x1de1
 #define PCI_DEVICE_ID_TEKRAM_DC290	0xdc29

+#define PCI_VENDOR_ID_ALIBABA		0x1ded
+
 #define PCI_VENDOR_ID_TEHUTI		0x1fc9
 #define PCI_DEVICE_ID_TEHUTI_3009	0x3009
 #define PCI_DEVICE_ID_TEHUTI_3010	0x3010

@@ -59,12 +59,6 @@ struct pmu_hw_events {
	 */
	DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);

-	/*
-	 * Hardware lock to serialize accesses to PMU registers. Needed for the
-	 * read/modify/write sequences.
-	 */
-	raw_spinlock_t pmu_lock;
-
	/*
	 * When using percpu IRQs, we need a percpu dev_id. Place it here as we
	 * already have to allocate this struct per cpu.

@@ -189,4 +183,26 @@ void armpmu_free_irq(int irq, int cpu);
 #define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
 #define ARMV8_TRBE_PDEV_NAME "arm,trbe"

+/* Why does everything I do descend into this? */
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
+	(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
+	__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name)					\
+	PMU_FORMAT_ATTR(name,						\
+	_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,			\
+			     ATTR_CFG_FLD_##name##_LO,			\
+			     ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)				\
+	((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name)					\
+	_ATTR_CFG_GET_FLD(attr,						\
+			  ATTR_CFG_FLD_##name##_CFG,			\
+			  ATTR_CFG_FLD_##name##_LO,			\
+			  ATTR_CFG_FLD_##name##_HI)
+
 #endif /* __ARM_PMU_H__ */

@@ -215,21 +215,27 @@
 #define ARMV8_PMU_PMCR_DP	(1 << 5) /* Disable CCNT if non-invasive debug*/
 #define ARMV8_PMU_PMCR_LC	(1 << 6) /* Overflow on 64 bit cycle counter */
 #define ARMV8_PMU_PMCR_LP	(1 << 7) /* Long event counter enable */
-#define ARMV8_PMU_PMCR_N_SHIFT	11	/* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK	0x1f
-#define ARMV8_PMU_PMCR_MASK	0xff	/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_N	GENMASK(15, 11) /* Number of counters supported */
+/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK	(ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \
+				 ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \
+				 ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
+				 ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)

 /*
  * PMOVSR: counters overflow flag status reg
  */
-#define ARMV8_PMU_OVSR_MASK		0xffffffff	/* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK	ARMV8_PMU_OVSR_MASK
+#define ARMV8_PMU_OVSR_P		GENMASK(30, 0)
+#define ARMV8_PMU_OVSR_C		BIT(31)
+/* Mask for writable bits is both P and C fields */
+#define ARMV8_PMU_OVERFLOWED_MASK	(ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C)

 /*
  * PMXEVTYPER: Event selection reg
  */
 #define ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_EVENT	GENMASK(15, 0)	/* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_TH	GENMASK_ULL(43, 32) /* arm64 only */
+#define ARMV8_PMU_EVTYPE_TC	GENMASK_ULL(63, 61) /* arm64 only */

 /*
  * Event filters for PMUv3
@@ -244,19 +250,19 @@
 /*
  * PMUSERENR: user enable reg
  */
-#define ARMV8_PMU_USERENR_MASK	0xf		/* Mask for writable bits */
 #define ARMV8_PMU_USERENR_EN	(1 << 0) /* PMU regs can be accessed at EL0 */
 #define ARMV8_PMU_USERENR_SW	(1 << 1) /* PMSWINC can be written at EL0 */
 #define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
 #define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
+/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_MASK	(ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
+				 ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)

 /* PMMIR_EL1.SLOTS mask */
-#define ARMV8_PMU_SLOTS_MASK	0xff
-
-#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
-#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
-#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
-#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_SLOTS		GENMASK(7, 0)
+#define ARMV8_PMU_BUS_SLOTS	GENMASK(15, 8)
+#define ARMV8_PMU_BUS_WIDTH	GENMASK(19, 16)
+#define ARMV8_PMU_THWIDTH	GENMASK(23, 20)

 /*
  * This code is really good

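Switching the register fields to GENMASK() definitions lets callers replace open-coded shift-and-mask sequences with FIELD_GET()/FIELD_PREP(), as the KVM selftest hunk at the end of this series does. For example:

#include <linux/bitfield.h>

static inline unsigned int demo_pmcr_n(u64 pmcr)
{
	/* Was: (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK */
	return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}
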
@@ -218,45 +218,54 @@
 #define ARMV8_PMU_PMCR_DP	(1 << 5) /* Disable CCNT if non-invasive debug*/
 #define ARMV8_PMU_PMCR_LC	(1 << 6) /* Overflow on 64 bit cycle counter */
 #define ARMV8_PMU_PMCR_LP	(1 << 7) /* Long event counter enable */
-#define ARMV8_PMU_PMCR_N_SHIFT	11	/* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK	0x1f
-#define ARMV8_PMU_PMCR_MASK	0xff	/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_N	GENMASK(15, 11) /* Number of counters supported */
+/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK	(ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \
+				 ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \
+				 ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
+				 ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)

 /*
  * PMOVSR: counters overflow flag status reg
  */
-#define ARMV8_PMU_OVSR_MASK		0xffffffff	/* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK	ARMV8_PMU_OVSR_MASK
+#define ARMV8_PMU_OVSR_P		GENMASK(30, 0)
+#define ARMV8_PMU_OVSR_C		BIT(31)
+/* Mask for writable bits is both P and C fields */
+#define ARMV8_PMU_OVERFLOWED_MASK	(ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C)

 /*
  * PMXEVTYPER: Event selection reg
  */
 #define ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_EVENT	GENMASK(15, 0)	/* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_TH	GENMASK(43, 32)
+#define ARMV8_PMU_EVTYPE_TC	GENMASK(63, 61)

 /*
  * Event filters for PMUv3
  */
-#define ARMV8_PMU_EXCLUDE_EL1	(1U << 31)
-#define ARMV8_PMU_EXCLUDE_EL0	(1U << 30)
-#define ARMV8_PMU_INCLUDE_EL2	(1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL1		(1U << 31)
+#define ARMV8_PMU_EXCLUDE_EL0		(1U << 30)
+#define ARMV8_PMU_EXCLUDE_NS_EL1	(1U << 29)
+#define ARMV8_PMU_EXCLUDE_NS_EL0	(1U << 28)
+#define ARMV8_PMU_INCLUDE_EL2		(1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL3		(1U << 26)

 /*
  * PMUSERENR: user enable reg
  */
-#define ARMV8_PMU_USERENR_MASK	0xf		/* Mask for writable bits */
 #define ARMV8_PMU_USERENR_EN	(1 << 0) /* PMU regs can be accessed at EL0 */
 #define ARMV8_PMU_USERENR_SW	(1 << 1) /* PMSWINC can be written at EL0 */
 #define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
 #define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
+/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_MASK	(ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
+				 ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)

 /* PMMIR_EL1.SLOTS mask */
-#define ARMV8_PMU_SLOTS_MASK	0xff
-
-#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
-#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
-#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
-#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_SLOTS		GENMASK(7, 0)
+#define ARMV8_PMU_BUS_SLOTS	GENMASK(15, 8)
+#define ARMV8_PMU_BUS_WIDTH	GENMASK(19, 16)
+#define ARMV8_PMU_THWIDTH	GENMASK(23, 20)

 /*
  * This code is really good

@@ -254,6 +254,12 @@ static int write_clone_read(void)
	putnum(++tests_run); \
	putstr(" " #name "\n");

+#define skip_test(name) \
+	tests_skipped++; \
+	putstr("ok "); \
+	putnum(++tests_run); \
+	putstr(" # SKIP " #name "\n");
+
 int main(int argc, char **argv)
 {
	int ret, i;

@@ -283,13 +289,11 @@ int main(int argc, char **argv)
	} else {
		putstr("# SME support not present\n");

-		for (i = 0; i < EXPECTED_TESTS; i++) {
-			putstr("ok ");
-			putnum(i);
-			putstr(" skipped, TPIDR2 not supported\n");
-		}
-
-		tests_skipped += EXPECTED_TESTS;
+		skip_test(default_value);
+		skip_test(write_read);
+		skip_test(write_sleep_read);
+		skip_test(write_fork_read);
+		skip_test(write_clone_read);
	}

	print_summary();

@@ -515,6 +515,10 @@ function barf
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

+#ifdef SSVE
+	mrs	x13, S3_3_C4_C2_2
+#endif
+
	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec

@@ -534,6 +538,12 @@ function barf
	bl	dumphex
	puts	"]\n"

+#ifdef SSVE
+	puts	"\tSVCR: "
+	mov	x0, x13
+	bl	putdecn
+#endif
+
	mov	x8, #__NR_getpid
	svc	#0
// fpsimd.c activity log dump hack

@@ -66,6 +66,11 @@ static struct vec_data vec_data[] = {
	},
 };

+static bool vec_type_supported(struct vec_data *data)
+{
+	return getauxval(data->hwcap_type) & data->hwcap;
+}
+
 static int stdio_read_integer(FILE *f, const char *what, int *val)
 {
	int n = 0;

@@ -564,8 +569,11 @@ static void prctl_set_all_vqs(struct vec_data *data)
		return;
	}

-	for (i = 0; i < ARRAY_SIZE(vec_data); i++)
+	for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+		if (!vec_type_supported(&vec_data[i]))
+			continue;
		orig_vls[i] = vec_data[i].rdvl();
+	}

	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
		vl = sve_vl_from_vq(vq);

@@ -594,7 +602,7 @@ static void prctl_set_all_vqs(struct vec_data *data)
		if (&vec_data[i] == data)
			continue;

-		if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap))
+		if (!vec_type_supported(&vec_data[i]))
			continue;

		if (vec_data[i].rdvl() != orig_vls[i]) {

@@ -765,7 +773,7 @@ int main(void)
		struct vec_data *data = &vec_data[i];
		unsigned long supported;

-		supported = getauxval(data->hwcap_type) & data->hwcap;
+		supported = vec_type_supported(data);
		if (!supported)
			all_supported = false;

@@ -333,6 +333,9 @@ function barf
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
+
+	mrs	x13, S3_3_C4_C2_2
+
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data

@@ -356,6 +359,9 @@ function barf
	mov	x1, x12
	bl	dumphex
	puts	"]\n"
+	puts	"\tSVCR: "
+	mov	x0, x13
+	bl	putdecn

	mov	x8, #__NR_getpid
	svc	#0

@@ -267,6 +267,8 @@ function barf
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
+
+	mrs	x13, S3_3_C4_C2_2
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data

@@ -287,6 +289,9 @@ function barf
	mov	x1, x12
	bl	dumphex
	puts	"]\n"
+	puts	"\tSVCR: "
+	mov	x0, x13
+	bl	putdecn

	mov	x8, #__NR_getpid
	svc	#0

@@ -42,13 +42,12 @@ struct pmreg_sets {

 static uint64_t get_pmcr_n(uint64_t pmcr)
 {
-	return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+	return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
 }

 static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
 {
-	*pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
-	*pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+	u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
 }

 static uint64_t get_counters_mask(uint64_t n)
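
u64p_replace_bits() (from linux/bitfield.h) clears the field described by the mask in the pointed-to value and inserts the new one, collapsing the old two-statement mask-and-shift sequence into a single call. A sketch:

#include <linux/bitfield.h>

static void demo_set_pmcr_n(__u64 *pmcr, __u64 n)
{
	/* Was: *pmcr &= ~(N_MASK << N_SHIFT); *pmcr |= n << N_SHIFT; */
	u64p_replace_bits(pmcr, n, ARMV8_PMU_PMCR_N);
}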