arm64 updates for 6.7:

* Major refactoring of the CPU capability detection logic resulting in
   the removal of the cpus_have_const_cap() function and the migration
   of the code to "alternative" branches where possible
 
 * Backtrace/kgdb: use IPIs and pseudo-NMI
 
 * Perf and PMU:
 
   - Add support for Ampere SoC PMUs
 
   - Multi-DTC improvements for larger CMN configurations with multiple
     Debug & Trace Controllers
 
   - Rework the Arm CoreSight PMU driver to allow separate registration of
     vendor backend modules
 
   - Fixes: add missing MODULE_DEVICE_TABLE to the amlogic perf
     driver; use device_get_match_data() in the xgene driver; fix NULL
     pointer dereference in the hisi driver caused by calling
     cpuhp_state_remove_instance(); use-after-free in the hisi driver
 
 * HWCAP updates:
 
   - FEAT_SVE_B16B16 (BFloat16)
 
   - FEAT_LRCPC3 (release consistency model)
 
   - FEAT_LSE128 (128-bit atomic instructions)
 
 * SVE: remove a couple of pseudo registers from the cpufeature code.
   There is logic in place already to detect mismatched SVE features
 
 * Miscellaneous:
 
   - Reduce the default swiotlb size (currently 64MB) if no ZONE_DMA
     bouncing is needed. The buffer is still required for small kmalloc()
     buffers
 
   - Fix module PLT counting with !RANDOMIZE_BASE
 
   - Restrict CPU_BIG_ENDIAN to LLVM IAS 15.x or newer

   - Move synchronisation code out of the set_ptes() loop
 
   - More compact cpufeature output, listing the cores a feature is
     enabled on
 
   - Kselftest updates for the new CPU features
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE5RElWfyWxS+3PLO2a9axLQDIXvEFAmU7/QUACgkQa9axLQDI
 XvEx3xAAjICmHm+ryKJxS1IGXLYu2DXMcHUjeW6w1SxkK/vKhTMlHRx/CIWDze2l
 eENu7TcDLtTw+Gv9kqg30TSwzLfJhP9oFpX2T5TKkh5qlJlbz8fBtm+as14DTLCZ
 p2sra3J0w4B5JwTVqnj2RHOlEftMKvbyLGRkz3ve6wIUbsp5pXMkxAd/k3wOf0lC
 m6d9w1OMA2sOsw9YCgjcCNQGEzFMJk+13w7K+4w6A8Djn/Jxkt4fAFVn2ZlCiZzD
 NA2lTDWJqGmeGHo3iFdCTensWXmWTqjzxsNEf7PyBk5mBOdzDVxlTfEL7vnJg7gf
 BlTQ/nhIpra7rHQ9q2rwqEzbF+4Tn3uWlQfdDb7+/4goPjDh7tlBhEOYyOwTCEIT
 0t9cCSvBmSCKeXC3lKWWtJ+QJKhZHSmXN84EotTs65KyyfIsi4RuSezvV/+aIL86
 06sHYlYxETuujZP1cgOjf69Wsdsgizx0mqXJXf/xOjp22HFDcL4Bki6Rgi6t5OZj
 GEHG15kSE+eJ+RIpxpuAN8fdrlxYubsVLIksCqK7cZf9zXbQGIlifKAIrYiEx6kz
 FD+o+j/5niRWR6yJZCtCcGxqpSlwnYWPqc1Ds0GES8A/BphWMPozXUAZ0ll4Fnp1
 yyR2/Due/eBsCNESn579kP8989rashubB8vxvdx2fcWVtLC7VgE=
 =QaEo
 -----END PGP SIGNATURE-----

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:
 "No major architecture features this time around, just some new HWCAP
  definitions, support for the Ampere SoC PMUs and a few fixes/cleanups.

   The bulk of the changes is a rework of the CPU capability checking
   code (cpus_have_cap() etc).

   - Major refactoring of the CPU capability detection logic resulting
     in the removal of the cpus_have_const_cap() function and the
     migration of the code to "alternative" branches where possible (a
     sketch of the call-site pattern follows this summary)

   - Backtrace/kgdb: use IPIs and pseudo-NMI

   - Perf and PMU:

      - Add support for Ampere SoC PMUs

      - Multi-DTC improvements for larger CMN configurations with
        multiple Debug & Trace Controllers

      - Rework the Arm CoreSight PMU driver to allow separate
        registration of vendor backend modules

      - Fixes: add missing MODULE_DEVICE_TABLE to the amlogic perf
        driver; use device_get_match_data() in the xgene driver; fix
        NULL pointer dereference in the hisi driver caused by calling
        cpuhp_state_remove_instance(); use-after-free in the hisi driver

   - HWCAP updates (a userspace detection sketch follows the commit
     list below):

      - FEAT_SVE_B16B16 (BFloat16)

      - FEAT_LRCPC3 (release consistency model)

      - FEAT_LSE128 (128-bit atomic instructions)

   - SVE: remove a couple of pseudo registers from the cpufeature code.
     There is logic in place already to detect mismatched SVE features

   - Miscellaneous:

      - Reduce the default swiotlb size (currently 64MB) if no ZONE_DMA
        bouncing is needed. The buffer is still required for small
        kmalloc() buffers

      - Fix module PLT counting with !RANDOMIZE_BASE

      - Restrict CPU_BIG_ENDIAN to LLVM IAS 15.x or newer

      - Move synchronisation code out of the set_ptes() loop

      - More compact cpufeature output, listing the cores a feature is
        enabled on

      - Kselftest updates for the new CPU features"
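
The vast majority of the hunks below follow one mechanical pattern: a
cpus_have_const_cap() check becomes one of the alternative-patched
helpers. A minimal sketch of the pattern, modelled on the archrandom.h
hunk further down (use_rndr_instructions() is a hypothetical caller,
shown for illustration only):

    /* Before: fell back to a runtime bitmap test until system
     * capabilities were finalized. */
    if (cpus_have_const_cap(ARM64_HAS_RNG))
            use_rndr_instructions();

    /* After: the alternatives framework patches the check into a plain
     * branch/NOP once system capabilities are finalized; until then
     * the "unlikely" variant reads as false. */
    if (alternative_has_cap_unlikely(ARM64_HAS_RNG))
            use_rndr_instructions();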

 * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (83 commits)
  arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer
  arm64: module: Fix PLT counting when CONFIG_RANDOMIZE_BASE=n
  arm64, irqchip/gic-v3, ACPI: Move MADT GICC enabled check into a helper
  perf: hisi: Fix use-after-free when register pmu fails
  drivers/perf: hisi_pcie: Initialize event->cpu only on success
  drivers/perf: hisi_pcie: Check the type first in pmu::event_init()
  arm64: cpufeature: Change DBM to display enabled cores
  arm64: cpufeature: Display the set of cores with a feature
  perf/arm-cmn: Enable per-DTC counter allocation
  perf/arm-cmn: Rework DTC counters (again)
  perf/arm-cmn: Fix DTC domain detection
  drivers: perf: arm_pmuv3: Drop some unused arguments from armv8_pmu_init()
  drivers: perf: arm_pmuv3: Read PMMIR_EL1 unconditionally
  drivers/perf: hisi: use cpuhp_state_remove_instance_nocalls() for hisi_hns3_pmu uninit process
  clocksource/drivers/arm_arch_timer: limit XGene-1 workaround
  arm64: Remove system_uses_lse_atomics()
  arm64: Mark the 'addr' argument to set_ptes() and __set_pte_at() as unused
  drivers/perf: xgene: Use device_get_match_data()
  perf/amlogic: add missing MODULE_DEVICE_TABLE
  arm64/mm: Hoist synchronization out of set_ptes() loop
  ...
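
Userspace probes the new hwcaps via the auxiliary vector. A minimal,
self-contained sketch (the #define fallbacks mirror the uapi hwcap.h
hunk below and only matter when building against pre-6.7 headers):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_SVE_B16B16
    #define HWCAP2_SVE_B16B16 (1UL << 45)
    #endif
    #ifndef HWCAP2_LRCPC3
    #define HWCAP2_LRCPC3 (1UL << 46)
    #endif
    #ifndef HWCAP2_LSE128
    #define HWCAP2_LSE128 (1UL << 47)
    #endif

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            printf("sveb16b16: %s\n", (hwcap2 & HWCAP2_SVE_B16B16) ? "yes" : "no");
            printf("lrcpc3:    %s\n", (hwcap2 & HWCAP2_LRCPC3) ? "yes" : "no");
            printf("lse128:    %s\n", (hwcap2 & HWCAP2_LSE128) ? "yes" : "no");
            return 0;
    }

The strings match the /proc/cpuinfo "Features" names added in the
cpuinfo.c hunk below.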
commit 56ec8e4cd8
Merged by: Linus Torvalds, 2023-11-01 09:34:55 -10:00
89 files changed, 1459 insertions(+), 734 deletions(-)

Documentation/admin-guide/perf/ampere_cspmu.rst

@ -0,0 +1,29 @@
.. SPDX-License-Identifier: GPL-2.0
============================================
Ampere SoC Performance Monitoring Unit (PMU)
============================================
The Ampere SoC PMU is a generic PMU IP that follows the Arm CoreSight PMU
architecture. Therefore, the driver is implemented as a submodule of the
arm_cspmu driver. In the first phase it is used for counting MCU events on
AmpereOne.
MCU PMU events
--------------
The PMU driver supports setting filters for "rank", "bank", and "threshold".
Note that the filters are per PMU instance rather than per event.
Example for perf tool use::
/ # perf list ampere
ampere_mcu_pmu_0/act_sent/ [Kernel PMU event]
<...>
ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event]
<...>
/ # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
sleep 1

Documentation/admin-guide/perf/index.rst

@ -22,3 +22,4 @@ Performance monitor support
nvidia-pmu
meson-ddr-pmu
cxl
ampere_cspmu

Documentation/arch/arm64/cpu-feature-registers.rst

@ -268,6 +268,8 @@ infrastructure:
+------------------------------+---------+---------+
| SHA3 | [35-32] | y |
+------------------------------+---------+---------+
| B16B16 | [27-24] | y |
+------------------------------+---------+---------+
| BF16 | [23-20] | y |
+------------------------------+---------+---------+
| BitPerm | [19-16] | y |

Documentation/arch/arm64/elf_hwcaps.rst

@ -308,6 +308,15 @@ HWCAP2_MOPS
HWCAP2_HBC
Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
HWCAP2_SVE_B16B16
Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
HWCAP2_LRCPC3
Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.
HWCAP2_LSE128
Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011.
4. Unused AT_HWCAP bits
-----------------------

arch/arm/xen/enlighten.c

@ -164,9 +164,6 @@ static int xen_starting_cpu(unsigned int cpu)
BUG_ON(err);
per_cpu(xen_vcpu, cpu) = vcpup;
if (!xen_kernel_unmapped_at_usr())
xen_setup_runstate_info(cpu);
after_register_vcpu_info:
enable_percpu_irq(xen_events_irq, 0);
return 0;
@ -523,9 +520,6 @@ static int __init xen_guest_init(void)
return -EINVAL;
}
if (!xen_kernel_unmapped_at_usr())
xen_time_setup_guest();
if (xen_initial_domain())
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
@ -535,7 +529,13 @@ static int __init xen_guest_init(void)
}
early_initcall(xen_guest_init);
static int __init xen_pm_init(void)
static int xen_starting_runstate_cpu(unsigned int cpu)
{
xen_setup_runstate_info(cpu);
return 0;
}
static int __init xen_late_init(void)
{
if (!xen_domain())
return -ENODEV;
@ -548,9 +548,16 @@ static int __init xen_pm_init(void)
do_settimeofday64(&ts);
}
return 0;
if (xen_kernel_unmapped_at_usr())
return 0;
xen_time_setup_guest();
return cpuhp_setup_state(CPUHP_AP_ARM_XEN_RUNSTATE_STARTING,
"arm/xen_runstate:starting",
xen_starting_runstate_cpu, NULL);
}
late_initcall(xen_pm_init);
late_initcall(xen_late_init);
/* empty stubs */

arch/arm64/Kconfig

@ -1368,6 +1368,8 @@ choice
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
depends on !LD_IS_LLD || LLD_VERSION >= 130000
# https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c
depends on AS_IS_GNU || AS_VERSION >= 150000
help
Say Y if you plan on running a kernel with a big-endian userspace.

arch/arm64/include/asm/Kbuild

@ -6,5 +6,5 @@ generic-y += qspinlock.h
generic-y += parport.h
generic-y += user.h
generated-y += cpucaps.h
generated-y += cpucap-defs.h
generated-y += sysreg-defs.h

arch/arm64/include/asm/alternative-macros.h

@ -226,8 +226,8 @@ alternative_endif
static __always_inline bool
alternative_has_cap_likely(const unsigned long cpucap)
{
compiletime_assert(cpucap < ARM64_NCAPS,
"cpucap must be < ARM64_NCAPS");
if (!cpucap_is_possible(cpucap))
return false;
asm_volatile_goto(
ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
@ -244,8 +244,8 @@ alternative_has_cap_likely(const unsigned long cpucap)
static __always_inline bool
alternative_has_cap_unlikely(const unsigned long cpucap)
{
compiletime_assert(cpucap < ARM64_NCAPS,
"cpucap must be < ARM64_NCAPS");
if (!cpucap_is_possible(cpucap))
return false;
asm_volatile_goto(
ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])

arch/arm64/include/asm/arch_gicv3.h

@ -79,6 +79,14 @@ static inline u64 gic_read_iar_cavium_thunderx(void)
return 0x3ff;
}
static u64 __maybe_unused gic_read_iar(void)
{
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_CAVIUM_23154))
return gic_read_iar_cavium_thunderx();
else
return gic_read_iar_common();
}
static inline void gic_write_ctlr(u32 val)
{
write_sysreg_s(val, SYS_ICC_CTLR_EL1);

arch/arm64/include/asm/archrandom.h

@ -63,7 +63,7 @@ static __always_inline bool __cpu_has_rng(void)
{
if (unlikely(!system_capabilities_finalized() && !preemptible()))
return this_cpu_has_cap(ARM64_HAS_RNG);
return cpus_have_const_cap(ARM64_HAS_RNG);
return alternative_has_cap_unlikely(ARM64_HAS_RNG);
}
static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)

arch/arm64/include/asm/cacheflush.h

@ -132,7 +132,7 @@ void flush_dcache_folio(struct folio *);
static __always_inline void icache_inval_all_pou(void)
{
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
if (alternative_has_cap_unlikely(ARM64_HAS_CACHE_DIC))
return;
asm("ic ialluis");

arch/arm64/include/asm/cpu.h

@ -63,12 +63,6 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64smfr0;
struct cpuinfo_32bit aarch32;
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
u64 reg_zcr;
/* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
u64 reg_smcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);

arch/arm64/include/asm/cpucaps.h

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_CPUCAPS_H
#define __ASM_CPUCAPS_H
#include <asm/cpucap-defs.h>
#ifndef __ASSEMBLY__
#include <linux/types.h>
/*
* Check whether a cpucap is possible at compiletime.
*/
static __always_inline bool
cpucap_is_possible(const unsigned int cap)
{
compiletime_assert(__builtin_constant_p(cap),
"cap must be a constant");
compiletime_assert(cap < ARM64_NCAPS,
"cap must be < ARM64_NCAPS");
switch (cap) {
case ARM64_HAS_PAN:
return IS_ENABLED(CONFIG_ARM64_PAN);
case ARM64_HAS_EPAN:
return IS_ENABLED(CONFIG_ARM64_EPAN);
case ARM64_SVE:
return IS_ENABLED(CONFIG_ARM64_SVE);
case ARM64_SME:
case ARM64_SME2:
case ARM64_SME_FA64:
return IS_ENABLED(CONFIG_ARM64_SME);
case ARM64_HAS_CNP:
return IS_ENABLED(CONFIG_ARM64_CNP);
case ARM64_HAS_ADDRESS_AUTH:
case ARM64_HAS_GENERIC_AUTH:
return IS_ENABLED(CONFIG_ARM64_PTR_AUTH);
case ARM64_HAS_GIC_PRIO_MASKING:
return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI);
case ARM64_MTE:
return IS_ENABLED(CONFIG_ARM64_MTE);
case ARM64_BTI:
return IS_ENABLED(CONFIG_ARM64_BTI);
case ARM64_HAS_TLB_RANGE:
return IS_ENABLED(CONFIG_ARM64_TLB_RANGE);
case ARM64_UNMAP_KERNEL_AT_EL0:
return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
case ARM64_WORKAROUND_843419:
return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419);
case ARM64_WORKAROUND_1742098:
return IS_ENABLED(CONFIG_ARM64_ERRATUM_1742098);
case ARM64_WORKAROUND_2645198:
return IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198);
case ARM64_WORKAROUND_2658417:
return IS_ENABLED(CONFIG_ARM64_ERRATUM_2658417);
case ARM64_WORKAROUND_CAVIUM_23154:
return IS_ENABLED(CONFIG_CAVIUM_ERRATUM_23154);
case ARM64_WORKAROUND_NVIDIA_CARMEL_CNP:
return IS_ENABLED(CONFIG_NVIDIA_CARMEL_CNP_ERRATUM);
case ARM64_WORKAROUND_REPEAT_TLBI:
return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
}
return true;
}
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CPUCAPS_H */
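
The point of cpucap_is_possible() is that a capability gated behind a
disabled Kconfig option now constant-folds to false, so the compiler can
drop the guarded code entirely. A hypothetical caller, for illustration
only:

    /* With CONFIG_ARM64_SVE=n, cpucap_is_possible(ARM64_SVE) is
     * compile-time false, so this whole branch is eliminated. */
    if (cpus_have_cap(ARM64_SVE))
            do_sve_work();      /* placeholder, not a real kernel function */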

arch/arm64/include/asm/cpufeature.h

@ -23,6 +23,7 @@
#include <linux/bug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>
/*
* CPU feature register tracking
@ -380,6 +381,7 @@ struct arm64_cpu_capabilities {
* method is robust against being called multiple times.
*/
const struct arm64_cpu_capabilities *match_list;
const struct cpumask *cpus;
};
static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
@ -438,6 +440,11 @@ unsigned long cpu_get_elf_hwcap2(void);
#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
static __always_inline bool boot_capabilities_finalized(void)
{
return alternative_has_cap_likely(ARM64_ALWAYS_BOOT);
}
static __always_inline bool system_capabilities_finalized(void)
{
return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM);
@ -450,6 +457,8 @@ static __always_inline bool system_capabilities_finalized(void)
*/
static __always_inline bool cpus_have_cap(unsigned int num)
{
if (__builtin_constant_p(num) && !cpucap_is_possible(num))
return false;
if (num >= ARM64_NCAPS)
return false;
return arch_test_bit(num, system_cpucaps);
@ -458,55 +467,37 @@ static __always_inline bool cpus_have_cap(unsigned int num)
/*
* Test for a capability without a runtime check.
*
* Before capabilities are finalized, this returns false.
* After capabilities are finalized, this is patched to avoid a runtime check.
* Before boot capabilities are finalized, this will BUG().
* After boot capabilities are finalized, this is patched to avoid a runtime
* check.
*
* @num must be a compile-time constant.
*/
static __always_inline bool __cpus_have_const_cap(int num)
static __always_inline bool cpus_have_final_boot_cap(int num)
{
if (num >= ARM64_NCAPS)
return false;
return alternative_has_cap_unlikely(num);
if (boot_capabilities_finalized())
return alternative_has_cap_unlikely(num);
else
BUG();
}
/*
* Test for a capability without a runtime check.
*
* Before capabilities are finalized, this will BUG().
* After capabilities are finalized, this is patched to avoid a runtime check.
* Before system capabilities are finalized, this will BUG().
* After system capabilities are finalized, this is patched to avoid a runtime
* check.
*
* @num must be a compile-time constant.
*/
static __always_inline bool cpus_have_final_cap(int num)
{
if (system_capabilities_finalized())
return __cpus_have_const_cap(num);
return alternative_has_cap_unlikely(num);
else
BUG();
}
/*
* Test for a capability, possibly with a runtime check for non-hyp code.
*
* For hyp code, this behaves the same as cpus_have_final_cap().
*
* For non-hyp code:
* Before capabilities are finalized, this behaves as cpus_have_cap().
* After capabilities are finalized, this is patched to avoid a runtime check.
*
* @num must be a compile-time constant.
*/
static __always_inline bool cpus_have_const_cap(int num)
{
if (is_hyp_code())
return cpus_have_final_cap(num);
else if (system_capabilities_finalized())
return __cpus_have_const_cap(num);
else
return cpus_have_cap(num);
}
static inline int __attribute_const__
cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
{
@ -626,7 +617,9 @@ static inline bool id_aa64pfr1_mte(u64 pfr1)
return val >= ID_AA64PFR1_EL1_MTE_MTE2;
}
void __init setup_cpu_features(void);
void __init setup_system_features(void);
void __init setup_user_features(void);
void check_local_cpu_capabilities(void);
u64 read_sanitised_ftr_reg(u32 id);
@ -735,13 +728,12 @@ static inline bool system_supports_mixed_endian(void)
static __always_inline bool system_supports_fpsimd(void)
{
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
}
static inline bool system_uses_hw_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_PAN) &&
cpus_have_const_cap(ARM64_HAS_PAN);
return alternative_has_cap_unlikely(ARM64_HAS_PAN);
}
static inline bool system_uses_ttbr0_pan(void)
@ -752,26 +744,22 @@ static inline bool system_uses_ttbr0_pan(void)
static __always_inline bool system_supports_sve(void)
{
return IS_ENABLED(CONFIG_ARM64_SVE) &&
cpus_have_const_cap(ARM64_SVE);
return alternative_has_cap_unlikely(ARM64_SVE);
}
static __always_inline bool system_supports_sme(void)
{
return IS_ENABLED(CONFIG_ARM64_SME) &&
cpus_have_const_cap(ARM64_SME);
return alternative_has_cap_unlikely(ARM64_SME);
}
static __always_inline bool system_supports_sme2(void)
{
return IS_ENABLED(CONFIG_ARM64_SME) &&
cpus_have_const_cap(ARM64_SME2);
return alternative_has_cap_unlikely(ARM64_SME2);
}
static __always_inline bool system_supports_fa64(void)
{
return IS_ENABLED(CONFIG_ARM64_SME) &&
cpus_have_const_cap(ARM64_SME_FA64);
return alternative_has_cap_unlikely(ARM64_SME_FA64);
}
static __always_inline bool system_supports_tpidr2(void)
@ -781,20 +769,17 @@ static __always_inline bool system_supports_tpidr2(void)
static __always_inline bool system_supports_cnp(void)
{
return IS_ENABLED(CONFIG_ARM64_CNP) &&
cpus_have_const_cap(ARM64_HAS_CNP);
return alternative_has_cap_unlikely(ARM64_HAS_CNP);
}
static inline bool system_supports_address_auth(void)
{
return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH);
return cpus_have_final_boot_cap(ARM64_HAS_ADDRESS_AUTH);
}
static inline bool system_supports_generic_auth(void)
{
return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH);
return alternative_has_cap_unlikely(ARM64_HAS_GENERIC_AUTH);
}
static inline bool system_has_full_ptr_auth(void)
@ -804,14 +789,12 @@ static inline bool system_has_full_ptr_auth(void)
static __always_inline bool system_uses_irq_prio_masking(void)
{
return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
cpus_have_const_cap(ARM64_HAS_GIC_PRIO_MASKING);
return alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
}
static inline bool system_supports_mte(void)
{
return IS_ENABLED(CONFIG_ARM64_MTE) &&
cpus_have_const_cap(ARM64_MTE);
return alternative_has_cap_unlikely(ARM64_MTE);
}
static inline bool system_has_prio_mask_debugging(void)
@ -822,13 +805,18 @@ static inline bool system_has_prio_mask_debugging(void)
static inline bool system_supports_bti(void)
{
return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
return cpus_have_final_cap(ARM64_BTI);
}
static inline bool system_supports_bti_kernel(void)
{
return IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) &&
cpus_have_final_boot_cap(ARM64_BTI);
}
static inline bool system_supports_tlb_range(void)
{
return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
}
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);

arch/arm64/include/asm/cputype.h

@ -86,7 +86,8 @@
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
#define APM_CPU_PART_POTENZA 0x000
#define APM_CPU_PART_XGENE 0x000
#define APM_CPU_VAR_POTENZA 0x00
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2

arch/arm64/include/asm/fpsimd.h

@ -32,6 +32,32 @@
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
static inline unsigned long cpacr_save_enable_kernel_sve(void)
{
unsigned long old = read_sysreg(cpacr_el1);
unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;
write_sysreg(old | set, cpacr_el1);
isb();
return old;
}
static inline unsigned long cpacr_save_enable_kernel_sme(void)
{
unsigned long old = read_sysreg(cpacr_el1);
unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;
write_sysreg(old | set, cpacr_el1);
isb();
return old;
}
static inline void cpacr_restore(unsigned long cpacr)
{
write_sysreg(cpacr, cpacr_el1);
isb();
}
/*
* When we defined the maximum SVE vector length we defined the ABI so
* that the maximum vector length included all the reserved for future
@ -123,12 +149,12 @@ extern void sme_save_state(void *state, int zt);
extern void sme_load_state(void const *state, int zt);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
extern u64 read_smcr_features(void);
/*

arch/arm64/include/asm/hwcap.h

@ -139,6 +139,9 @@
#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS)
#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC)
#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
/*
* This yields a mask that user programs can use to figure out what

arch/arm64/include/asm/irq.h

@ -6,6 +6,9 @@
#include <asm-generic/irq.h>
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
struct pt_regs;
int set_handle_irq(void (*handle_irq)(struct pt_regs *));

arch/arm64/include/asm/irqflags.h

@ -21,12 +21,6 @@
* exceptions should be unmasked.
*/
static __always_inline bool __irqflags_uses_pmr(void)
{
return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
}
static __always_inline void __daif_local_irq_enable(void)
{
barrier();
@ -49,7 +43,7 @@ static __always_inline void __pmr_local_irq_enable(void)
static inline void arch_local_irq_enable(void)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
__pmr_local_irq_enable();
} else {
__daif_local_irq_enable();
@ -77,7 +71,7 @@ static __always_inline void __pmr_local_irq_disable(void)
static inline void arch_local_irq_disable(void)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
__pmr_local_irq_disable();
} else {
__daif_local_irq_disable();
@ -99,7 +93,7 @@ static __always_inline unsigned long __pmr_local_save_flags(void)
*/
static inline unsigned long arch_local_save_flags(void)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
return __pmr_local_save_flags();
} else {
return __daif_local_save_flags();
@ -118,7 +112,7 @@ static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
return __pmr_irqs_disabled_flags(flags);
} else {
return __daif_irqs_disabled_flags(flags);
@ -137,7 +131,7 @@ static __always_inline bool __pmr_irqs_disabled(void)
static inline bool arch_irqs_disabled(void)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
return __pmr_irqs_disabled();
} else {
return __daif_irqs_disabled();
@ -169,7 +163,7 @@ static __always_inline unsigned long __pmr_local_irq_save(void)
static inline unsigned long arch_local_irq_save(void)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
return __pmr_local_irq_save();
} else {
return __daif_local_irq_save();
@ -196,7 +190,7 @@ static __always_inline void __pmr_local_irq_restore(unsigned long flags)
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
if (__irqflags_uses_pmr()) {
if (system_uses_irq_prio_masking()) {
__pmr_local_irq_restore(flags);
} else {
__daif_local_irq_restore(flags);

arch/arm64/include/asm/kvm_emulate.h

@ -71,14 +71,14 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
if (has_vhe() || has_hvhe())
vcpu->arch.hcr_el2 |= HCR_E2H;
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
/* route synchronous external abort exceptions to EL2 */
vcpu->arch.hcr_el2 |= HCR_TEA;
/* trap error record accesses */
vcpu->arch.hcr_el2 |= HCR_TERR;
}
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
vcpu->arch.hcr_el2 |= HCR_FWB;
} else {
/*

arch/arm64/include/asm/kvm_host.h

@ -1052,7 +1052,7 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
static inline bool kvm_system_needs_idmapped_vectors(void)
{
return cpus_have_const_cap(ARM64_SPECTRE_V3A);
return cpus_have_final_cap(ARM64_SPECTRE_V3A);
}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}

arch/arm64/include/asm/kvm_mmu.h

@ -218,7 +218,7 @@ static inline void __clean_dcache_guest_page(void *va, size_t size)
* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
* PoU is not required either in this case.
*/
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
return;
kvm_flush_dcache_to_poc(va, size);

arch/arm64/include/asm/lse.h

@ -16,14 +16,9 @@
#include <asm/atomic_lse.h>
#include <asm/cpucaps.h>
static __always_inline bool system_uses_lse_atomics(void)
{
return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS);
}
#define __lse_ll_sc_body(op, ...) \
({ \
system_uses_lse_atomics() ? \
alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \
__lse_##op(__VA_ARGS__) : \
__ll_sc_##op(__VA_ARGS__); \
})
@ -34,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void)
#else /* CONFIG_ARM64_LSE_ATOMICS */
static inline bool system_uses_lse_atomics(void) { return false; }
#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__)
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc

arch/arm64/include/asm/mmu.h

@ -57,7 +57,7 @@ typedef struct {
static inline bool arm64_kernel_unmapped_at_el0(void)
{
return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
return alternative_has_cap_unlikely(ARM64_UNMAP_KERNEL_AT_EL0);
}
extern void arm64_memblock_init(void);

arch/arm64/include/asm/mmu_context.h

@ -152,7 +152,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@ -162,17 +162,8 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
/*
* cpu_replace_ttbr1() is used when there's a boot CPU
* up (i.e. cpufeature framework is not up yet) and
* latter only when we enable CNP via cpufeature's
* enable() callback.
* Also we rely on the system_cpucaps bit being set before
* calling the enable() function.
*/
if (cnp)
ttbr1 |= TTBR_CNP_BIT;
}
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
@ -189,6 +180,21 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
cpu_uninstall_idmap();
}
static inline void cpu_enable_swapper_cnp(void)
{
__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
}
static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
{
/*
* Only for early TTBR1 replacement before cpucaps are finalized and
* before we've decided whether to use CNP.
*/
WARN_ON(system_capabilities_finalized());
__cpu_replace_ttbr1(pgdp, idmap, false);
}
/*
* It would be nice to return ASIDs back to the allocator, but unfortunately
* that introduces a race with a generation rollover where we could erroneously

arch/arm64/include/asm/module.h

@ -44,8 +44,7 @@ struct plt_entry {
static inline bool is_forbidden_offset_for_adrp(void *place)
{
return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
return cpus_have_final_cap(ARM64_WORKAROUND_843419) &&
((u64)place & 0xfff) >= 0xff8;
}

arch/arm64/include/asm/mte.h

@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
}
void mte_zero_clear_page_tags(void *addr);
void mte_sync_tags(pte_t pte);
void mte_sync_tags(pte_t pte, unsigned int nr_pages);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
static inline void mte_zero_clear_page_tags(void *addr)
{
}
static inline void mte_sync_tags(pte_t pte)
static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)

arch/arm64/include/asm/pgtable-prot.h

@ -75,11 +75,7 @@ extern bool arm64_use_ng_mappings;
* If we have userspace only BTI we don't want to mark kernel pages
* guarded even if the system does support BTI.
*/
#ifdef CONFIG_ARM64_BTI_KERNEL
#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0)
#else
#define PTE_MAYBE_GP 0
#endif
#define PTE_MAYBE_GP (system_supports_bti_kernel() ? PTE_GP : 0)
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)

arch/arm64/include/asm/pgtable.h

@ -325,8 +325,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
__func__, pte_val(old_pte), pte_val(pte));
}
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
@ -339,24 +338,22 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
!pte_special(pte) && pte_tagged(pte))
mte_sync_tags(pte);
__check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
mte_sync_tags(pte, nr_pages);
}
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned int nr)
static inline void set_ptes(struct mm_struct *mm,
unsigned long __always_unused addr,
pte_t *ptep, pte_t pte, unsigned int nr)
{
page_table_check_ptes_set(mm, ptep, pte, nr);
__sync_cache_and_tags(pte, nr);
for (;;) {
__set_pte_at(mm, addr, ptep, pte);
__check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
if (--nr == 0)
break;
ptep++;
addr += PAGE_SIZE;
pte_val(pte) += PAGE_SIZE;
}
}
@ -531,18 +528,29 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
static inline void __set_pte_at(struct mm_struct *mm,
unsigned long __always_unused addr,
pte_t *ptep, pte_t pte, unsigned int nr)
{
__sync_cache_and_tags(pte, nr);
__check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
page_table_check_pmd_set(mm, pmdp, pmd);
return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
PMD_SIZE >> PAGE_SHIFT);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
page_table_check_pud_set(mm, pudp, pud);
return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
PUD_SIZE >> PAGE_SHIFT);
}
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))

arch/arm64/include/asm/smp.h

@ -89,9 +89,9 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
extern void arch_send_wakeup_ipi(unsigned int cpu);
#else
static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
static inline void arch_send_wakeup_ipi(unsigned int cpu)
{
BUILD_BUG();
}

arch/arm64/include/asm/spectre.h

@ -73,7 +73,7 @@ static __always_inline void arm64_apply_bp_hardening(void)
{
struct bp_hardening_data *d;
if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
if (!alternative_has_cap_unlikely(ARM64_SPECTRE_V2))
return;
d = this_cpu_ptr(&bp_hardening_data);

arch/arm64/include/asm/tlbflush.h

@ -105,7 +105,7 @@ static inline unsigned long get_trans_granule(void)
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \
if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
level) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
@ -284,16 +284,15 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
/*
* TLB flush deferral is not required on systems which are affected by
* ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
* will have two consecutive TLBI instructions with a dsb(ish) in between
* defeating the purpose (i.e save overall 'dsb ish' cost).
*/
if (unlikely(cpus_have_const_cap(ARM64_WORKAROUND_REPEAT_TLBI)))
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
return false;
#endif
return true;
}

arch/arm64/include/asm/vectors.h

@ -62,7 +62,7 @@ DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
static inline const char *
arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
{
if (arm64_kernel_unmapped_at_el0())
if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
return (char *)(TRAMP_VALIAS + SZ_2K * slot);
WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);

arch/arm64/include/uapi/asm/hwcap.h

@ -104,5 +104,8 @@
#define HWCAP2_SME_F16F16 (1UL << 42)
#define HWCAP2_MOPS (1UL << 43)
#define HWCAP2_HBC (1UL << 44)
#define HWCAP2_SVE_B16B16 (1UL << 45)
#define HWCAP2_LRCPC3 (1UL << 46)
#define HWCAP2_LSE128 (1UL << 47)
#endif /* _UAPI__ASM_HWCAP_H */

arch/arm64/kernel/acpi_parking_protocol.c

@ -103,7 +103,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
&mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
arch_send_wakeup_ipi_mask(cpumask_of(cpu));
arch_send_wakeup_ipi(cpu);
return 0;
}

arch/arm64/kernel/cpu_errata.c

@ -121,22 +121,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
}
static DEFINE_RAW_SPINLOCK(reg_user_mask_modification);
static void __maybe_unused
cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused)
{
struct arm64_ftr_reg *regp;
regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
if (!regp)
return;
raw_spin_lock(&reg_user_mask_modification);
if (regp->user_mask & ID_AA64ISAR1_EL1_BF16_MASK)
regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
raw_spin_unlock(&reg_user_mask_modification);
}
#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
.matches = is_affected_midr_range, \
.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@ -727,7 +711,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
/* Cortex-A510 r0p0 - r1p1 */
ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
.cpu_enable = cpu_clear_bf16_from_user_emulation,
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_2966298

arch/arm64/kernel/cpufeature.c

@ -278,6 +278,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_B16B16_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
@ -611,18 +613,6 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_zcr[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0), /* LEN */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_smcr[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */
ARM64_FTR_END,
};
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
@ -735,10 +725,6 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
/* Op1 = 0, CRn = 1, CRm = 2 */
ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@ -1040,22 +1026,26 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
info->reg_zcr = read_zcr_features();
init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
unsigned long cpacr = cpacr_save_enable_kernel_sve();
vec_init_vq_map(ARM64_VEC_SVE);
cpacr_restore(cpacr);
}
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
info->reg_smcr = read_smcr_features();
unsigned long cpacr = cpacr_save_enable_kernel_sme();
/*
* We mask out SMPS since even if the hardware
* supports priorities the kernel does not at present
* and we block access to them.
*/
info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
vec_init_vq_map(ARM64_VEC_SME);
cpacr_restore(cpacr);
}
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
@ -1289,32 +1279,34 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
/* Probe vector lengths */
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
info->reg_zcr = read_zcr_features();
taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
info->reg_zcr, boot->reg_zcr);
if (!system_capabilities_finalized()) {
unsigned long cpacr = cpacr_save_enable_kernel_sve();
/* Probe vector lengths */
if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SVE);
cpacr_restore(cpacr);
}
}
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
info->reg_smcr = read_smcr_features();
unsigned long cpacr = cpacr_save_enable_kernel_sme();
/*
* We mask out SMPS since even if the hardware
* supports priorities the kernel does not at present
* and we block access to them.
*/
info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
info->reg_smcr, boot->reg_smcr);
/* Probe vector lengths */
if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SME);
cpacr_restore(cpacr);
}
/*
@ -1564,14 +1556,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
}
static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
{
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
return cpuid_feature_extract_signed_field(pfr0,
ID_AA64PFR0_EL1_FP_SHIFT) < 0;
}
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
int scope)
{
@ -1621,7 +1605,7 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
if (is_kdump_kernel())
return false;
if (cpus_have_const_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP))
if (cpus_have_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP))
return false;
return has_cpuid_feature(entry, scope);
@ -1754,16 +1738,15 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(int), int flags);
static phys_addr_t kpti_ng_temp_alloc;
static phys_addr_t __initdata kpti_ng_temp_alloc;
static phys_addr_t kpti_ng_pgd_alloc(int shift)
static phys_addr_t __init kpti_ng_pgd_alloc(int shift)
{
kpti_ng_temp_alloc -= PAGE_SIZE;
return kpti_ng_temp_alloc;
}
static void
kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
static int __init __kpti_install_ng_mappings(void *__unused)
{
typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
extern kpti_remap_fn idmap_kpti_install_ng_mappings;
@ -1776,20 +1759,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
pgd_t *kpti_ng_temp_pgd;
u64 alloc = 0;
if (__this_cpu_read(this_cpu_vector) == vectors) {
const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
__this_cpu_write(this_cpu_vector, v);
}
/*
* We don't need to rewrite the page-tables if either we've done
* it already or we have KASLR enabled and therefore have not
* created any global mappings at all.
*/
if (arm64_use_ng_mappings)
return;
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
if (!cpu) {
@ -1826,14 +1795,39 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
free_pages(alloc, order);
arm64_use_ng_mappings = true;
}
return 0;
}
static void __init kpti_install_ng_mappings(void)
{
/*
* We don't need to rewrite the page-tables if either we've done
* it already or we have KASLR enabled and therefore have not
* created any global mappings at all.
*/
if (arm64_use_ng_mappings)
return;
stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
}
#else
static void
kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
static inline void kpti_install_ng_mappings(void)
{
}
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap)
{
if (__this_cpu_read(this_cpu_vector) == vectors) {
const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
__this_cpu_write(this_cpu_vector, v);
}
}
static int __init parse_kpti(char *str)
{
bool enabled;
@ -1848,6 +1842,8 @@ static int __init parse_kpti(char *str)
early_param("kpti", parse_kpti);
#ifdef CONFIG_ARM64_HW_AFDBM
static struct cpumask dbm_cpus __read_mostly;
static inline void __cpu_enable_hw_dbm(void)
{
u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
@ -1883,35 +1879,22 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
{
if (cpu_can_use_dbm(cap))
if (cpu_can_use_dbm(cap)) {
__cpu_enable_hw_dbm();
cpumask_set_cpu(smp_processor_id(), &dbm_cpus);
}
}
static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
int __unused)
{
static bool detected = false;
/*
* DBM is a non-conflicting feature. i.e, the kernel can safely
* run a mix of CPUs with and without the feature. So, we
* unconditionally enable the capability to allow any late CPU
* to use the feature. We only enable the control bits on the
* CPU, if it actually supports.
*
* We have to make sure we print the "feature" detection only
* when at least one CPU actually uses it. So check if this CPU
* can actually use it and print the message exactly once.
*
* This is safe as all CPUs (including secondary CPUs - due to the
* LOCAL_CPU scope - and the hotplugged CPUs - via verification)
* goes through the "matches" check exactly once. Also if a CPU
* matches the criteria, it is guaranteed that the CPU will turn
* the DBM on, as the capability is unconditionally enabled.
* CPU, if it is supported.
*/
if (!detected && cpu_can_use_dbm(cap)) {
detected = true;
pr_info("detected: Hardware dirty bit management\n");
}
return true;
}
@ -1944,8 +1927,6 @@ int get_cpu_with_amu_feat(void)
static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
{
if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
smp_processor_id());
cpumask_set_cpu(smp_processor_id(), &amu_cpus);
/* 0 reference values signal broken/disabled counters */
@ -2190,12 +2171,23 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
}
#endif /* CONFIG_ARM64_MTE */
static void user_feature_fixup(void)
{
if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
struct arm64_ftr_reg *regp;
regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
if (regp)
regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
}
}
static void elf_hwcap_fixup(void)
{
#ifdef CONFIG_ARM64_ERRATUM_1742098
if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
#ifdef CONFIG_COMPAT
if (cpus_have_cap(ARM64_WORKAROUND_1742098))
compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
#endif /* ARM64_ERRATUM_1742098 */
#endif /* CONFIG_COMPAT */
}
#ifdef CONFIG_KVM
@ -2351,7 +2343,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "Kernel page table isolation (KPTI)",
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
.cpu_enable = kpti_install_ng_mappings,
.cpu_enable = cpu_enable_kpti,
.matches = unmap_kernel_at_el0,
/*
* The ID feature fields below are used to indicate that
@ -2361,11 +2353,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP)
},
{
/* FP/SIMD is not implemented */
.capability = ARM64_HAS_NO_FPSIMD,
.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
.min_field_value = 0,
.matches = has_no_fpsimd,
.capability = ARM64_HAS_FPSIMD,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.cpu_enable = cpu_enable_fpsimd,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, FP, IMP)
},
#ifdef CONFIG_ARM64_PMEM
{
@ -2388,7 +2380,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "Scalable Vector Extension",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SVE,
.cpu_enable = sve_kernel_enable,
.cpu_enable = cpu_enable_sve,
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, SVE, IMP)
},
@ -2405,16 +2397,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
/*
* The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
* Therefore, don't provide .desc as we don't want the detection
* message to be shown until at least one CPU is detected to
* support the feature.
*/
.desc = "Activity Monitors Unit (AMU)",
.capability = ARM64_HAS_AMU_EXTN,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_amu,
.cpu_enable = cpu_amu_enable,
.cpus = &amu_cpus,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP)
},
#endif /* CONFIG_ARM64_AMU_EXTN */
@ -2454,18 +2442,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
* Since we turn this on always, we don't want the user to
* think that the feature is available when it may not be.
* So hide the description.
*
* .desc = "Hardware pagetable Dirty Bit Management",
*
*/
.desc = "Hardware dirty bit management",
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.capability = ARM64_HW_DBM,
.matches = has_hw_dbm,
.cpu_enable = cpu_enable_hw_dbm,
.cpus = &dbm_cpus,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
@ -2641,7 +2623,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SME,
.matches = has_cpuid_feature,
.cpu_enable = sme_kernel_enable,
.cpu_enable = cpu_enable_sme,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, IMP)
},
/* FA64 should be sorted after the base SME capability */
@ -2650,7 +2632,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SME_FA64,
.matches = has_cpuid_feature,
.cpu_enable = fa64_kernel_enable,
.cpu_enable = cpu_enable_fa64,
ARM64_CPUID_FIELDS(ID_AA64SMFR0_EL1, FA64, IMP)
},
{
@ -2658,7 +2640,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SME2,
.matches = has_cpuid_feature,
.cpu_enable = sme2_kernel_enable,
.cpu_enable = cpu_enable_sme2,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, SME2)
},
#endif /* CONFIG_ARM64_SME */
@ -2787,6 +2769,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA512, CAP_HWCAP, KERNEL_HWCAP_SHA512),
HWCAP_CAP(ID_AA64ISAR0_EL1, CRC32, IMP, CAP_HWCAP, KERNEL_HWCAP_CRC32),
HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, IMP, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, FEAT_LSE128, CAP_HWCAP, KERNEL_HWCAP_LSE128),
HWCAP_CAP(ID_AA64ISAR0_EL1, RDM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
HWCAP_CAP(ID_AA64ISAR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA3),
HWCAP_CAP(ID_AA64ISAR0_EL1, SM3, IMP, CAP_HWCAP, KERNEL_HWCAP_SM3),
@ -2807,6 +2790,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR1_EL1, FCMA, IMP, CAP_HWCAP, KERNEL_HWCAP_FCMA),
HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, IMP, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC3, CAP_HWCAP, KERNEL_HWCAP_LRCPC3),
HWCAP_CAP(ID_AA64ISAR1_EL1, FRINTTS, IMP, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(ID_AA64ISAR1_EL1, SB, IMP, CAP_HWCAP, KERNEL_HWCAP_SB),
HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_BF16),
@ -2821,6 +2805,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
@ -2981,7 +2966,7 @@ static void update_cpu_capabilities(u16 scope_mask)
!caps->matches(caps, cpucap_default_scope(caps)))
continue;
if (caps->desc)
if (caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
@ -3153,36 +3138,28 @@ static void verify_local_elf_hwcaps(void)
static void verify_sve_features(void)
{
u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
u64 zcr = read_zcr_features();
unsigned long cpacr = cpacr_save_enable_kernel_sve();
unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
unsigned int len = zcr & ZCR_ELx_LEN_MASK;
if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) {
if (vec_verify_vq_map(ARM64_VEC_SVE)) {
pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
/* Add checks on other ZCR bits here if necessary */
cpacr_restore(cpacr);
}
static void verify_sme_features(void)
{
u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
u64 smcr = read_smcr_features();
unsigned long cpacr = cpacr_save_enable_kernel_sme();
unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
unsigned int len = smcr & SMCR_ELx_LEN_MASK;
if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
if (vec_verify_vq_map(ARM64_VEC_SME)) {
pr_crit("CPU%d: SME: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
/* Add checks on other SMCR bits here if necessary */
cpacr_restore(cpacr);
}
static void verify_hyp_capabilities(void)
@ -3289,7 +3266,6 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap);
* This helper function is used in a narrow window when,
* - The system wide safe registers are set with all the SMP CPUs and,
* - The SYSTEM_FEATURE system_cpucaps may not have been set.
* In all other cases cpus_have_{const_}cap() should be used.
*/
static bool __maybe_unused __system_matches_cap(unsigned int n)
{
@ -3328,23 +3304,50 @@ unsigned long cpu_get_elf_hwcap2(void)
return elf_hwcap[1];
}
static void __init setup_system_capabilities(void)
void __init setup_system_features(void)
{
int i;
/*
* We have finalised the system-wide safe feature
* registers, finalise the capabilities that depend
* on it. Also enable all the available capabilities,
* that are not enabled already.
* The system-wide safe feature register values have been
* finalized. Finalize and log the available system capabilities.
*/
update_cpu_capabilities(SCOPE_SYSTEM);
if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
!cpus_have_cap(ARM64_HAS_PAN))
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
/*
* Enable all the available capabilities which have not been enabled
* already.
*/
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
kpti_install_ng_mappings();
sve_setup();
sme_setup();
/*
* Check for sane CTR_EL0.CWG value.
*/
if (!cache_type_cwg())
pr_warn("No Cache Writeback Granule information, assuming %d\n",
ARCH_DMA_MINALIGN);
for (i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
if (caps && caps->cpus && caps->desc &&
cpumask_any(caps->cpus) < nr_cpu_ids)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
}
}
void __init setup_cpu_features(void)
void __init setup_user_features(void)
{
u32 cwg;
user_feature_fixup();
setup_system_capabilities();
setup_elf_hwcaps(arm64_elf_hwcaps);
if (system_supports_32bit_el0()) {
@ -3352,20 +3355,7 @@ void __init setup_cpu_features(void)
elf_hwcap_fixup();
}
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
sve_setup();
sme_setup();
minsigstksz_setup();
/*
* Check for sane CTR_EL0.CWG value.
*/
cwg = cache_type_cwg();
if (!cwg)
pr_warn("No Cache Writeback Granule information, assuming %d\n",
ARCH_DMA_MINALIGN);
}
static int enable_mismatched_32bit_el0(unsigned int cpu)
@ -3422,7 +3412,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
cpu_enable_swapper_cnp();
}
/*

arch/arm64/kernel/cpuinfo.c

@ -127,6 +127,9 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_F16F16] = "smef16f16",
[KERNEL_HWCAP_MOPS] = "mops",
[KERNEL_HWCAP_HBC] = "hbc",
[KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
[KERNEL_HWCAP_LRCPC3] = "lrcpc3",
[KERNEL_HWCAP_LSE128] = "lse128",
};
#ifdef CONFIG_COMPAT

arch/arm64/kernel/efi.c

@ -113,8 +113,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
if (md->attribute & EFI_MEMORY_XP)
pte = set_pte_bit(pte, __pgprot(PTE_PXN));
else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) &&
system_supports_bti() && spd->has_bti)
else if (system_supports_bti_kernel() && spd->has_bti)
pte = set_pte_bit(pte, __pgprot(PTE_GP));
set_pte(ptep, pte);
return 0;

arch/arm64/kernel/fpsimd.c

@ -1160,44 +1160,20 @@ static void __init sve_efi_setup(void)
panic("Cannot allocate percpu memory for EFI SVE save/restore");
}
/*
* Enable SVE for EL1.
* Intended for use by the cpufeatures code during CPU boot.
*/
void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
}
/*
* Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
* vector length.
*
* Use only if SVE is present.
* This function clobbers the SVE vector length.
*/
u64 read_zcr_features(void)
{
/*
* Set the maximum possible VL, and write zeroes to all other
* bits to see if they stick.
*/
sve_kernel_enable(NULL);
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
/* Return LEN value that would be written to get the maximum VL */
return sve_vq_from_vl(sve_get_vl()) - 1;
}
void __init sve_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
u64 zcr;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
int max_bit;
if (!system_supports_sve())
if (!cpus_have_cap(ARM64_SVE))
return;
/*
@ -1208,17 +1184,8 @@ void __init sve_setup(void)
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sve_vq_map. If not, do our best:
*/
if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
info->max_vl)))
info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
info->max_vl);
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
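/* Worked example of the bitmap convention, assuming the usual
 * __vq_to_bit(vq) == SVE_VQ_MAX - vq encoding: a machine supporting
 * VQ 1..16 has bits 496..511 of vq_map set, so find_first_bit()
 * returns 496, __bit_to_vq(496) == 16, and sve_vl_from_vq(16) == 256
 * bytes is the maximum VL - the lowest set bit encodes the largest VQ.
 */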
/*
* For the default VL, pick the maximum supported value <= 64.
@ -1296,7 +1263,7 @@ static void sme_free(struct task_struct *task)
task->thread.sme_state = NULL;
}
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
{
/* Set priority for all PEs to architecturally defined minimum */
write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
@ -1311,80 +1278,48 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
isb();
}
/*
* This must be called after sme_kernel_enable(), we rely on the
* feature table being sorted to ensure this.
*/
void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
{
/* This must be enabled after SME */
BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);
/* Allow use of ZT0 */
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
SYS_SMCR_EL1);
}
/*
* This must be called after sme_kernel_enable(), we rely on the
* feature table being sorted to ensure this.
*/
void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
{
/* This must be enabled after SME */
BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);
/* Allow use of FA64 */
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
SYS_SMCR_EL1);
}
/*
* Read the pseudo-SMCR used by cpufeatures to identify the supported
* vector length.
*
* Use only if SME is present.
* This function clobbers the SME vector length.
*/
u64 read_smcr_features(void)
{
sme_kernel_enable(NULL);
/*
* Set the maximum possible VL.
*/
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
SYS_SMCR_EL1);
/* Return LEN value that would be written to get the maximum VL */
return sve_vq_from_vl(sme_get_vl()) - 1;
}
void __init sme_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SME];
u64 smcr;
int min_bit;
int min_bit, max_bit;
if (!system_supports_sme())
if (!cpus_have_cap(ARM64_SME))
return;
/*
* SME doesn't require any particular vector length be
* supported but it does require at least one. We should have
* disabled the feature entirely while bringing up CPUs but
* let's double check here.
* let's double check here. The bitmap is SVE_VQ_MAX sized so it
* can be shared with SVE.
*/
WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sme_vq_map. If not, do our best:
*/
if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
info->max_vl)))
info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
info->max_vl);
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
WARN_ON(info->min_vl > info->max_vl);
@ -1529,8 +1464,17 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
*/
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
/* TODO: implement lazy context saving/restoring */
WARN_ON(1);
/* Even if we chose not to use FPSIMD, the hardware could still trap: */
if (!system_supports_fpsimd()) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
/*
* When FPSIMD is enabled, we should never take a trap unless something
* has gone very wrong.
*/
BUG();
}
/*
@ -1771,13 +1715,23 @@ void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
void fpsimd_restore_current_state(void)
{
/*
* For the tasks that were created before we detected the absence of
* FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
* e.g, init. This could be then inherited by the children processes.
* If we later detect that the system doesn't support FP/SIMD,
* we must clear the flag for all the tasks to indicate that the
* FPSTATE is clean (as we can't have one) to avoid looping for ever in
* do_notify_resume().
* TIF_FOREIGN_FPSTATE is set on the init task and copied by
* arch_dup_task_struct() regardless of whether FP/SIMD is detected.
* Thus user threads can have this set even when FP/SIMD hasn't been
* detected.
*
* When FP/SIMD is detected, begin_new_exec() will set
* TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
* and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
* switching tasks. We detect FP/SIMD before we exec the first user
* process, ensuring this has TIF_FOREIGN_FPSTATE set and
* do_notify_resume() will call fpsimd_restore_current_state() to
* install the user FP/SIMD context.
*
* When FP/SIMD is not detected, nothing else will clear or set
* TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
* we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
* looping forever calling fpsimd_restore_current_state().
*/
if (!system_supports_fpsimd()) {
clear_thread_flag(TIF_FOREIGN_FPSTATE);
@ -2110,6 +2064,13 @@ static inline void fpsimd_hotplug_init(void)
static inline void fpsimd_hotplug_init(void) { }
#endif
void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
{
unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
isb();
}
/*
* FP/SIMD support code initialisation.
*/


@ -20,7 +20,7 @@
* ensure that interrupts are not masked at the PMR (because the core will
* not wake up if we block the wake up signal in the interrupt controller).
*/
void noinstr cpu_do_idle(void)
void __cpuidle cpu_do_idle(void)
{
struct arm_cpuidle_irq_context context;
@ -35,7 +35,7 @@ void noinstr cpu_do_idle(void)
/*
* This is our default idle handler.
*/
void noinstr arch_cpu_idle(void)
void __cpuidle arch_cpu_idle(void)
{
/*
* This should do all the clock switching and wait for interrupt


@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
break;
/*
* We only have to consider branch targets that resolve
* to symbols that are defined in a different section.
@ -203,8 +200,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
case R_AARCH64_ADR_PREL_PG_HI21:
if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
!cpus_have_const_cap(ARM64_WORKAROUND_843419))
if (!cpus_have_final_cap(ARM64_WORKAROUND_843419))
break;
/*
@ -239,13 +235,13 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
}
}
if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
cpus_have_const_cap(ARM64_WORKAROUND_843419))
if (cpus_have_final_cap(ARM64_WORKAROUND_843419)) {
/*
* Add some slack so we can skip PLT slots that may trigger
* the erratum due to the placement of the ADRP instruction.
*/
ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
}
return ret;
}
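/* Worked example of the slack sizing, assuming the usual 12-byte
 * (three instruction) struct plt_entry: SZ_4K / 12 == 341 slots per
 * page, so a module needing 1000 PLT slots gets DIV_ROUND_UP(1000, 341)
 * == 3 spares - enough to skip one unusable slot on each 4K page.
 */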
@ -269,9 +265,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
{
int i = 0, j = numrels - 1;
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return 0;
while (i < j) {
if (branch_rela_needs_plt(syms, &rela[i], dstidx))
i++;


@ -35,10 +35,10 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
void mte_sync_tags(pte_t pte)
void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
unsigned int i;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {


@ -454,7 +454,7 @@ static void ssbs_thread_switch(struct task_struct *next)
* If all CPUs implement the SSBS extension, then we just need to
* context-switch the PSTATE field.
*/
if (cpus_have_const_cap(ARM64_SSBS))
if (alternative_has_cap_unlikely(ARM64_SSBS))
return;
spectre_v4_enable_task_mitigation(next);


@ -972,7 +972,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
* When KPTI is in use, the vectors are switched when exiting to
* user-space.
*/
if (arm64_kernel_unmapped_at_el0())
if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
return;
write_sysreg(v, vbar_el1);


@ -32,7 +32,9 @@
#include <linux/irq_work.h>
#include <linux/kernel_stat.h>
#include <linux/kexec.h>
#include <linux/kgdb.h>
#include <linux/kvm_host.h>
#include <linux/nmi.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@ -72,13 +74,19 @@ enum ipi_msg_type {
IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
IPI_WAKEUP,
NR_IPI
NR_IPI,
/*
* Any enum >= NR_IPI and < MAX_IPI is special and not traceable
* with trace_ipi_*
*/
IPI_CPU_BACKTRACE = NR_IPI,
IPI_KGDB_ROUNDUP,
MAX_IPI
};
static int ipi_irq_base __read_mostly;
static int nr_ipi __read_mostly = NR_IPI;
static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
static void ipi_setup(int cpu);
@ -431,9 +439,10 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
setup_cpu_features();
setup_system_features();
hyp_mode_check();
apply_alternatives_all();
setup_user_features();
mark_linear_text_alias_ro();
}
@ -520,7 +529,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
if (!(processor->flags & ACPI_MADT_ENABLED)) {
if (!acpi_gicc_is_usable(processor)) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@ -764,7 +773,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_WAKEUP] = "CPU wake-up interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@ -797,13 +805,6 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
smp_cross_call(mask, IPI_WAKEUP);
}
#endif
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
@ -854,6 +855,38 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
static void arm64_backtrace_ipi(cpumask_t *mask)
{
__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
/*
* NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
* nothing about it truly needs to be implemented using an NMI, it's
* just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
* returned false our backtrace attempt will just use a regular IPI.
*/
nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
}
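/* For illustration, one typical path into this hook:
 *   echo l > /proc/sysrq-trigger
 *     -> trigger_all_cpu_backtrace()
 *     -> nmi_trigger_cpumask_backtrace()
 *     -> arm64_backtrace_ipi() as the raise callback
 */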
#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(void)
{
int this_cpu = raw_smp_processor_id();
int cpu;
for_each_online_cpu(cpu) {
/* No need to roundup ourselves */
if (cpu == this_cpu)
continue;
__ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
}
}
#endif
/*
* Main handler for inter-processor interrupts
*/
@ -897,13 +930,17 @@ static void do_handle_IPI(int ipinr)
break;
#endif
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
case IPI_WAKEUP:
WARN_ONCE(!acpi_parking_protocol_valid(cpu),
"CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
cpu);
case IPI_CPU_BACKTRACE:
/*
* NOTE: in some cases this _won't_ be NMI context. See the
* comment in arch_trigger_cpumask_backtrace().
*/
nmi_cpu_backtrace(get_irq_regs());
break;
case IPI_KGDB_ROUNDUP:
kgdb_nmicallback(cpu, get_irq_regs());
break;
#endif
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
@ -926,6 +963,25 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
__ipi_send_mask(ipi_desc[ipinr], target);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
{
DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis);
if (!system_uses_irq_prio_masking() ||
!static_branch_likely(&supports_pseudo_nmis))
return false;
switch (ipi) {
case IPI_CPU_STOP:
case IPI_CPU_CRASH_STOP:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
default:
return false;
}
}
static void ipi_setup(int cpu)
{
int i;
@ -933,8 +989,14 @@ static void ipi_setup(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
for (i = 0; i < nr_ipi; i++)
enable_percpu_irq(ipi_irq_base + i, 0);
for (i = 0; i < nr_ipi; i++) {
if (ipi_should_be_nmi(i)) {
prepare_percpu_nmi(ipi_irq_base + i);
enable_percpu_nmi(ipi_irq_base + i, 0);
} else {
enable_percpu_irq(ipi_irq_base + i, 0);
}
}
}
#ifdef CONFIG_HOTPLUG_CPU
@ -945,8 +1007,14 @@ static void ipi_teardown(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
for (i = 0; i < nr_ipi; i++)
disable_percpu_irq(ipi_irq_base + i);
for (i = 0; i < nr_ipi; i++) {
if (ipi_should_be_nmi(i)) {
disable_percpu_nmi(ipi_irq_base + i);
teardown_percpu_nmi(ipi_irq_base + i);
} else {
disable_percpu_irq(ipi_irq_base + i);
}
}
}
#endif
@ -954,15 +1022,23 @@ void __init set_smp_ipi_range(int ipi_base, int n)
{
int i;
WARN_ON(n < NR_IPI);
nr_ipi = min(n, NR_IPI);
WARN_ON(n < MAX_IPI);
nr_ipi = min(n, MAX_IPI);
for (i = 0; i < nr_ipi; i++) {
int err;
err = request_percpu_irq(ipi_base + i, ipi_handler,
"IPI", &cpu_number);
WARN_ON(err);
if (ipi_should_be_nmi(i)) {
err = request_percpu_nmi(ipi_base + i, ipi_handler,
"IPI", &cpu_number);
WARN(err, "Could not request IPI %d as NMI, err=%d\n",
i, err);
} else {
err = request_percpu_irq(ipi_base + i, ipi_handler,
"IPI", &cpu_number);
WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
i, err);
}
ipi_desc[i] = irq_to_desc(ipi_base + i);
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
@ -979,6 +1055,17 @@ void arch_smp_send_reschedule(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi(unsigned int cpu)
{
/*
* We use a scheduler IPI to wake the CPU as this avoids the need for a
* dedicated IPI and we can safely handle spurious scheduler IPIs.
*/
smp_send_reschedule(cpu);
}
#endif
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{


@ -55,13 +55,13 @@ void notrace __cpu_suspend_exit(void)
/* Restore CnP bit in TTBR1_EL1 */
if (system_supports_cnp())
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
cpu_enable_swapper_cnp();
/*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
if (cpus_have_const_cap(ARM64_HAS_DIT))
if (alternative_has_cap_unlikely(ARM64_HAS_DIT))
set_pstate_dit(1);
__uaccess_enable_hw_pan();
@ -98,6 +98,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
struct sleep_stack_data state;
struct arm_cpuidle_irq_context context;
/*
* Some portions of CPU state (e.g. PSTATE.{PAN,DIT}) are initialized
* before alternatives are patched, but are only restored by
* __cpu_suspend_exit() after alternatives are patched. To avoid
* accidentally losing these bits we must not attempt to suspend until
* after alternatives have been patched.
*/
WARN_ON(!system_capabilities_finalized());
/* Report any MTE async fault before going to suspend */
mte_suspend_enter();


@ -31,7 +31,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current))
return 0;
if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
/*
* The workaround requires an inner-shareable tlbi.
* We pick the reserved-ASID to minimise the impact.


@ -631,7 +631,7 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
/* Hide DIC so that we can trap the unnecessary maintenance...*/
val &= ~BIT(CTR_EL0_DIC_SHIFT);


@ -212,7 +212,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
if (IS_ERR(ret))
goto up_fail;
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
if (system_supports_bti_kernel())
gp_flags = VM_ARM64_BTI;
vdso_base += VVAR_NR_PAGES * PAGE_SIZE;


@ -284,7 +284,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = kvm_arm_pvtime_supported();
break;
case KVM_CAP_ARM_EL1_32BIT:
r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
r = cpus_have_final_cap(ARM64_HAS_32BIT_EL1);
break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = get_num_brps();
@ -296,7 +296,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = kvm_arm_support_pmu_v3();
break;
case KVM_CAP_ARM_INJECT_SERROR_ESR:
r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
break;
case KVM_CAP_ARM_VM_IPA_SIZE:
r = get_kvm_ipa_limit();
@ -1207,7 +1207,7 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
return 0;
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
return -EINVAL;
/* MTE is incompatible with AArch32 */
@ -1777,7 +1777,7 @@ static void hyp_install_host_vector(void)
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
* wrong, and hyp will crash and burn when it uses any
* cpus_have_const_cap() wrapper.
* cpus_have_*_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
params = this_cpu_ptr_nvhe_sym(kvm_init_params);
@ -2310,7 +2310,7 @@ static int __init init_hyp_mode(void)
if (is_protected_kvm_enabled()) {
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) &&
cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH))
cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH))
pkvm_hyp_init_ptrauth();
init_cpu_logical_map();


@ -815,7 +815,7 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
@ -837,7 +837,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
bool ext_dabt_pending = events->exception.ext_dabt_pending;
if (serror_pending && has_esr) {
if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
return -EINVAL;
if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
@ -874,7 +874,7 @@ u32 __attribute_const__ kvm_target_cpu(void)
break;
case ARM_CPU_IMP_APM:
switch (part_number) {
case APM_CPU_PART_POTENZA:
case APM_CPU_PART_XGENE:
return KVM_ARM_TARGET_XGENE_POTENZA;
}
break;


@ -401,7 +401,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
if (device)
return -EINVAL;
if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
if (system_supports_bti_kernel())
attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
} else {
attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
@ -664,7 +664,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
static bool stage2_has_fwb(struct kvm_pgtable *pgt)
{
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
return false;
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);


@ -1578,7 +1578,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (device)
prot |= KVM_PGTABLE_PROT_DEVICE;
else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
prot |= KVM_PGTABLE_PROT_X;
/*


@ -207,7 +207,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
* CPU left in the system, and certainly not from non-secure
* software).
*/
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
kvm_set_way_flush(vcpu);
return true;


@ -684,7 +684,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n");
if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}


@ -27,7 +27,7 @@ void __delay(unsigned long cycles)
{
cycles_t start = get_cycles();
if (cpus_have_const_cap(ARM64_HAS_WFXT)) {
if (alternative_has_cap_unlikely(ARM64_HAS_WFXT)) {
u64 end = start + cycles;
/*


@ -571,7 +571,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
/* Write implies read */
vm_flags |= VM_WRITE;
/* If EPAN is absent then exec implies read */
if (!cpus_have_const_cap(ARM64_HAS_EPAN))
if (!alternative_has_cap_unlikely(ARM64_HAS_EPAN))
vm_flags |= VM_EXEC;
}


@ -544,8 +544,7 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
/*
* Break-before-make (BBM) is required for all user space mappings
* when the permission changes from executable to non-executable


@ -16,6 +16,7 @@
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/gfp.h>
#include <linux/math.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of.h>
@ -493,8 +494,16 @@ void __init mem_init(void)
{
bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) {
/*
* If no bouncing needed for ZONE_DMA, reduce the swiotlb
* buffer for kmalloc() bouncing to 1MB per 1GB of RAM.
*/
unsigned long size =
DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
swiotlb = true;
}
swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
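/* Worked example of the sizing above: DIV_ROUND_UP(phys_mem, 1024)
 * reserves 1/1024 of RAM, so 4 GB of RAM -> a 4 MB bounce buffer,
 * while 128 GB of RAM -> min(64 MB default, 128 MB) == 64 MB, i.e.
 * the buffer stays capped at the default.
 */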


@ -68,7 +68,7 @@ static int __init adjust_protection_map(void)
* With Enhanced PAN we can honour the execute-only permissions as
* there is no PAN override with such mappings.
*/
if (cpus_have_const_cap(ARM64_HAS_EPAN)) {
if (cpus_have_cap(ARM64_HAS_EPAN)) {
protection_map[VM_EXEC] = PAGE_EXECONLY;
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
}


@ -1469,8 +1469,7 @@ early_initcall(prevent_bootmem_remove_init);
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
/*
* Break-before-make (BBM) is required for all user space mappings
* when the permission changes from executable to non-executable


@ -405,8 +405,7 @@ SYM_FUNC_START(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
mov x1, #3 << 20
msr cpacr_el1, x1 // Enable FP/ASIMD
msr cpacr_el1, xzr // Reset cpacr_el1
mov x1, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x1 // access to the DCC from EL0
isb // Unmask debug exceptions now,


@ -3,7 +3,7 @@
gen := arch/$(ARCH)/include/generated
kapi := $(gen)/asm
kapi-hdrs-y := $(kapi)/cpucaps.h $(kapi)/sysreg-defs.h
kapi-hdrs-y := $(kapi)/cpucap-defs.h $(kapi)/sysreg-defs.h
targets += $(addprefix ../../../, $(kapi-hdrs-y))
@ -17,7 +17,7 @@ quiet_cmd_gen_cpucaps = GEN $@
quiet_cmd_gen_sysreg = GEN $@
cmd_gen_sysreg = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
$(kapi)/cpucap-defs.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
$(call if_changed,gen_cpucaps)
$(kapi)/sysreg-defs.h: $(src)/gen-sysreg.awk $(src)/sysreg FORCE


@ -27,6 +27,7 @@ HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
HAS_FGT
HAS_FPSIMD
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
@ -39,7 +40,6 @@ HAS_LDAPR
HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
HAS_NO_FPSIMD
HAS_NO_HW_PREFETCH
HAS_PAN
HAS_S1PIE


@ -15,8 +15,8 @@ function fatal(msg) {
/^#/ { next }
BEGIN {
print "#ifndef __ASM_CPUCAPS_H"
print "#define __ASM_CPUCAPS_H"
print "#ifndef __ASM_CPUCAP_DEFS_H"
print "#define __ASM_CPUCAP_DEFS_H"
print ""
print "/* Generated file - do not edit */"
cap_num = 0
@ -31,7 +31,7 @@ BEGIN {
END {
printf("#define ARM64_NCAPS\t\t\t\t\t%d\n", cap_num)
print ""
print "#endif /* __ASM_CPUCAPS_H */"
print "#endif /* __ASM_CPUCAP_DEFS_H */"
}
# Any lines not handled by previous rules are unexpected
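# For illustration, with the renamed include guard the generated header
# is bracketed as (per-capability lines elided):
#
#   #ifndef __ASM_CPUCAP_DEFS_H
#   #define __ASM_CPUCAP_DEFS_H
#
#   /* Generated file - do not edit */
#   ...one #define per capability...
#   #define ARM64_NCAPS			<count>
#
#   #endif /* __ASM_CPUCAP_DEFS_H */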


@ -1026,7 +1026,11 @@ UnsignedEnum 35:32 SHA3
0b0000 NI
0b0001 IMP
EndEnum
Res0 31:24
Res0 31:28
UnsignedEnum 27:24 B16B16
0b0000 NI
0b0001 IMP
EndEnum
UnsignedEnum 23:20 BF16
0b0000 NI
0b0001 IMP
@ -1235,6 +1239,7 @@ EndEnum
UnsignedEnum 23:20 ATOMIC
0b0000 NI
0b0010 IMP
0b0011 FEAT_LSE128
EndEnum
UnsignedEnum 19:16 CRC32
0b0000 NI
@ -1305,6 +1310,7 @@ UnsignedEnum 23:20 LRCPC
0b0000 NI
0b0001 IMP
0b0010 LRCPC2
0b0011 LRCPC3
EndEnum
UnsignedEnum 19:16 FCMA
0b0000 NI


@ -90,7 +90,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
struct acpi_madt_generic_interrupt *gicc =
container_of(entry, struct acpi_madt_generic_interrupt, header);
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return -ENODEV;
/* device_declaration means Device object in DSDT, in the


@ -836,8 +836,9 @@ static u64 __arch_timer_check_delta(void)
* Note that TVAL is signed, thus has only 31 of its
* 32 bits to express magnitude.
*/
MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
APM_CPU_PART_POTENZA)),
MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
APM_CPU_PART_XGENE),
APM_CPU_VAR_POTENZA, 0x0, 0xf),
{},
};
@ -917,7 +918,7 @@ static void arch_timer_evtstrm_enable(unsigned int divider)
#ifdef CONFIG_ARM64
/* ECV is likely to require a large divider. Use the EVNTIS flag. */
if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
if (cpus_have_final_cap(ARM64_HAS_ECV) && divider > 15) {
cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
divider -= 8;
}
@ -955,6 +956,30 @@ static void arch_timer_configure_evtstream(void)
arch_timer_evtstrm_enable(max(0, lsb));
}
static int arch_timer_evtstrm_starting_cpu(unsigned int cpu)
{
arch_timer_configure_evtstream();
return 0;
}
static int arch_timer_evtstrm_dying_cpu(unsigned int cpu)
{
cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
return 0;
}
static int __init arch_timer_evtstrm_register(void)
{
if (!arch_timer_evt || !evtstrm_enable)
return 0;
return cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING,
"clockevents/arm/arch_timer_evtstrm:starting",
arch_timer_evtstrm_starting_cpu,
arch_timer_evtstrm_dying_cpu);
}
core_initcall(arch_timer_evtstrm_register);
static void arch_counter_set_user_access(void)
{
u32 cntkctl = arch_timer_get_cntkctl();
@ -1016,8 +1041,6 @@ static int arch_timer_starting_cpu(unsigned int cpu)
}
arch_counter_set_user_access();
if (evtstrm_enable)
arch_timer_configure_evtstream();
return 0;
}
@ -1164,8 +1187,6 @@ static int arch_timer_dying_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
arch_timer_stop(clk);
return 0;
}
@ -1279,6 +1300,7 @@ static int __init arch_timer_register(void)
out_free:
free_percpu(arch_timer_evt);
arch_timer_evt = NULL;
out:
return err;
}


@ -78,6 +78,13 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
/*
* There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs
* are potentially stolen by the secure side. Some code, especially code dealing
* with hwirq IDs, is simplified by accounting for all 16.
*/
#define SGI_NR 16
/*
* The behaviours of RPR and PMR registers differ depending on the value of
* SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the
@ -99,7 +106,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
* - Figure 4-7 Secure read of the priority field for a Non-secure Group 1
* interrupt.
*/
static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
EXPORT_SYMBOL(gic_nonsecure_priorities);
@ -125,8 +132,8 @@ EXPORT_SYMBOL(gic_nonsecure_priorities);
__priority; \
})
/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
static refcount_t *ppi_nmi_refs;
/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */
static refcount_t *rdist_nmi_refs;
static struct gic_kvm_info gic_v3_kvm_info __initdata;
static DEFINE_PER_CPU(bool, has_rss);
@ -270,17 +277,6 @@ static void gic_redist_wait_for_rwp(void)
gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP);
}
#ifdef CONFIG_ARM64
static u64 __maybe_unused gic_read_iar(void)
{
if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
return gic_read_iar_cavium_thunderx();
else
return gic_read_iar_common();
}
#endif
static void gic_enable_redist(bool enable)
{
void __iomem *rbase;
@ -519,9 +515,22 @@ static u32 __gic_get_ppi_index(irq_hw_number_t hwirq)
}
}
static u32 gic_get_ppi_index(struct irq_data *d)
static u32 __gic_get_rdist_index(irq_hw_number_t hwirq)
{
return __gic_get_ppi_index(d->hwirq);
switch (__get_intid_range(hwirq)) {
case SGI_RANGE:
case PPI_RANGE:
return hwirq;
case EPPI_RANGE:
return hwirq - EPPI_BASE_INTID + 32;
default:
unreachable();
}
}
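/* For example, with the usual EPPI_BASE_INTID of 1056 the resulting
 * per-redistributor index space is:
 *   SGIs  hwirq    0..15 -> index  0..15
 *   PPIs  hwirq   16..31 -> index 16..31
 *   EPPIs hwirq 1056..   -> index 32..  (hwirq - EPPI_BASE_INTID + 32)
 */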
static u32 gic_get_rdist_index(struct irq_data *d)
{
return __gic_get_rdist_index(d->hwirq);
}
static int gic_irq_nmi_setup(struct irq_data *d)
@ -545,11 +554,14 @@ static int gic_irq_nmi_setup(struct irq_data *d)
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
u32 idx = gic_get_ppi_index(d);
u32 idx = gic_get_rdist_index(d);
/* Setting up PPI as NMI, only switch handler for first NMI */
if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
refcount_set(&ppi_nmi_refs[idx], 1);
/*
* Setting up a percpu interrupt as NMI, only switch handler
* for first NMI
*/
if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
refcount_set(&rdist_nmi_refs[idx], 1);
desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
}
} else {
@ -582,10 +594,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
u32 idx = gic_get_ppi_index(d);
u32 idx = gic_get_rdist_index(d);
/* Tearing down NMI, only switch handler for last NMI */
if (refcount_dec_and_test(&ppi_nmi_refs[idx]))
if (refcount_dec_and_test(&rdist_nmi_refs[idx]))
desc->handle_irq = handle_percpu_devid_irq;
} else {
desc->handle_irq = handle_fasteoi_irq;
@ -1279,10 +1291,10 @@ static void gic_cpu_init(void)
rbase = gic_data_rdist_sgi_base();
/* Configure SGIs/PPIs as non-secure Group-1 */
for (i = 0; i < gic_data.ppi_nr + 16; i += 32)
for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32)
writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8);
gic_cpu_config(rbase, gic_data.ppi_nr + 16, gic_redist_wait_for_rwp);
gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp);
/* initialise system registers */
gic_cpu_sys_reg_init();
@ -1952,12 +1964,13 @@ static void gic_enable_nmi_support(void)
return;
}
ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL);
if (!ppi_nmi_refs)
rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR,
sizeof(*rdist_nmi_refs), GFP_KERNEL);
if (!rdist_nmi_refs)
return;
for (i = 0; i < gic_data.ppi_nr; i++)
refcount_set(&ppi_nmi_refs[i], 0);
for (i = 0; i < gic_data.ppi_nr + SGI_NR; i++)
refcount_set(&rdist_nmi_refs[i], 0);
pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n",
gic_has_relaxed_pmr_sync() ? "relaxed" : "forced");
@ -2074,6 +2087,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gic_dist_init();
gic_cpu_init();
gic_enable_nmi_support();
gic_smp_init();
gic_cpu_pm_init();
@ -2086,8 +2100,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gicv2m_init(handle, gic_data.domain);
}
gic_enable_nmi_support();
return 0;
out_free:
@ -2380,8 +2392,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
void __iomem *redist_base;
/* GICC entry which has !ACPI_MADT_ENABLED is not usable, so skip it */
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return 0;
redist_base = ioremap(gicc->gicr_base_address, size);
@ -2431,7 +2442,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
* If GICC is enabled and has valid gicr base address, then it means
* GICR base is presented via GICC
*/
if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) {
acpi_data.enabled_rdists++;
return 0;
}
@ -2440,7 +2451,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
* It's perfectly valid for firmware to pass a disabled GICC entry; the
* driver should not treat this as an error and should skip the entry
* instead of failing the probe.
*/
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return 0;
return -ENODEV;
@ -2499,8 +2510,7 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea
int maint_irq_mode;
static int first_madt = true;
/* Skip unusable CPUs */
if (!(gicc->flags & ACPI_MADT_ENABLED))
if (!acpi_gicc_is_usable(gicc))
return 0;
maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?


@ -377,6 +377,7 @@ static const struct of_device_id meson_ddr_pmu_dt_match[] = {
},
{}
};
MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match);
static struct platform_driver g12_ddr_pmu_driver = {
.probe = g12_ddr_pmu_probe,


@ -112,7 +112,9 @@
#define CMN_DTM_PMEVCNTSR 0x240
#define CMN_DTM_UNIT_INFO 0x0910
#define CMN650_DTM_UNIT_INFO 0x0910
#define CMN_DTM_UNIT_INFO 0x0960
#define CMN_DTM_UNIT_INFO_DTC_DOMAIN GENMASK_ULL(1, 0)
#define CMN_DTM_NUM_COUNTERS 4
/* Want more local counters? Why not replicate the whole DTM! Ugh... */
@ -279,16 +281,13 @@ struct arm_cmn_node {
u16 id, logid;
enum cmn_node_type type;
int dtm;
union {
/* DN/HN-F/CXHA */
struct {
u8 val : 4;
u8 count : 4;
} occupid[SEL_MAX];
/* XP */
u8 dtc;
};
u8 dtm;
s8 dtc;
/* DN/HN-F/CXHA */
struct {
u8 val : 4;
u8 count : 4;
} occupid[SEL_MAX];
union {
u8 event[4];
__le32 event_sel;
@ -538,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++) {
u8 dtc = cmn->xps[xp_base + x].dtc;
s8 dtc = cmn->xps[xp_base + x].dtc;
if (dtc & (dtc - 1))
if (dtc < 0)
seq_puts(s, " DTC ?? |");
else
seq_printf(s, " DTC %ld |", __ffs(dtc));
seq_printf(s, " DTC %d |", dtc);
}
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++)
@ -587,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
u64 dtm_idx[4];
unsigned int dtc_idx;
u8 dtcs_used;
s8 dtc_idx[CMN_MAX_DTCS];
u8 num_dns;
u8 dtm_offset;
bool wide_sel;
@ -598,6 +596,10 @@ struct arm_cmn_hw_event {
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
/* @i is the DTC number, @idx is the counter index on that DTC */
#define for_each_hw_dtc_idx(hw, i, idx) \
for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)
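/* For example, with hw->dtc_idx == { 2, -1, 5, -1 } the loop body runs
 * twice: i == 0 with idx == 2, then i == 2 with idx == 5; domains 1 and
 * 3 are skipped because their dtc_idx of -1 marks no counter allocated
 * in that DTC.
 */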
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@ -1427,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
u64 count;
for (i = 0; hw->dtcs_used & (1U << i); i++) {
writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
cmn->dtc[i].counters[hw->dtc_idx] = event;
for_each_hw_dtc_idx(hw, i, idx) {
writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
cmn->dtc[i].counters[idx] = event;
}
count = arm_cmn_read_dtm(cmn, hw, false);
@ -1445,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event)
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
u64 delta, new, prev;
unsigned long flags;
unsigned int i;
if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
i = __ffs(hw->dtcs_used);
delta = arm_cmn_read_cc(cmn->dtc + i);
if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
local64_add(delta, &event->count);
return;
}
@ -1459,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event)
delta = new - prev;
local_irq_save(flags);
for (i = 0; hw->dtcs_used & (1U << i); i++) {
new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
for_each_hw_dtc_idx(hw, i, idx) {
new = arm_cmn_read_counter(cmn->dtc + i, idx);
delta += new << 16;
}
local_irq_restore(flags);
@ -1516,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
i = __ffs(hw->dtcs_used);
i = hw->dtc_idx[0];
writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
cmn->dtc[i].cc_active = true;
} else if (type == CMN_TYPE_WP) {
@ -1547,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
i = __ffs(hw->dtcs_used);
i = hw->dtc_idx[0];
cmn->dtc[i].cc_active = false;
} else if (type == CMN_TYPE_WP) {
int wp_idx = arm_cmn_wp_idx(event);
@ -1571,7 +1570,7 @@ struct arm_cmn_val {
u8 dtm_count[CMN_MAX_DTMS];
u8 occupid[CMN_MAX_DTMS][SEL_MAX];
u8 wp[CMN_MAX_DTMS][4];
int dtc_count;
int dtc_count[CMN_MAX_DTCS];
bool cycles;
};
@ -1592,7 +1591,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
return;
}
val->dtc_count++;
for_each_hw_dtc_idx(hw, dtc, idx)
val->dtc_count[dtc]++;
for_each_hw_dn(hw, dn, i) {
int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
@ -1639,8 +1639,9 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
goto done;
}
if (val->dtc_count == CMN_DT_NUM_COUNTERS)
goto done;
for (i = 0; i < CMN_MAX_DTCS; i++)
if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
goto done;
for_each_hw_dn(hw, dn, i) {
int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
@ -1733,12 +1734,19 @@ static int arm_cmn_event_init(struct perf_event *event)
hw->dn = arm_cmn_node(cmn, type);
if (!hw->dn)
return -EINVAL;
memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx));
for (dn = hw->dn; dn->type == type; dn++) {
if (bynodeid && dn->id != nodeid) {
hw->dn++;
continue;
}
hw->num_dns++;
if (dn->dtc < 0)
memset(hw->dtc_idx, 0, cmn->num_dtcs);
else
hw->dtc_idx[dn->dtc] = 0;
if (bynodeid)
break;
}
@ -1750,12 +1758,6 @@ static int arm_cmn_event_init(struct perf_event *event)
nodeid, nid.x, nid.y, nid.port, nid.dev, type);
return -EINVAL;
}
/*
* Keep assuming non-cycles events count in all DTC domains; turns out
* it's hard to make a worthwhile optimisation around this, short of
* going all-in with domain-local counter allocation as well.
*/
hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
return arm_cmn_validate_group(cmn, event);
}
@ -1781,46 +1783,48 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
for (i = 0; hw->dtcs_used & (1U << i); i++)
cmn->dtc[i].counters[hw->dtc_idx] = NULL;
for_each_hw_dtc_idx(hw, j, idx)
cmn->dtc[j].counters[idx] = NULL;
}
static int arm_cmn_event_add(struct perf_event *event, int flags)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
struct arm_cmn_dtc *dtc = &cmn->dtc[0];
struct arm_cmn_node *dn;
enum cmn_node_type type = CMN_EVENT_TYPE(event);
unsigned int i, dtc_idx, input_sel;
unsigned int input_sel, i = 0;
if (type == CMN_TYPE_DTC) {
i = 0;
while (cmn->dtc[i].cycles)
if (++i == cmn->num_dtcs)
return -ENOSPC;
cmn->dtc[i].cycles = event;
hw->dtc_idx = CMN_DT_NUM_COUNTERS;
hw->dtcs_used = 1U << i;
hw->dtc_idx[0] = i;
if (flags & PERF_EF_START)
arm_cmn_event_start(event, 0);
return 0;
}
/* Grab a free global counter first... */
dtc_idx = 0;
while (dtc->counters[dtc_idx])
if (++dtc_idx == CMN_DT_NUM_COUNTERS)
return -ENOSPC;
/* Grab the global counters first... */
for_each_hw_dtc_idx(hw, j, idx) {
if (cmn->part == PART_CMN600 && j > 0) {
idx = hw->dtc_idx[0];
} else {
idx = 0;
while (cmn->dtc[j].counters[idx])
if (++idx == CMN_DT_NUM_COUNTERS)
goto free_dtms;
}
hw->dtc_idx[j] = idx;
}
hw->dtc_idx = dtc_idx;
/* ...then the local counters to feed it. */
/* ...then the local counters to feed them */
for_each_hw_dn(hw, dn, i) {
struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
unsigned int dtm_idx, shift;
unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
u64 reg;
dtm_idx = 0;
@ -1839,11 +1843,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
tmp = dtm->wp_event[wp_idx ^ 1];
if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
goto free_dtms;
input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
dtm->wp_event[wp_idx] = dtc_idx;
dtm->wp_event[wp_idx] = hw->dtc_idx[d];
writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
@ -1863,7 +1867,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
dtm->input_sel[dtm_idx] = input_sel;
shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift;
dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
@ -1891,7 +1895,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
arm_cmn_event_stop(event, PERF_EF_UPDATE);
if (type == CMN_TYPE_DTC)
cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
else
arm_cmn_event_clear(cmn, event, hw->num_dns);
}
@ -2072,7 +2076,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
{
struct arm_cmn_node *dn, *xp;
int dtc_idx = 0;
u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
if (!cmn->dtc)
@ -2082,23 +2085,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) {
/* We do at least know that a DTC's XP must be in that DTC's domain */
dn = arm_cmn_node(cmn, CMN_TYPE_DTC);
for (int i = 0; i < cmn->num_dtcs; i++)
arm_cmn_node_to_xp(cmn, dn + i)->dtc = i;
}
for (dn = cmn->dns; dn->type; dn++) {
if (dn->type == CMN_TYPE_XP) {
dn->dtc &= dtcs_present;
if (dn->type == CMN_TYPE_XP)
continue;
}
xp = arm_cmn_node_to_xp(cmn, dn);
dn->dtc = xp->dtc;
dn->dtm = xp->dtm;
if (cmn->multi_dtm)
dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
if (dn->type == CMN_TYPE_DTC) {
int err;
/* We do at least know that a DTC's XP must be in that DTC's domain */
if (xp->dtc == 0xf)
xp->dtc = 1 << dtc_idx;
err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
if (err)
return err;
}
@ -2117,6 +2123,16 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
return 0;
}
static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region)
{
int offset = CMN_DTM_UNIT_INFO;
if (cmn->part == PART_CMN650 || cmn->part == PART_CI700)
offset = CMN650_DTM_UNIT_INFO;
return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
}
static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
{
int level;
@ -2246,9 +2262,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->mesh_x = xp->logid;
if (cmn->part == PART_CMN600)
xp->dtc = 0xf;
xp->dtc = -1;
else
xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
xp->dtm = dtm - cmn->dtms;
arm_cmn_init_dtm(dtm++, xp, 0);


@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
tristate "ARM Coresight Architecture PMU"
@ -10,3 +10,20 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
based on ARM CoreSight PMU architecture. Note that this PMU
architecture does not have relationship with the ARM CoreSight
Self-Hosted Tracing.
config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
tristate "NVIDIA Coresight Architecture PMU"
depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
help
Provides NVIDIA-specific attributes for performance monitoring unit
(PMU) devices based on ARM CoreSight PMU architecture.
config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
tristate "Ampere Coresight Architecture PMU"
depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
help
Provides Ampere-specific attributes for performance monitoring unit
(PMU) devices based on ARM CoreSight PMU architecture.
In the first phase, the driver enables support for the MCU PMU used
in the AmpereOne SoC family.


@ -1,6 +1,10 @@
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
arm_cspmu_module-y := arm_cspmu.o
obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o


@ -0,0 +1,272 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Ampere SoC PMU (Performance Monitor Unit)
*
* Copyright (c) 2023, Ampere Computing LLC
*/
#include <linux/io.h>
#include <linux/module.h>
#include <linux/topology.h>
#include "arm_cspmu.h"
#define PMAUXR0 0xD80
#define PMAUXR1 0xD84
#define PMAUXR2 0xD88
#define PMAUXR3 0xD8C
#define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
struct ampere_cspmu_ctx {
const char *name;
struct attribute **event_attr;
struct attribute **format_attr;
};
static DEFINE_IDA(mcu_pmu_ida);
#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
static inline u32 get_##_name(const struct perf_event *event) \
{ \
return FIELD_GET(GENMASK_ULL(_end, _start), \
event->attr._config); \
} \
SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
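For reference, the first instantiation above expands to roughly:

static inline u32 get_event(const struct perf_event *event)
{
	/* bits [8:0] of event->attr.config */
	return FIELD_GET(GENMASK_ULL(8, 0), event->attr.config);
}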
static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00),
ARM_CSPMU_EVENT_ATTR(act_sent, 0x01),
ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02),
ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03),
ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04),
ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05),
ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06),
ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07),
ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08),
ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09),
ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a),
ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b),
ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c),
ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d),
ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e),
ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f),
ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10),
ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11),
ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12),
ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13),
ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14),
ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15),
ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16),
ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17),
ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18),
ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19),
ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a),
ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b),
ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c),
ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d),
ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e),
ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f),
ARM_CSPMU_EVENT_ATTR(rank_change, 0x20),
ARM_CSPMU_EVENT_ATTR(dir_change, 0x21),
ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22),
ARM_CSPMU_EVENT_ATTR(rank_active, 0x23),
ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24),
ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25),
ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26),
ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27),
ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28),
ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29),
ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a),
ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b),
ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c),
ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d),
ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32),
ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33),
ARM_CSPMU_EVENT_ATTR(zq_start, 0x34),
ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35),
ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36),
ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37),
ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38),
ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39),
ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a),
ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b),
ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
static struct attribute *ampereone_mcu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),
ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),
ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"),
NULL,
};
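A hypothetical usage example, using the instance name format that
ampere_cspmu_format_name() produces further down and illustrative
filter values:

	perf stat -e ampere_mcu_pmu_0/rd_sent,rank=0x1,bank=0x1,threshold=0x4/ -a sleep 1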
static struct attribute **
ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
{
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
return ctx->event_attr;
}
static struct attribute **
ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
{
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
return ctx->format_attr;
}
static const char *
ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
{
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
return ctx->name;
}
static u32 ampere_cspmu_event_filter(const struct perf_event *event)
{
/*
* PMEVFILTR and PMCCFILTR aren't used by the Ampere SoC PMU but are
* marked as RES0. Make sure PMCCFILTR is written as zero.
*/
return 0;
}
static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
struct hw_perf_event *hwc,
u32 filter)
{
struct perf_event *event;
unsigned int idx;
u32 threshold, rank, bank;
/*
* At this point, all the events have the same filter settings.
* Therefore, take the first event and use its configuration.
*/
idx = find_first_bit(cspmu->hw_events.used_ctrs,
cspmu->cycle_counter_logical_idx);
event = cspmu->hw_events.events[idx];
threshold = get_threshold(event);
rank = get_rank(event);
bank = get_bank(event);
writel(threshold, cspmu->base0 + PMAUXR0);
writel(rank, cspmu->base0 + PMAUXR1);
writel(bank, cspmu->base0 + PMAUXR2);
}
static int ampere_cspmu_validate_configs(struct perf_event *event,
struct perf_event *event2)
{
if (get_threshold(event) != get_threshold(event2) ||
get_rank(event) != get_rank(event2) ||
get_bank(event) != get_bank(event2))
return -EINVAL;
return 0;
}
static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
struct perf_event *new)
{
struct perf_event *curr, *leader = new->group_leader;
unsigned int idx;
int ret;
ret = ampere_cspmu_validate_configs(new, leader);
if (ret)
return ret;
/* We compare the global filter settings to the existing events */
idx = find_first_bit(cspmu->hw_events.used_ctrs,
cspmu->cycle_counter_logical_idx);
/* This is the first event, thus any configuration is fine */
if (idx == cspmu->cycle_counter_logical_idx)
return 0;
curr = cspmu->hw_events.events[idx];
return ampere_cspmu_validate_configs(curr, new);
}
static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
const char *name_pattern)
{
struct device *dev = cspmu->dev;
int id;
id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
if (id < 0)
return ERR_PTR(id);
return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
}
static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
{
struct device *dev = cspmu->dev;
struct ampere_cspmu_ctx *ctx;
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->event_attr = ampereone_mcu_pmu_event_attrs;
ctx->format_attr = ampereone_mcu_format_attrs;
ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
if (IS_ERR_OR_NULL(ctx->name))
return ctx->name ? PTR_ERR(ctx->name) : -ENOMEM;
cspmu->impl.ctx = ctx;
impl_ops->event_filter = ampere_cspmu_event_filter;
impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
impl_ops->validate_event = ampere_cspmu_validate_event;
impl_ops->get_name = ampere_cspmu_get_name;
impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs;
impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs;
return 0;
}
/* Match all Ampere Coresight PMU devices */
static const struct arm_cspmu_impl_match ampere_cspmu_param = {
.pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
.module = THIS_MODULE,
.impl_init_ops = ampere_cspmu_init_ops
};
static int __init ampere_cspmu_init(void)
{
int ret;
ret = arm_cspmu_impl_register(&ampere_cspmu_param);
if (ret)
pr_err("ampere_cspmu backend registration error: %d\n", ret);
return ret;
}
static void __exit ampere_cspmu_exit(void)
{
arm_cspmu_impl_unregister(&ampere_cspmu_param);
}
module_init(ampere_cspmu_init);
module_exit(ampere_cspmu_exit);
MODULE_LICENSE("GPL");


@ -16,7 +16,7 @@
* The user should refer to the vendor technical documentation to get details
* about the supported events.
*
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@ -26,11 +26,11 @@
#include <linux/interrupt.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include "arm_cspmu.h"
#include "nvidia_cspmu.h"
#define PMUNAME "arm_cspmu"
#define DRVNAME "arm-cs-arch-pmu"
@ -112,11 +112,13 @@
*/
#define HILOHI_MAX_POLL 1000
/* JEDEC-assigned JEP106 identification code */
#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
static unsigned long arm_cspmu_cpuhp_state;
static DEFINE_MUTEX(arm_cspmu_lock);
static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
struct hw_perf_event *hwc, u32 filter);
static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
{
return *(struct acpi_apmt_node **)dev_get_platdata(dev);
@ -373,27 +375,45 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
.attrs = arm_cspmu_cpumask_attrs,
};
struct impl_match {
u32 pmiidr;
u32 mask;
int (*impl_init_ops)(struct arm_cspmu *cspmu);
static struct arm_cspmu_impl_match impl_match[] = {
{
.module_name = "nvidia_cspmu",
.pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
.pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
.module = NULL,
.impl_init_ops = NULL,
},
{
.module_name = "ampere_cspmu",
.pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
.pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
.module = NULL,
.impl_init_ops = NULL,
},
{0}
};
static const struct impl_match impl_match[] = {
{
.pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
.mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
.impl_init_ops = nv_cspmu_init_ops
},
{}
};
static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr)
{
struct arm_cspmu_impl_match *match = impl_match;
for (; match->pmiidr_val; match++) {
u32 mask = match->pmiidr_mask;
if ((match->pmiidr_val & mask) == (pmiidr & mask))
return match;
}
return NULL;
}
static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
{
int ret;
int ret = 0;
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
const struct impl_match *match = impl_match;
struct arm_cspmu_impl_match *match;
/*
* Get PMU implementer and product id from APMT node.
@ -405,17 +425,36 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
readl(cspmu->base0 + PMIIDR);
/* Find implementer specific attribute ops. */
for (; match->pmiidr; match++) {
const u32 mask = match->mask;
match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr);
if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
ret = match->impl_init_ops(cspmu);
if (ret)
return ret;
/* Load implementer module and initialize the callbacks. */
if (match) {
mutex_lock(&arm_cspmu_lock);
break;
if (match->impl_init_ops) {
/* Prevent unload until PMU registration is done. */
if (try_module_get(match->module)) {
cspmu->impl.module = match->module;
cspmu->impl.match = match;
ret = match->impl_init_ops(cspmu);
if (ret)
module_put(match->module);
} else {
WARN(1, "arm_cspmu failed to get module: %s\n",
match->module_name);
ret = -EINVAL;
}
} else {
request_module_nowait(match->module_name);
ret = -EPROBE_DEFER;
}
}
mutex_unlock(&arm_cspmu_lock);
if (ret)
return ret;
} else
cspmu->impl.module = THIS_MODULE;
/* Use default callbacks if implementer doesn't provide one. */
CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
@@ -426,6 +465,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter);
return 0;
}
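A hedged summary of the module-load handshake above, using only names from this diff:

/*
 * 1. A device probes whose PMIIDR matches a table entry that has no
 *    impl_init_ops yet (vendor backend not loaded).
 * 2. request_module_nowait() starts an asynchronous load of
 *    match->module_name and the probe backs off with -EPROBE_DEFER.
 * 3. The backend loads and calls arm_cspmu_impl_register(), which fills
 *    in ->module and ->impl_init_ops, then driver_attach() re-runs the
 *    deferred probe.
 * 4. This time try_module_get() pins the backend until registration is
 *    done; arm_cspmu_device_probe() drops that reference after
 *    arm_cspmu_register_pmu() returns.
 */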
@@ -478,11 +518,6 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
struct attribute_group **attr_groups = NULL;
struct device *dev = cspmu->dev;
const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
int ret;
ret = arm_cspmu_init_impl_ops(cspmu);
if (ret)
return NULL;
cspmu->identifier = impl_ops->get_identifier(cspmu);
cspmu->name = impl_ops->get_name(cspmu);
@@ -549,7 +584,7 @@ static void arm_cspmu_disable(struct pmu *pmu)
static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
struct perf_event *event)
{
int idx;
int idx, ret;
struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
if (supports_cycle_counter(cspmu)) {
@@ -583,6 +618,12 @@ static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
if (idx >= cspmu->num_logical_ctrs)
return -EAGAIN;
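/*
 * Let the vendor backend veto the event before the counter is
 * claimed.
 */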
if (cspmu->impl.ops.validate_event) {
ret = cspmu->impl.ops.validate_event(cspmu, event);
if (ret)
return ret;
}
set_bit(idx, hw_events->used_ctrs);
return idx;
@@ -696,7 +737,10 @@ static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
if (use_64b_counter_reg(cspmu)) {
offset = counter_offset(sizeof(u64), event->hw.idx);
writeq(val, cspmu->base1 + offset);
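/*
 * lo_hi_writeq() below issues two 32-bit writes, low word first, for
 * configurations where 64-bit MMIO accesses are not guaranteed to be
 * atomic.
 */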
if (cspmu->has_atomic_dword)
writeq(val, cspmu->base1 + offset);
else
lo_hi_writeq(val, cspmu->base1 + offset);
} else {
offset = counter_offset(sizeof(u32), event->hw.idx);
@@ -789,9 +833,9 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
writel(hwc->config, cspmu->base0 + offset);
}
static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
struct hw_perf_event *hwc,
u32 filter)
static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
struct hw_perf_event *hwc,
u32 filter)
{
u32 offset = PMEVFILTR + (4 * hwc->idx);
@@ -823,7 +867,7 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
arm_cspmu_set_cc_filter(cspmu, filter);
} else {
arm_cspmu_set_event(cspmu, hwc);
arm_cspmu_set_ev_filter(cspmu, hwc, filter);
cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter);
}
hwc->state = 0;
@@ -1147,7 +1191,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
.module = THIS_MODULE,
.module = cspmu->impl.module,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
@@ -1194,11 +1238,17 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
if (ret)
return ret;
ret = arm_cspmu_register_pmu(cspmu);
ret = arm_cspmu_init_impl_ops(cspmu);
if (ret)
return ret;
return 0;
ret = arm_cspmu_register_pmu(cspmu);
/* Matches arm_cspmu_init_impl_ops() above. */
if (cspmu->impl.module != THIS_MODULE)
module_put(cspmu->impl.module);
return ret;
}
static int arm_cspmu_device_remove(struct platform_device *pdev)
@@ -1298,6 +1348,75 @@ static void __exit arm_cspmu_exit(void)
cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
}
int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match)
{
struct arm_cspmu_impl_match *match;
int ret = 0;
match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
if (match) {
mutex_lock(&arm_cspmu_lock);
if (!match->impl_init_ops) {
match->module = impl_match->module;
match->impl_init_ops = impl_match->impl_init_ops;
} else {
/* Broken match table may contain non-unique entries */
WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n",
match->module_name,
match->pmiidr_val,
match->pmiidr_mask);
ret = -EINVAL;
}
mutex_unlock(&arm_cspmu_lock);
if (!ret)
ret = driver_attach(&arm_cspmu_driver.driver);
} else {
pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n",
impl_match->pmiidr_val);
ret = -EINVAL;
}
return ret;
}
EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
static int arm_cspmu_match_device(struct device *dev, const void *match)
{
struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev));
return (cspmu && cspmu->impl.match == match) ? 1 : 0;
}
void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match)
{
struct device *dev;
struct arm_cspmu_impl_match *match;
match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
if (WARN_ON(!match))
return;
/* Unbind the driver from all matching backend devices. */
while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL,
match, arm_cspmu_match_device)))
device_release_driver(dev);
mutex_lock(&arm_cspmu_lock);
match->module = NULL;
match->impl_init_ops = NULL;
mutex_unlock(&arm_cspmu_lock);
}
EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
module_init(arm_cspmu_init);
module_exit(arm_cspmu_exit);


@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0
*
* ARM CoreSight Architecture PMU driver.
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@@ -69,6 +69,10 @@
#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
/* JEDEC-assigned JEP106 identification code */
#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16
struct arm_cspmu;
/* This tracks the events assigned to each counter in the PMU. */
@@ -101,14 +105,34 @@ struct arm_cspmu_impl_ops {
u32 (*event_type)(const struct perf_event *event);
/* Decode filter value from configs */
u32 (*event_filter)(const struct perf_event *event);
/* Set event filter */
void (*set_ev_filter)(struct arm_cspmu *cspmu,
struct hw_perf_event *hwc, u32 filter);
/* Implementation specific event validation */
int (*validate_event)(struct arm_cspmu *cspmu,
struct perf_event *event);
/* Hide/show unsupported events */
umode_t (*event_attr_is_visible)(struct kobject *kobj,
struct attribute *attr, int unused);
};
/* Vendor/implementer registration parameter. */
struct arm_cspmu_impl_match {
/* Backend module. */
struct module *module;
const char *module_name;
/* PMIIDR value/mask. */
u32 pmiidr_val;
u32 pmiidr_mask;
/* Callback to vendor backend to init arm_cspmu_impl::ops. */
int (*impl_init_ops)(struct arm_cspmu *cspmu);
};
/* Vendor/implementer descriptor. */
struct arm_cspmu_impl {
u32 pmiidr;
struct module *module;
struct arm_cspmu_impl_match *match;
struct arm_cspmu_impl_ops ops;
void *ctx;
};
@@ -147,4 +171,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
struct device_attribute *attr,
char *buf);
/* Register vendor backend. */
int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match);
/* Unregister vendor backend. */
void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match);
#endif /* __ARM_CSPMU_H__ */


@@ -1,14 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
/* Support for NVIDIA specific attributes. */
#include <linux/module.h>
#include <linux/topology.h>
#include "nvidia_cspmu.h"
#include "arm_cspmu.h"
#define NV_PCIE_PORT_COUNT 10ULL
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
@@ -351,7 +352,7 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
return name;
}
int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
{
u32 prodid;
struct nv_cspmu_ctx *ctx;
@@ -395,6 +396,31 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
return 0;
}
EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
/* Match all NVIDIA Coresight PMU devices */
static const struct arm_cspmu_impl_match nv_cspmu_param = {
.pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
.module = THIS_MODULE,
.impl_init_ops = nv_cspmu_init_ops
};
static int __init nvidia_cspmu_init(void)
{
int ret;
ret = arm_cspmu_impl_register(&nv_cspmu_param);
if (ret)
pr_err("nvidia_cspmu backend registration error: %d\n", ret);
return ret;
}
static void __exit nvidia_cspmu_exit(void)
{
arm_cspmu_impl_unregister(&nv_cspmu_param);
}
module_init(nvidia_cspmu_init);
module_exit(nvidia_cspmu_exit);
MODULE_LICENSE("GPL v2");


@@ -1,17 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
/* Support for NVIDIA specific attributes. */
#ifndef __NVIDIA_CSPMU_H__
#define __NVIDIA_CSPMU_H__
#include "arm_cspmu.h"
/* Allocate NVIDIA descriptor. */
int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
#endif /* __NVIDIA_CSPMU_H__ */


@@ -1126,7 +1126,7 @@ static void __armv8pmu_probe_pmu(void *info)
pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
/* store PMMIR register for sysfs */
if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31)))
if (is_pmuv3p4(pmuver))
cpu_pmu->reg_pmmir = read_pmmir();
else
cpu_pmu->reg_pmmir = 0;
@@ -1187,10 +1187,7 @@ static void armv8_pmu_register_sysctl_table(void)
}
static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
int (*map_event)(struct perf_event *event),
const struct attribute_group *events,
const struct attribute_group *format,
const struct attribute_group *caps)
int (*map_event)(struct perf_event *event))
{
int ret = armv8pmu_probe_pmu(cpu_pmu);
if (ret)
@@ -1212,27 +1209,17 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->name = name;
cpu_pmu->map_event = map_event;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ?
events : &armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
format : &armv8_pmuv3_format_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ?
caps : &armv8_pmuv3_caps_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group;
armv8_pmu_register_sysctl_table();
return 0;
}
static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name,
int (*map_event)(struct perf_event *event))
{
return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL);
}
#define PMUV3_INIT_SIMPLE(name) \
static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
{ \
return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\
return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \
}
PMUV3_INIT_SIMPLE(armv8_pmuv3)
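Since no caller ever passed custom attribute groups, armv8_pmu_init() now installs the default events/format/caps groups unconditionally; the armv8_pmu_init_nogroups() wrapper above is removed and its remaining callers are converted below.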
@@ -1263,44 +1250,37 @@ PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35",
armv8_a53_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event);
}
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53",
armv8_a53_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event);
}
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57",
armv8_a57_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event);
}
static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72",
armv8_a57_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event);
}
static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73",
armv8_a73_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event);
}
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder",
armv8_thunder_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event);
}
static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan",
armv8_vulcan_map_event);
return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event);
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {


@@ -353,16 +353,15 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
event->cpu = pcie_pmu->on_cpu;
/* Check the type first before going on, otherwise it's not our event */
if (event->attr.type != event->pmu->type)
return -ENOENT;
if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event)))
hwc->event_base = HISI_PCIE_EXT_CNT;
else
hwc->event_base = HISI_PCIE_CNT;
if (event->attr.type != event->pmu->type)
return -ENOENT;
/* Sampling is not supported. */
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EOPNOTSUPP;
@@ -373,6 +372,8 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
if (!hisi_pcie_pmu_validate_event_group(event))
return -EINVAL;
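/* Only modify the event once we know it is ours and valid. */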
event->cpu = pcie_pmu->on_cpu;
return 0;
}


@@ -505,8 +505,8 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
if (ret) {
dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
&pa_pmu->node);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
&pa_pmu->node);
return ret;
}
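The API distinction this fix (and the identical sllc/hns3 hunks below) relies on, as I read the cpuhp interface:

/*
 * cpuhp_state_remove_instance()         - removes the node and invokes
 *                                         the state's teardown callback
 *                                         on each online CPU
 * cpuhp_state_remove_instance_nocalls() - removes the node without
 *                                         invoking any callbacks
 *
 * In this error path the PMU never finished registering, so running
 * the teardown callback would act on a half-initialised instance.
 */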


@@ -450,8 +450,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
if (ret) {
dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
&sllc_pmu->node);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
&sllc_pmu->node);
return ret;
}


@@ -1556,8 +1556,8 @@ static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
if (ret) {
pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
&hns3_pmu->node);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
&hns3_pmu->node);
}
return ret;
@@ -1568,8 +1568,8 @@ static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
perf_pmu_unregister(&hns3_pmu->pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
&hns3_pmu->node);
cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
&hns3_pmu->node);
}
static int hns3_pmu_init_dev(struct pci_dev *pdev)


@@ -16,11 +16,9 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/slab.h>
@@ -1731,6 +1729,12 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
.id = PCP_PMU_V2,
};
#ifdef CONFIG_ACPI
static const struct xgene_pmu_data xgene_pmu_v3_data = {
.id = PCP_PMU_V3,
};
#endif
static const struct xgene_pmu_ops xgene_pmu_ops = {
.mask_int = xgene_pmu_mask_int,
.unmask_int = xgene_pmu_unmask_int,
@@ -1773,9 +1777,9 @@ static const struct of_device_id xgene_pmu_of_match[] = {
MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id xgene_pmu_acpi_match[] = {
{"APMC0D5B", PCP_PMU_V1},
{"APMC0D5C", PCP_PMU_V2},
{"APMC0D83", PCP_PMU_V3},
{"APMC0D5B", (kernel_ulong_t)&xgene_pmu_data},
{"APMC0D5C", (kernel_ulong_t)&xgene_pmu_v2_data},
{"APMC0D83", (kernel_ulong_t)&xgene_pmu_v3_data},
{},
};
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
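The casts are needed because acpi_device_id::driver_data is a kernel_ulong_t rather than a pointer. With both match tables now carrying struct xgene_pmu_data pointers, a single device_get_match_data() call covers the DT and ACPI probe paths, as the probe hunk below shows:

/* Sketch: works for both xgene_pmu_of_match and xgene_pmu_acpi_match. */
dev_data = device_get_match_data(&pdev->dev);
if (!dev_data)
return -ENODEV;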
@@ -1831,7 +1835,6 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
static int xgene_pmu_probe(struct platform_device *pdev)
{
const struct xgene_pmu_data *dev_data;
const struct of_device_id *of_id;
struct xgene_pmu *xgene_pmu;
int irq, rc;
int version;
@@ -1850,24 +1853,10 @@ static int xgene_pmu_probe(struct platform_device *pdev)
xgene_pmu->dev = &pdev->dev;
platform_set_drvdata(pdev, xgene_pmu);
version = -EINVAL;
of_id = of_match_device(xgene_pmu_of_match, &pdev->dev);
if (of_id) {
dev_data = (const struct xgene_pmu_data *) of_id->data;
version = dev_data->id;
}
#ifdef CONFIG_ACPI
if (ACPI_COMPANION(&pdev->dev)) {
const struct acpi_device_id *acpi_id;
acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev);
if (acpi_id)
version = (int) acpi_id->driver_data;
}
#endif
if (version < 0)
dev_data = device_get_match_data(&pdev->dev);
if (!dev_data)
return -ENODEV;
version = dev_data->id;
if (version == PCP_PMU_V3)
xgene_pmu->ops = &xgene_pmu_v3_ops;


@@ -256,6 +256,11 @@ acpi_table_parse_cedt(enum acpi_cedt_type id,
int acpi_parse_mcfg (struct acpi_table_header *header);
void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
{
return gicc->flags & ACPI_MADT_ENABLED;
}
/* the following numa functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);


@@ -172,6 +172,7 @@ enum cpuhp_state {
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
@@ -189,6 +190,7 @@ enum cpuhp_state {
/* Must be the last timer callback */
CPUHP_AP_DUMMY_TIMER_STARTING,
CPUHP_AP_ARM_XEN_STARTING,
CPUHP_AP_ARM_XEN_RUNSTATE_STARTING,
CPUHP_AP_ARM_CORESIGHT_STARTING,
CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,


@@ -81,6 +81,20 @@ static void lrcpc_sigill(void)
asm volatile(".inst 0xb8bfc3e0" : : : );
}
static void lse128_sigill(void)
{
u64 __attribute__ ((aligned (16))) mem[2] = { 10, 20 };
register u64 *memp asm ("x0") = mem;
register u64 val0 asm ("x1") = 5;
register u64 val1 asm ("x2") = 4;
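/*
 * The raw encoding keeps this building on assemblers without
 * FEAT_LSE128 support; the register-asm bindings above pin the
 * operands to the x0/x1/x2 fields baked into the .inst word.
 */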
/* SWPP X1, X2, [X0] */
asm volatile(".inst 0x19228001"
: "+r" (memp), "+r" (val0), "+r" (val1)
:
: "cc", "memory");
}
static void mops_sigill(void)
{
char dst[1], src[1];
@@ -226,6 +240,12 @@ static void sveaes_sigill(void)
asm volatile(".inst 0x4522e400" : : : "z0");
}
static void sveb16b16_sigill(void)
{
/* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
asm volatile(".inst 0xC1E41C00" : : : );
}
static void svepmull_sigill(void)
{
/* PMULLB Z0.Q, Z0.D, Z0.D */
@@ -289,6 +309,19 @@ static void uscat_sigbus(void)
asm volatile(".inst 0xb820003f" : : : );
}
static void lrcpc3_sigill(void)
{
int data[2] = { 1, 2 };
register int *src asm ("x0") = data;
register int data0 asm ("w2") = 0;
register int data1 asm ("w3") = 0;
/* LDIAPP w2, w3, [x0] */
asm volatile(".inst 0x99431802"
: "=r" (data0), "=r" (data1) : "r" (src) :);
}
static const struct hwcap_data {
const char *name;
unsigned long at_hwcap;
@@ -348,6 +381,13 @@ static const struct hwcap_data {
.cpuinfo = "ilrcpc",
.sigill_fn = ilrcpc_sigill,
},
{
.name = "LRCPC3",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_LRCPC3,
.cpuinfo = "lrcpc3",
.sigill_fn = lrcpc3_sigill,
},
{
.name = "LSE",
.at_hwcap = AT_HWCAP,
@@ -364,6 +404,13 @@ static const struct hwcap_data {
.sigbus_fn = uscat_sigbus,
.sigbus_reliable = true,
},
{
.name = "LSE128",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_LSE128,
.cpuinfo = "lse128",
.sigill_fn = lse128_sigill,
},
{
.name = "MOPS",
.at_hwcap = AT_HWCAP2,
@@ -493,6 +540,13 @@ static const struct hwcap_data {
.cpuinfo = "sveaes",
.sigill_fn = sveaes_sigill,
},
{
.name = "SVE2 B16B16",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SVE_B16B16,
.cpuinfo = "sveb16b16",
.sigill_fn = sveb16b16_sigill,
},
{
.name = "SVE2 PMULL",
.at_hwcap = AT_HWCAP2,


@@ -473,6 +473,13 @@ function _start
// mov x8, #__NR_sched_yield // Encourage preemption
// svc #0
#ifdef SSVE
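// S3_3_C4_C2_2 is the generic (numeric) encoding of SVCR, used so the
// test still assembles with toolchains that predate SME support.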
mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1
and x1, x0, #3
cmp x1, #1
b.ne svcr_barf
#endif
mov x21, #0
0: mov x0, x21
bl check_zreg
@@ -553,3 +560,15 @@ function vl_barf
mov x1, #1
svc #0
endfunction
function svcr_barf
mov x10, x0
puts "Bad SVCR: "
mov x0, x10
bl putdecn
mov x8, #__NR_exit
mov x1, #1
svc #0
endfunction