diff --git a/MAINTAINERS b/MAINTAINERS index a87ddad78e26..6b8d31591228 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1120,6 +1120,14 @@ L: linux-i2c@vger.kernel.org S: Supported F: drivers/i2c/busses/i2c-amd-asf-plat.c +AMD NODE DRIVER +M: Mario Limonciello +M: Yazen Ghannam +L: linux-kernel@vger.kernel.org +S: Supported +F: arch/x86/include/asm/amd_node.h +F: arch/x86/kernel/amd_node.c + AMD PDS CORE DRIVER M: Shannon Nelson M: Brett Creeley diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0f7e3acf37e3..f31dade6c127 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1189,7 +1189,7 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_MCE && X86_LOCAL_APIC && AMD_NB + depends on X86_MCE && X86_LOCAL_APIC help Additional support for AMD specific MCE features such as the DRAM Error Threshold. @@ -3129,6 +3129,10 @@ config TS5500 endif # X86_32 config AMD_NB + def_bool y + depends on AMD_NODE + +config AMD_NODE def_bool y depends on CPU_SUP_AMD && PCI diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 60fc3ed72830..90aabe1fd3b6 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -664,7 +664,7 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; /* Set WB as the default cache mode. */ - mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); } #endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index d0caac26533f..4c4efb93045e 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -4,7 +4,7 @@ #include #include -#include +#include struct amd_nb_bus_dev_range { u8 bus; @@ -21,49 +21,16 @@ extern int amd_numa_init(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, unsigned long); -int __must_check amd_smn_read(u16 node, u32 address, u32 *value); -int __must_check amd_smn_write(u16 node, u32 address, u32 value); - struct amd_l3_cache { unsigned indices; u8 subcaches[4]; }; -struct threshold_block { - unsigned int block; /* Number within bank */ - unsigned int bank; /* MCA bank the block belongs to */ - unsigned int cpu; /* CPU which controls MCA bank */ - u32 address; /* MSR address for the block */ - u16 interrupt_enable; /* Enable/Disable APIC interrupt */ - bool interrupt_capable; /* Bank can generate an interrupt. */ - - u16 threshold_limit; /* - * Value upon which threshold - * interrupt is generated. - */ - - struct kobject kobj; /* sysfs object */ - struct list_head miscj; /* - * List of threshold blocks - * within a bank. - */ -}; - -struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; - - /* initialized to the number of CPUs on the node sharing this bank */ - refcount_t cpus; - unsigned int shared; -}; - struct amd_northbridge { struct pci_dev *root; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; - struct threshold_bank *bank4; }; struct amd_northbridge_info { @@ -82,23 +49,6 @@ u16 amd_nb_num(void); bool amd_nb_has_feature(unsigned int feature); struct amd_northbridge *node_to_amd_nb(int node); -static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) -{ - struct pci_dev *misc; - int i; - - for (i = 0; i != amd_nb_num(); i++) { - misc = node_to_amd_nb(i)->misc; - - if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && - PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) - return i; - } - - WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); - return 0; -} - static inline bool amd_gart_present(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h new file mode 100644 index 000000000000..113ad3e8ee40 --- /dev/null +++ b/arch/x86/include/asm/amd_node.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Node helper functions and common defines + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + * + * Note: + * Items in this file may only be used in a single place. + * However, it's prudent to keep all AMD Node functionality + * in a unified place rather than spreading throughout the + * kernel. + */ + +#ifndef _ASM_X86_AMD_NODE_H_ +#define _ASM_X86_AMD_NODE_H_ + +#include + +#define MAX_AMD_NUM_NODES 8 +#define AMD_NODE0_PCI_SLOT 0x18 + +struct pci_dev *amd_node_get_func(u16 node, u8 func); +struct pci_dev *amd_node_get_root(u16 node); + +static inline u16 amd_num_nodes(void) +{ + return topology_amd_nodes_per_pkg() * topology_max_packages(); +} + +int __must_check amd_smn_read(u16 node, u32 address, u32 *value); +int __must_check amd_smn_write(u16 node, u32 address, u32 value); + +#endif /*_ASM_X86_AMD_NODE_H_*/ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4543cf2eb5e8..eb2db07ef39c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -276,7 +276,7 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -int mce_available(struct cpuinfo_x86 *c); +bool mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); bool mce_usable_address(struct mce *m); @@ -296,7 +296,7 @@ enum mcp_flags { void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); +bool mce_notify_irq(void); DECLARE_PER_CPU(struct mce, injectm); @@ -386,8 +386,6 @@ static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len); #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 4218248083d9..c69e269937c5 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -58,8 +58,8 @@ struct mtrr_state_type { */ # ifdef CONFIG_MTRR void mtrr_bp_init(void); -void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var, - mtrr_type def_type); +void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type); extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); @@ -75,9 +75,9 @@ void mtrr_disable(void); void mtrr_enable(void); void mtrr_generic_set_state(void); # else -static inline void mtrr_overwrite_state(struct mtrr_var_range *var, - unsigned int num_var, - mtrr_type def_type) +static inline void guest_force_mtrr_state(struct mtrr_var_range *var, + unsigned int num_var, + mtrr_type def_type) { } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f7918980667a..b43eb7e384eb 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -119,6 +119,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_AMD_NB) += amd_nb.o +obj-$(CONFIG_AMD_NODE) += amd_node.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 9fe9972d2071..11fac09e3a8c 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -15,66 +15,8 @@ #include #include -#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 -#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 -#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 -#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 -#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5 -#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 -#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 -#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 -#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 -#define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT 0x153a -#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 -#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 -#define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb -#define PCI_DEVICE_ID_AMD_MI300_ROOT 0x14f8 - -#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec -#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 -#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c -#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 -#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728 -#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 -#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1 -#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d -#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e -#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4 -#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 -#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc -#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 -#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4 0x16fc -#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c -#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc -#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 -#define PCI_DEVICE_ID_AMD_MI300_DF_F4 0x152c - -/* Protect the PCI config register pairs used for SMN. */ -static DEFINE_MUTEX(smn_mutex); - static u32 *flush_words; -static const struct pci_device_id amd_root_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_ROOT) }, - {} -}; - -#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704 - static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, @@ -84,70 +26,6 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F3) }, - {} -}; - -static const struct pci_device_id amd_nb_link_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F4) }, - {} -}; - -static const struct pci_device_id hygon_root_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_ROOT) }, - {} -}; - -static const struct pci_device_id hygon_nb_misc_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) }, - {} -}; - -static const struct pci_device_id hygon_nb_link_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F4) }, {} }; @@ -178,176 +56,37 @@ struct amd_northbridge *node_to_amd_nb(int node) } EXPORT_SYMBOL_GPL(node_to_amd_nb); -static struct pci_dev *next_northbridge(struct pci_dev *dev, - const struct pci_device_id *ids) -{ - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(ids, dev)); - return dev; -} - -/* - * SMN accesses may fail in ways that are difficult to detect here in the called - * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do - * their own checking based on what behavior they expect. - * - * For SMN reads, the returned value may be zero if the register is Read-as-Zero. - * Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI Error Response" - * can be checked here, and a proper error code can be returned. - * - * But the Read-as-Zero response cannot be verified here. A value of 0 may be - * correct in some cases, so callers must check that this correct is for the - * register/fields they need. - * - * For SMN writes, success can be determined through a "write and read back" - * However, this is not robust when done here. - * - * Possible issues: - * - * 1) Bits that are "Write-1-to-Clear". In this case, the read value should - * *not* match the write value. - * - * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be - * known here. - * - * 3) Bits that are "Reserved / Set to 1". Ditto above. - * - * Callers of amd_smn_write() should do the "write and read back" check - * themselves, if needed. - * - * For #1, they can see if their target bits got cleared. - * - * For #2 and #3, they can check if their target bits got set as intended. - * - * This matches what is done for RDMSR/WRMSR. As long as there's no #GP, then - * the operation is considered a success, and the caller does their own - * checking. - */ -static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) -{ - struct pci_dev *root; - int err = -ENODEV; - - if (node >= amd_northbridges.num) - goto out; - - root = node_to_amd_nb(node)->root; - if (!root) - goto out; - - mutex_lock(&smn_mutex); - - err = pci_write_config_dword(root, 0x60, address); - if (err) { - pr_warn("Error programming SMN address 0x%x.\n", address); - goto out_unlock; - } - - err = (write ? pci_write_config_dword(root, 0x64, *value) - : pci_read_config_dword(root, 0x64, value)); - -out_unlock: - mutex_unlock(&smn_mutex); - -out: - return err; -} - -int __must_check amd_smn_read(u16 node, u32 address, u32 *value) -{ - int err = __amd_smn_rw(node, address, value, false); - - if (PCI_POSSIBLE_ERROR(*value)) { - err = -ENODEV; - *value = 0; - } - - return err; -} -EXPORT_SYMBOL_GPL(amd_smn_read); - -int __must_check amd_smn_write(u16 node, u32 address, u32 value) -{ - return __amd_smn_rw(node, address, &value, true); -} -EXPORT_SYMBOL_GPL(amd_smn_write); - - static int amd_cache_northbridges(void) { - const struct pci_device_id *misc_ids = amd_nb_misc_ids; - const struct pci_device_id *link_ids = amd_nb_link_ids; - const struct pci_device_id *root_ids = amd_root_ids; - struct pci_dev *root, *misc, *link; struct amd_northbridge *nb; - u16 roots_per_misc = 0; - u16 misc_count = 0; - u16 root_count = 0; - u16 i, j; + u16 i; if (amd_northbridges.num) return 0; - if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - root_ids = hygon_root_ids; - misc_ids = hygon_nb_misc_ids; - link_ids = hygon_nb_link_ids; - } + amd_northbridges.num = amd_num_nodes(); - misc = NULL; - while ((misc = next_northbridge(misc, misc_ids))) - misc_count++; - - if (!misc_count) - return -ENODEV; - - root = NULL; - while ((root = next_northbridge(root, root_ids))) - root_count++; - - if (root_count) { - roots_per_misc = root_count / misc_count; - - /* - * There should be _exactly_ N roots for each DF/SMN - * interface. - */ - if (!roots_per_misc || (root_count % roots_per_misc)) { - pr_info("Unsupported AMD DF/PCI configuration found\n"); - return -ENODEV; - } - } - - nb = kcalloc(misc_count, sizeof(struct amd_northbridge), GFP_KERNEL); + nb = kcalloc(amd_northbridges.num, sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) return -ENOMEM; amd_northbridges.nb = nb; - amd_northbridges.num = misc_count; - link = misc = root = NULL; for (i = 0; i < amd_northbridges.num; i++) { - node_to_amd_nb(i)->root = root = - next_northbridge(root, root_ids); - node_to_amd_nb(i)->misc = misc = - next_northbridge(misc, misc_ids); - node_to_amd_nb(i)->link = link = - next_northbridge(link, link_ids); + node_to_amd_nb(i)->root = amd_node_get_root(i); + node_to_amd_nb(i)->misc = amd_node_get_func(i, 3); /* - * If there are more PCI root devices than data fabric/ - * system management network interfaces, then the (N) - * PCI roots per DF/SMN interface are functionally the - * same (for DF/SMN access) and N-1 are redundant. N-1 - * PCI roots should be skipped per DF/SMN interface so - * the following DF/SMN interfaces get mapped to - * correct PCI roots. + * Each Northbridge must have a 'misc' device. + * If not, then uninitialize everything. */ - for (j = 1; j < roots_per_misc; j++) - root = next_northbridge(root, root_ids); + if (!node_to_amd_nb(i)->misc) { + amd_northbridges.num = 0; + kfree(nb); + return -ENODEV; + } + + node_to_amd_nb(i)->link = amd_node_get_func(i, 4); } if (amd_gart_present()) @@ -385,7 +124,6 @@ static int amd_cache_northbridges(void) */ bool __init early_is_amd_nb(u32 device) { - const struct pci_device_id *misc_ids = amd_nb_misc_ids; const struct pci_device_id *id; u32 vendor = device & 0xffff; @@ -393,11 +131,11 @@ bool __init early_is_amd_nb(u32 device) boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) return false; - if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - misc_ids = hygon_nb_misc_ids; + if (cpu_feature_enabled(X86_FEATURE_ZEN)) + return false; device >>= 16; - for (id = misc_ids; id->vendor; id++) + for (id = amd_nb_misc_ids; id->vendor; id++) if (vendor == id->vendor && device == id->device) return true; return false; @@ -582,6 +320,10 @@ static __init void fix_erratum_688(void) static __init int init_amd_nbs(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return 0; + amd_cache_northbridges(); amd_cache_gart(); diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c new file mode 100644 index 000000000000..d2ec7fd555c5 --- /dev/null +++ b/arch/x86/kernel/amd_node.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Node helper functions and common defines + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include + +/* + * AMD Nodes are a physical collection of I/O devices within an SoC. There can be one + * or more nodes per package. + * + * The nodes are software-visible through PCI config space. All nodes are enumerated + * on segment 0 bus 0. The device (slot) numbers range from 0x18 to 0x1F (maximum 8 + * nodes) with 0x18 corresponding to node 0, 0x19 to node 1, etc. Each node can be a + * multi-function device. + * + * On legacy systems, these node devices represent integrated Northbridge functionality. + * On Zen-based systems, these node devices represent Data Fabric functionality. + * + * See "Configuration Space Accesses" section in BKDGs or + * "Processor x86 Core" -> "Configuration Space" section in PPRs. + */ +struct pci_dev *amd_node_get_func(u16 node, u8 func) +{ + if (node >= MAX_AMD_NUM_NODES) + return NULL; + + return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func)); +} + +#define DF_BLK_INST_CNT 0x040 +#define DF_CFG_ADDR_CNTL_LEGACY 0x084 +#define DF_CFG_ADDR_CNTL_DF4 0xC04 + +#define DF_MAJOR_REVISION GENMASK(27, 24) + +static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0) +{ + u32 reg; + + /* + * Revision fields added for DF4 and later. + * + * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. + */ + if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, ®)) + return 0; + + if (reg & DF_MAJOR_REVISION) + return DF_CFG_ADDR_CNTL_DF4; + + return DF_CFG_ADDR_CNTL_LEGACY; +} + +struct pci_dev *amd_node_get_root(u16 node) +{ + struct pci_dev *root; + u16 cntl_off; + u8 bus; + + if (!cpu_feature_enabled(X86_FEATURE_ZEN)) + return NULL; + + /* + * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) + * Bits [7:0] (SecBusNum) holds the bus number of the root device for + * this Data Fabric instance. The segment, device, and function will be 0. + */ + struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); + if (!df_f0) + return NULL; + + cntl_off = get_cfg_addr_cntl_offset(df_f0); + if (!cntl_off) + return NULL; + + if (pci_read_config_byte(df_f0, cntl_off, &bus)) + return NULL; + + /* Grab the pointer for the actual root device instance. */ + root = pci_get_domain_bus_and_slot(0, bus, 0); + + pci_dbg(root, "is root for AMD node %u\n", node); + return root; +} + +static struct pci_dev **amd_roots; + +/* Protect the PCI config register pairs used for SMN. */ +static DEFINE_MUTEX(smn_mutex); + +#define SMN_INDEX_OFFSET 0x60 +#define SMN_DATA_OFFSET 0x64 + +/* + * SMN accesses may fail in ways that are difficult to detect here in the called + * functions amd_smn_read() and amd_smn_write(). Therefore, callers must do + * their own checking based on what behavior they expect. + * + * For SMN reads, the returned value may be zero if the register is Read-as-Zero. + * Or it may be a "PCI Error Response", e.g. all 0xFFs. The "PCI Error Response" + * can be checked here, and a proper error code can be returned. + * + * But the Read-as-Zero response cannot be verified here. A value of 0 may be + * correct in some cases, so callers must check that this correct is for the + * register/fields they need. + * + * For SMN writes, success can be determined through a "write and read back" + * However, this is not robust when done here. + * + * Possible issues: + * + * 1) Bits that are "Write-1-to-Clear". In this case, the read value should + * *not* match the write value. + * + * 2) Bits that are "Read-as-Zero"/"Writes-Ignored". This information cannot be + * known here. + * + * 3) Bits that are "Reserved / Set to 1". Ditto above. + * + * Callers of amd_smn_write() should do the "write and read back" check + * themselves, if needed. + * + * For #1, they can see if their target bits got cleared. + * + * For #2 and #3, they can check if their target bits got set as intended. + * + * This matches what is done for RDMSR/WRMSR. As long as there's no #GP, then + * the operation is considered a success, and the caller does their own + * checking. + */ +static int __amd_smn_rw(u8 i_off, u8 d_off, u16 node, u32 address, u32 *value, bool write) +{ + struct pci_dev *root; + int err = -ENODEV; + + if (node >= amd_num_nodes()) + return err; + + root = amd_roots[node]; + if (!root) + return err; + + guard(mutex)(&smn_mutex); + + err = pci_write_config_dword(root, i_off, address); + if (err) { + pr_warn("Error programming SMN address 0x%x.\n", address); + return pcibios_err_to_errno(err); + } + + err = (write ? pci_write_config_dword(root, d_off, *value) + : pci_read_config_dword(root, d_off, value)); + + return pcibios_err_to_errno(err); +} + +int __must_check amd_smn_read(u16 node, u32 address, u32 *value) +{ + int err = __amd_smn_rw(SMN_INDEX_OFFSET, SMN_DATA_OFFSET, node, address, value, false); + + if (PCI_POSSIBLE_ERROR(*value)) { + err = -ENODEV; + *value = 0; + } + + return err; +} +EXPORT_SYMBOL_GPL(amd_smn_read); + +int __must_check amd_smn_write(u16 node, u32 address, u32 value) +{ + return __amd_smn_rw(SMN_INDEX_OFFSET, SMN_DATA_OFFSET, node, address, &value, true); +} +EXPORT_SYMBOL_GPL(amd_smn_write); + +static int amd_cache_roots(void) +{ + u16 node, num_nodes = amd_num_nodes(); + + amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); + if (!amd_roots) + return -ENOMEM; + + for (node = 0; node < num_nodes; node++) + amd_roots[node] = amd_node_get_root(node); + + return 0; +} + +static int __init amd_smn_init(void) +{ + int err; + + if (!cpu_feature_enabled(X86_FEATURE_ZEN)) + return 0; + + guard(mutex)(&smn_mutex); + + if (amd_roots) + return 0; + + err = amd_cache_roots(); + if (err) + return err; + + return 0; +} + +fs_initcall(amd_smn_init); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 6ca80fff1fea..1075a90141da 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -4,8 +4,6 @@ * * Written by Jacob Shin - AMD, Inc. * Maintained by: Borislav Petkov - * - * All MC4_MISCi registers are shared between cores on a node. */ #include #include @@ -20,7 +18,6 @@ #include #include -#include #include #include #include @@ -221,6 +218,32 @@ static const struct smca_hwid smca_hwid_mcatypes[] = { #define MAX_MCATYPE_NAME_LEN 30 static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; +struct threshold_block { + /* This block's number within its bank. */ + unsigned int block; + /* MCA bank number that contains this block. */ + unsigned int bank; + /* CPU which controls this block's MCA bank. */ + unsigned int cpu; + /* MCA_MISC MSR address for this block. */ + u32 address; + /* Enable/Disable APIC interrupt. */ + bool interrupt_enable; + /* Bank can generate an interrupt. */ + bool interrupt_capable; + /* Value upon which threshold interrupt is generated. */ + u16 threshold_limit; + /* sysfs object */ + struct kobject kobj; + /* List of threshold blocks within this block's MCA bank. */ + struct list_head miscj; +}; + +struct threshold_bank { + struct kobject *kobj; + struct threshold_block *blocks; +}; + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); /* @@ -333,19 +356,6 @@ struct thresh_restart { u16 old_limit; }; -static inline bool is_shared_bank(int bank) -{ - /* - * Scalable MCA provides for only one core to have access to the MSRs of - * a shared bank. - */ - if (mce_flags.smca) - return false; - - /* Bank 4 is for northbridge reporting and is thus shared */ - return (bank == 4); -} - static const char *bank4_names(const struct threshold_block *b) { switch (b->address) { @@ -381,7 +391,7 @@ static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) return msr_high_bits & BIT(28); } -static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) +static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; @@ -389,7 +399,7 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } if (apic != msr) { @@ -399,15 +409,15 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) * was set is reserved. Return early here: */ if (mce_flags.smca) - return 0; + return false; pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } - return 1; + return true; }; /* Reprogram MCx_MISC MSR behind this threshold bank. */ @@ -1198,35 +1208,10 @@ out_free: return err; } -static int __threshold_add_blocks(struct threshold_bank *b) -{ - struct list_head *head = &b->blocks->miscj; - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - int err = 0; - - err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); - if (err) - return err; - - list_for_each_entry_safe(pos, tmp, head, miscj) { - - err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); - if (err) { - list_for_each_entry_safe_reverse(pos, tmp, head, miscj) - kobject_del(&pos->kobj); - - return err; - } - } - return err; -} - static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, unsigned int bank) { struct device *dev = this_cpu_read(mce_device); - struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; const char *name = get_name(cpu, bank, NULL); int err = 0; @@ -1234,26 +1219,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, if (!dev) return -ENODEV; - if (is_shared_bank(bank)) { - nb = node_to_amd_nb(topology_amd_node_id(cpu)); - - /* threshold descriptor already initialized on this node? */ - if (nb && nb->bank4) { - /* yes, use it */ - b = nb->bank4; - err = kobject_add(b->kobj, &dev->kobj, name); - if (err) - goto out; - - bp[bank] = b; - refcount_inc(&b->cpus); - - err = __threshold_add_blocks(b); - - goto out; - } - } - b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) { err = -ENOMEM; @@ -1267,17 +1232,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, goto out_free; } - if (is_shared_bank(bank)) { - b->shared = 1; - refcount_set(&b->cpus, 1); - - /* nb is already initialized, see above */ - if (nb) { - WARN_ON(nb->bank4); - nb->bank4 = b; - } - } - err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); if (err) goto out_kobj; @@ -1310,40 +1264,11 @@ static void deallocate_threshold_blocks(struct threshold_bank *bank) kobject_put(&bank->blocks->kobj); } -static void __threshold_remove_blocks(struct threshold_bank *b) -{ - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - - kobject_put(b->kobj); - - list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) - kobject_put(b->kobj); -} - static void threshold_remove_bank(struct threshold_bank *bank) { - struct amd_northbridge *nb; - if (!bank->blocks) goto out_free; - if (!bank->shared) - goto out_dealloc; - - if (!refcount_dec_and_test(&bank->cpus)) { - __threshold_remove_blocks(bank); - return; - } else { - /* - * The last CPU on this node using the shared bank is going - * away, remove that bank now. - */ - nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id())); - nb->bank4 = NULL; - } - -out_dealloc: deallocate_threshold_blocks(bank); out_free: diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 7fb5556a0b53..0dc00c9894c7 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -151,7 +151,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm); void mce_log(struct mce_hw_err *err) { - if (!mce_gen_pool_add(err)) + if (mce_gen_pool_add(err)) irq_work_queue(&mce_irq_work); } EXPORT_SYMBOL_GPL(mce_log); @@ -492,10 +492,10 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs } } -int mce_available(struct cpuinfo_x86 *c) +bool mce_available(struct cpuinfo_x86 *c) { if (mca_cfg.disabled) - return 0; + return false; return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } @@ -1778,7 +1778,7 @@ static void mce_timer_delete_all(void) * Can be called from interrupt context, but not from machine check/NMI * context. */ -int mce_notify_irq(void) +bool mce_notify_irq(void) { /* Not more than two messages every minute */ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); @@ -1789,9 +1789,9 @@ int mce_notify_irq(void) if (__ratelimit(&ratelimit)) pr_info(HW_ERR "Machine check events logged\n"); - return 1; + return true; } - return 0; + return false; } EXPORT_SYMBOL_GPL(mce_notify_irq); @@ -1910,101 +1910,120 @@ static void __mcheck_cpu_check_banks(void) } } -/* Add per CPU specific workarounds here */ -static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) +static void apply_quirks_amd(struct cpuinfo_x86 *c) { struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); - struct mca_config *cfg = &mca_cfg; - - if (c->x86_vendor == X86_VENDOR_UNKNOWN) { - pr_info("unknown CPU type - not enabling MCE support\n"); - return -EOPNOTSUPP; - } /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { - /* - * disable GART TBL walk error reporting, which - * trips off incorrectly with the IOMMU & 3ware - * & Cerberus: - */ - clear_bit(10, (unsigned long *)&mce_banks[4].ctl); - } - if (c->x86 < 0x11 && cfg->bootlog < 0) { - /* - * Lots of broken BIOS around that don't clear them - * by default and leave crap in there. Don't log: - */ - cfg->bootlog = 0; - } + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { /* - * Various K7s with broken bank 0 around. Always disable - * by default. + * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: */ - if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].ctl = 0; - - /* - * overflow_recov is supported for F15h Models 00h-0fh - * even though we don't have a CPUID bit for it. - */ - if (c->x86 == 0x15 && c->x86_model <= 0xf) - mce_flags.overflow_recov = 1; - - if (c->x86 >= 0x17 && c->x86 <= 0x1A) - mce_flags.zen_ifu_quirk = 1; - + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); } - if (c->x86_vendor == X86_VENDOR_INTEL) { + if (c->x86 < 0x11 && mca_cfg.bootlog < 0) { /* - * SDM documents that on family 6 bank 0 should not be written - * because it aliases to another special BIOS controlled - * register. - * But it's not aliased anymore on model 0x1a+ - * Don't ignore bank 0 completely because there could be a - * valid event later, merely don't write CTL0. + * Lots of broken BIOS around that don't clear them + * by default and leave crap in there. Don't log: */ - - if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].init = false; - - /* - * All newer Intel systems support MCE broadcasting. Enable - * synchronization with a one second timeout. - */ - if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && - cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; - - /* - * There are also broken BIOSes on some Pentium M and - * earlier systems: - */ - if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0) - cfg->bootlog = 0; - - if (c->x86_vfm == INTEL_SANDYBRIDGE_X) - mce_flags.snb_ifu_quirk = 1; - - /* - * Skylake, Cascacde Lake and Cooper Lake require a quirk on - * rep movs. - */ - if (c->x86_vfm == INTEL_SKYLAKE_X) - mce_flags.skx_repmov_quirk = 1; + mca_cfg.bootlog = 0; } - if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { - /* - * All newer Zhaoxin CPUs support MCE broadcasting. Enable - * synchronization with a one second timeout. - */ - if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { - if (cfg->monarch_timeout < 0) - cfg->monarch_timeout = USEC_PER_SEC; - } + /* + * Various K7s with broken bank 0 around. Always disable + * by default. + */ + if (c->x86 == 6 && this_cpu_read(mce_num_banks)) + mce_banks[0].ctl = 0; + + /* + * overflow_recov is supported for F15h Models 00h-0fh + * even though we don't have a CPUID bit for it. + */ + if (c->x86 == 0x15 && c->x86_model <= 0xf) + mce_flags.overflow_recov = 1; + + if (c->x86 >= 0x17 && c->x86 <= 0x1A) + mce_flags.zen_ifu_quirk = 1; +} + +static void apply_quirks_intel(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* Older CPUs (prior to family 6) don't need quirks. */ + if (c->x86_vfm < INTEL_PENTIUM_PRO) + return; + + /* + * SDM documents that on family 6 bank 0 should not be written + * because it aliases to another special BIOS controlled + * register. + * But it's not aliased anymore on model 0x1a+ + * Don't ignore bank 0 completely because there could be a + * valid event later, merely don't write CTL0. + */ + if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks)) + mce_banks[0].init = false; + + /* + * All newer Intel systems support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if (c->x86_vfm >= INTEL_CORE_YONAH && mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = USEC_PER_SEC; + + /* + * There are also broken BIOSes on some Pentium M and + * earlier systems: + */ + if (c->x86_vfm < INTEL_CORE_YONAH && mca_cfg.bootlog < 0) + mca_cfg.bootlog = 0; + + if (c->x86_vfm == INTEL_SANDYBRIDGE_X) + mce_flags.snb_ifu_quirk = 1; + + /* + * Skylake, Cascacde Lake and Cooper Lake require a quirk on + * rep movs. + */ + if (c->x86_vfm == INTEL_SKYLAKE_X) + mce_flags.skx_repmov_quirk = 1; +} + +static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c) +{ + /* + * All newer Zhaoxin CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { + if (mca_cfg.monarch_timeout < 0) + mca_cfg.monarch_timeout = USEC_PER_SEC; + } +} + +/* Add per CPU specific workarounds here */ +static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) +{ + struct mca_config *cfg = &mca_cfg; + + switch (c->x86_vendor) { + case X86_VENDOR_UNKNOWN: + pr_info("unknown CPU type - not enabling MCE support\n"); + return false; + case X86_VENDOR_AMD: + apply_quirks_amd(c); + break; + case X86_VENDOR_INTEL: + apply_quirks_intel(c); + break; + case X86_VENDOR_ZHAOXIN: + apply_quirks_zhaoxin(c); + break; } if (cfg->monarch_timeout < 0) @@ -2012,28 +2031,28 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (cfg->bootlog != 0) cfg->panic_timeout = 30; - return 0; + return true; } -static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) +static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) - return 0; + return false; switch (c->x86_vendor) { case X86_VENDOR_INTEL: intel_p5_mcheck_init(c); mce_flags.p5 = 1; - return 1; + return true; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); mce_flags.winchip = 1; - return 1; + return true; default: - return 0; + return false; } - return 0; + return false; } /* @@ -2099,13 +2118,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_intel_feature_init(c); break; - case X86_VENDOR_AMD: { - mce_amd_feature_init(c); - break; - } - + case X86_VENDOR_AMD: case X86_VENDOR_HYGON: - mce_hygon_feature_init(c); + mce_amd_feature_init(c); break; case X86_VENDOR_CENTAUR: @@ -2279,12 +2294,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_cap_init(); - if (__mcheck_cpu_apply_quirks(c) < 0) { + if (!__mcheck_cpu_apply_quirks(c)) { mca_cfg.disabled = 1; return; } - if (mce_gen_pool_init()) { + if (!mce_gen_pool_init()) { mca_cfg.disabled = 1; pr_emerg("Couldn't allocate MCE records pool!\n"); return; diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index d0be6dda0c14..3ca9c007a666 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -94,64 +94,63 @@ bool mce_gen_pool_empty(void) return llist_empty(&mce_event_llist); } -int mce_gen_pool_add(struct mce_hw_err *err) +bool mce_gen_pool_add(struct mce_hw_err *err) { struct mce_evt_llist *node; if (filter_mce(&err->m)) - return -EINVAL; + return false; if (!mce_evt_pool) - return -EINVAL; + return false; node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node)); if (!node) { pr_warn_ratelimited("MCE records pool full!\n"); - return -ENOMEM; + return false; } memcpy(&node->err, err, sizeof(*err)); llist_add(&node->llnode, &mce_event_llist); - return 0; + return true; } -static int mce_gen_pool_create(void) +static bool mce_gen_pool_create(void) { int mce_numrecords, mce_poolsz, order; struct gen_pool *gpool; - int ret = -ENOMEM; void *mce_pool; order = order_base_2(sizeof(struct mce_evt_llist)); gpool = gen_pool_create(order, -1); if (!gpool) - return ret; + return false; mce_numrecords = max(MCE_MIN_ENTRIES, num_possible_cpus() * MCE_PER_CPU); mce_poolsz = mce_numrecords * (1 << order); mce_pool = kmalloc(mce_poolsz, GFP_KERNEL); if (!mce_pool) { gen_pool_destroy(gpool); - return ret; + return false; } - ret = gen_pool_add(gpool, (unsigned long)mce_pool, mce_poolsz, -1); - if (ret) { + + if (gen_pool_add(gpool, (unsigned long)mce_pool, mce_poolsz, -1)) { gen_pool_destroy(gpool); kfree(mce_pool); - return ret; + return false; } mce_evt_pool = gpool; - return ret; + return true; } -int mce_gen_pool_init(void) +bool mce_gen_pool_init(void) { /* Just init mce_gen_pool once. */ if (mce_evt_pool) - return 0; + return true; return mce_gen_pool_create(); } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index b3cd2c61b11d..f863df0ff42c 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -75,12 +75,12 @@ static u16 cmci_threshold[MAX_NR_BANKS]; */ #define CMCI_STORM_THRESHOLD 32749 -static int cmci_supported(int *banks) +static bool cmci_supported(int *banks) { u64 cap; if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) - return 0; + return false; /* * Vendor check is not strictly needed, but the initial @@ -89,10 +89,11 @@ static int cmci_supported(int *banks) */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) - return 0; + return false; if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) - return 0; + return false; + rdmsrl(MSR_IA32_MCG_CAP, cap); *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK); return !!(cap & MCG_CMCI_P); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 84f810598231..95a504ece43e 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -31,8 +31,8 @@ struct mce_evt_llist { void mce_gen_pool_process(struct work_struct *__unused); bool mce_gen_pool_empty(void); -int mce_gen_pool_add(struct mce_hw_err *err); -int mce_gen_pool_init(void); +bool mce_gen_pool_add(struct mce_hw_err *err); +bool mce_gen_pool_init(void); struct llist_node *mce_gen_pool_prepare_records(void); int mce_severity(struct mce *a, struct pt_regs *regs, char **msg, bool is_excp); diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index 89e31e1e5c9c..f4a007616468 100644 --- a/arch/x86/kernel/cpu/mce/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -90,7 +90,7 @@ void cmci_storm_end(unsigned int bank) storm->banks[bank].in_storm_mode = false; /* If no banks left in storm mode, stop polling. */ - if (!this_cpu_dec_return(storm_desc.stormy_bank_count)) + if (!--storm->stormy_bank_count) mce_timer_kick(false); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 7b29ebda024f..2fdfda2b60e4 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -423,7 +423,7 @@ void __init mtrr_copy_map(void) } /** - * mtrr_overwrite_state - set static MTRR state + * guest_force_mtrr_state - set static MTRR state for a guest * * Used to set MTRR state via different means (e.g. with data obtained from * a hypervisor). @@ -436,8 +436,8 @@ void __init mtrr_copy_map(void) * @num_var: length of the @var array * @def_type: default caching type */ -void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var, - mtrr_type def_type) +void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type) { unsigned int i; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 989d368be04f..ecbda0341a8a 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -625,7 +625,7 @@ void mtrr_save_state(void) static int __init mtrr_init_finalize(void) { /* - * Map might exist if mtrr_overwrite_state() has been called or if + * Map might exist if guest_force_mtrr_state() has been called or if * mtrr_enabled() returns true. */ mtrr_copy_map(); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 21e9e4845354..7a422a6c5983 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -983,7 +983,7 @@ static void __init kvm_init_platform(void) x86_platform.apic_post_init = kvm_apic_init; /* Set WB as the default cache mode for SEV-SNP and TDX */ - mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); } #if defined(CONFIG_AMD_MEM_ENCRYPT) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 0681ecfe3430..592fb9d97e77 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -828,7 +828,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); #endif -#ifdef CONFIG_AMD_NB +#ifdef CONFIG_AMD_NODE #define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008 #define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index a8eb7e0c473c..e033d5594265 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -171,7 +171,7 @@ static void __init xen_set_mtrr_data(void) /* Only overwrite MTRR state if any MTRR could be got from Xen. */ if (reg) - mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE); + guest_force_mtrr_state(var, reg, MTRR_TYPE_UNCACHABLE); #endif } @@ -195,7 +195,7 @@ static void __init xen_pv_init_platform(void) if (xen_initial_domain()) xen_set_mtrr_data(); else - mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); /* Adjust nr_cpu_ids before "enumeration" happens */ xen_smp_count_cpus(); diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 06f7b43a6f78..cb97d7bdae31 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -78,6 +78,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE + depends on AMD_NODE imply AMD_ATL help Support for error detection and correction of DRAM ECC errors on diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5d356b7c4589..8414ceb43e4a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2,6 +2,7 @@ #include #include "amd64_edac.h" #include +#include static struct edac_pci_ctl_info *pci_ctl; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index dd376602f3f1..ea13ea482a63 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -324,7 +324,7 @@ config SENSORS_K8TEMP config SENSORS_K10TEMP tristate "AMD Family 10h+ temperature sensor" - depends on X86 && PCI && AMD_NB + depends on X86 && PCI && AMD_NODE help If you say yes here you get support for the temperature sensor(s) inside your CPU. Supported are later revisions of diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 7dc19c5d62ac..d0b4cc9a5011 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor"); @@ -150,6 +150,11 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) F15H_M60H_REPORTED_TEMP_CTRL_OFFSET, regval); } +static u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) +{ + return PCI_SLOT(pdev->devfn) - AMD_NODE0_PCI_SLOT; +} + static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval) { if (amd_smn_read(amd_pci_dev_to_node_id(pdev), diff --git a/drivers/platform/x86/amd/pmc/Kconfig b/drivers/platform/x86/amd/pmc/Kconfig index 94f9563d8be7..eeffdafd686e 100644 --- a/drivers/platform/x86/amd/pmc/Kconfig +++ b/drivers/platform/x86/amd/pmc/Kconfig @@ -5,7 +5,7 @@ config AMD_PMC tristate "AMD SoC PMC driver" - depends on ACPI && PCI && RTC_CLASS && AMD_NB + depends on ACPI && PCI && RTC_CLASS && AMD_NODE depends on SUSPEND select SERIO help diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index a254debb9256..87b064e8ca5a 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -10,7 +10,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include #include @@ -28,6 +27,8 @@ #include #include +#include + #include "pmc.h" /* SMU communication registers */ diff --git a/drivers/platform/x86/amd/pmf/Kconfig b/drivers/platform/x86/amd/pmf/Kconfig index 99d67cdbd91e..25b8f7ae3abd 100644 --- a/drivers/platform/x86/amd/pmf/Kconfig +++ b/drivers/platform/x86/amd/pmf/Kconfig @@ -7,7 +7,7 @@ config AMD_PMF tristate "AMD Platform Management Framework" depends on ACPI && PCI depends on POWER_SUPPLY - depends on AMD_NB + depends on AMD_NODE select ACPI_PLATFORM_PROFILE depends on TEE && AMDTEE depends on AMD_SFH_HID diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 06a97c533cb8..7f88f3121cf5 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -8,13 +8,13 @@ * Author: Shyam Sundar S K */ -#include #include #include #include #include #include #include +#include #include "pmf.h" /* PMF-SMU communication registers */ diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig index 551680073e43..6e03942cd7da 100644 --- a/drivers/ras/amd/atl/Kconfig +++ b/drivers/ras/amd/atl/Kconfig @@ -10,6 +10,7 @@ config AMD_ATL tristate "AMD Address Translation Library" depends on AMD_NB && X86_64 && RAS + depends on AMD_NODE depends on MEMORY_FAILURE default N help diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 143d04c779a8..f9be26d25348 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -18,6 +18,7 @@ #include #include +#include #include "reg_fields.h"