mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-07 22:42:04 +00:00
- More noinstr fixes
- Add an erratum workaround for Intel CPUs which, in certain circumstances, end up consuming an unrelated uncorrectable memory error when using fast string copy insns - Remove the MCE tolerance level control as it is not really needed or used anymore -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmI7Pe4ACgkQEsHwGGHe VUpQQRAAjEK4k+iXhWrNaX736WSaVb8qom+JFlAarrOKaJ6UpdQn+IZD8aF7iscr n1LWGYOyieFvovt69jjTeSprbCVueyhvCmOxxsvH9F2qhNklNwxKEaAPNBXgDuyJ SOs1fTZO4tS85qZbnZa/Um1keSIacBCVar49sXKsj6Ss+rg6wXnPitQh3ztGOAVn CBkNE5n6GG2ELjV+fuVOO54NixMtoElj8SIplQ0UOMlQPBO0Z5MkY5VM6LaQVx/e GGEna6Jo1Z9+b29yf6bR5izWLWcBHTXjvn6i2EIulqKGFRCFmPDBWmuw8YqeyG2a eT/sxVILKZby0Dj11Q1uxaUcln48WNIM5WPYWojaOelzYNNjJ1Kwa+klrlLOxbnM j92MSEBe7Nr2w4cukBg+0sIAdtcfRNx5Oov8yXC9VUA0tg4satAoYHdXn35eVJ3z ZEFo+94H3T0nlCwP+6TayXkTs1k1YICSaCZzp7HcbUdxCsIZQ0kyGknLVtTzydQc z3GEze35VPeqULeBntoaAb2Vpy76Hs5uBl1lkXv+wEGJuECdDld8IilvqtEzCZy5 vLRizqfXle1PQjlGG+eAqUG/7TPTvDmwuCyHEiCdSf1r3f8WLXevdP4WGyCB/yXy VYLmz/Rbga1wsFC4w19pe8FM2S6SSeODYqx6zEjiKYgbNjV/thQ= =oVWo -----END PGP SIGNATURE----- Merge tag 'ras_core_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull RAS updates from Borislav Petkov: - More noinstr fixes - Add an erratum workaround for Intel CPUs which, in certain circumstances, end up consuming an unrelated uncorrectable memory error when using fast string copy insns - Remove the MCE tolerance level control as it is not really needed or used anymore * tag 'ras_core_for_v5.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce: Remove the tolerance level control x86/mce: Work around an erratum on fast string copy instructions x86/mce: Use arch atomic and bit helpers
This commit is contained in:
commit
636f64db07
37
Documentation/ABI/removed/sysfs-mce
Normal file
37
Documentation/ABI/removed/sysfs-mce
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
||||||
|
Contact: Borislav Petkov <bp@suse.de>
|
||||||
|
Date: Dec, 2021
|
||||||
|
Description:
|
||||||
|
Unused and obsolete since the advent of recoverable machine
|
||||||
|
checks (see the last sentence below), which have been present
|
||||||
|
since 2010 (Nehalem).
|
||||||
|
|
||||||
|
Original description:
|
||||||
|
|
||||||
|
The entries appear for each CPU, but they are truly shared
|
||||||
|
between all CPUs.
|
||||||
|
|
||||||
|
Tolerance level. When a machine check exception occurs for a
|
||||||
|
non corrected machine check the kernel can take different
|
||||||
|
actions.
|
||||||
|
|
||||||
|
Since machine check exceptions can happen any time it is
|
||||||
|
sometimes risky for the kernel to kill a process because it
|
||||||
|
defies normal kernel locking rules. The tolerance level
|
||||||
|
configures how hard the kernel tries to recover even at some
|
||||||
|
risk of deadlock. Higher tolerant values trade potentially
|
||||||
|
better uptime with the risk of a crash or even corruption
|
||||||
|
(for tolerant >= 3).
|
||||||
|
|
||||||
|
== ===========================================================
|
||||||
|
0 always panic on uncorrected errors, log corrected errors
|
||||||
|
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
||||||
|
2 SIGBUS or log uncorrected errors, log corrected errors
|
||||||
|
3 never panic or SIGBUS, log all errors (for testing only)
|
||||||
|
== ===========================================================
|
||||||
|
|
||||||
|
Default: 1
|
||||||
|
|
||||||
|
Note this only makes a difference if the CPU allows recovery
|
||||||
|
from a machine check exception. Current x86 CPUs generally
|
||||||
|
do not.
|
@ -53,38 +53,6 @@ Description:
|
|||||||
(but some corrected errors might be still reported
|
(but some corrected errors might be still reported
|
||||||
in other ways)
|
in other ways)
|
||||||
|
|
||||||
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
|
||||||
Contact: Andi Kleen <ak@linux.intel.com>
|
|
||||||
Date: Feb, 2007
|
|
||||||
Description:
|
|
||||||
The entries appear for each CPU, but they are truly shared
|
|
||||||
between all CPUs.
|
|
||||||
|
|
||||||
Tolerance level. When a machine check exception occurs for a
|
|
||||||
non corrected machine check the kernel can take different
|
|
||||||
actions.
|
|
||||||
|
|
||||||
Since machine check exceptions can happen any time it is
|
|
||||||
sometimes risky for the kernel to kill a process because it
|
|
||||||
defies normal kernel locking rules. The tolerance level
|
|
||||||
configures how hard the kernel tries to recover even at some
|
|
||||||
risk of deadlock. Higher tolerant values trade potentially
|
|
||||||
better uptime with the risk of a crash or even corruption
|
|
||||||
(for tolerant >= 3).
|
|
||||||
|
|
||||||
== ===========================================================
|
|
||||||
0 always panic on uncorrected errors, log corrected errors
|
|
||||||
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
|
||||||
2 SIGBUS or log uncorrected errors, log corrected errors
|
|
||||||
3 never panic or SIGBUS, log all errors (for testing only)
|
|
||||||
== ===========================================================
|
|
||||||
|
|
||||||
Default: 1
|
|
||||||
|
|
||||||
Note this only makes a difference if the CPU allows recovery
|
|
||||||
from a machine check exception. Current x86 CPUs generally
|
|
||||||
do not.
|
|
||||||
|
|
||||||
What: /sys/devices/system/machinecheck/machinecheckX/trigger
|
What: /sys/devices/system/machinecheck/machinecheckX/trigger
|
||||||
Contact: Andi Kleen <ak@linux.intel.com>
|
Contact: Andi Kleen <ak@linux.intel.com>
|
||||||
Date: Feb, 2007
|
Date: Feb, 2007
|
||||||
|
@ -60,8 +60,6 @@ There are two (actually three) modes memory failure recovery can be in:
|
|||||||
|
|
||||||
vm.memory_failure_recovery sysctl set to zero:
|
vm.memory_failure_recovery sysctl set to zero:
|
||||||
All memory failures cause a panic. Do not attempt recovery.
|
All memory failures cause a panic. Do not attempt recovery.
|
||||||
(on x86 this can be also affected by the tolerant level of the
|
|
||||||
MCE subsystem)
|
|
||||||
|
|
||||||
early kill
|
early kill
|
||||||
(can be controlled globally and per process)
|
(can be controlled globally and per process)
|
||||||
|
@ -47,14 +47,7 @@ Please see Documentation/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
|
|||||||
in a reboot. On Intel systems it is enabled by default.
|
in a reboot. On Intel systems it is enabled by default.
|
||||||
mce=nobootlog
|
mce=nobootlog
|
||||||
Disable boot machine check logging.
|
Disable boot machine check logging.
|
||||||
mce=tolerancelevel[,monarchtimeout] (number,number)
|
mce=monarchtimeout (number)
|
||||||
tolerance levels:
|
|
||||||
0: always panic on uncorrected errors, log corrected errors
|
|
||||||
1: panic or SIGBUS on uncorrected errors, log corrected errors
|
|
||||||
2: SIGBUS or log uncorrected errors, log corrected errors
|
|
||||||
3: never panic or SIGBUS, log all errors (for testing only)
|
|
||||||
Default is 1
|
|
||||||
Can be also set using sysfs which is preferable.
|
|
||||||
monarchtimeout:
|
monarchtimeout:
|
||||||
Sets the time in us to wait for other CPUs on machine checks. 0
|
Sets the time in us to wait for other CPUs on machine checks. 0
|
||||||
to disable.
|
to disable.
|
||||||
|
@ -86,14 +86,6 @@ struct mce_vendor_flags mce_flags __read_mostly;
|
|||||||
|
|
||||||
struct mca_config mca_cfg __read_mostly = {
|
struct mca_config mca_cfg __read_mostly = {
|
||||||
.bootlog = -1,
|
.bootlog = -1,
|
||||||
/*
|
|
||||||
* Tolerant levels:
|
|
||||||
* 0: always panic on uncorrected errors, log corrected errors
|
|
||||||
* 1: panic or SIGBUS on uncorrected errors, log corrected errors
|
|
||||||
* 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
|
|
||||||
* 3: never panic or SIGBUS, log all errors (for testing only)
|
|
||||||
*/
|
|
||||||
.tolerant = 1,
|
|
||||||
.monarch_timeout = -1
|
.monarch_timeout = -1
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -168,27 +160,6 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
|
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
|
||||||
|
|
||||||
u32 mca_msr_reg(int bank, enum mca_msr reg)
|
|
||||||
{
|
|
||||||
if (mce_flags.smca) {
|
|
||||||
switch (reg) {
|
|
||||||
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
|
|
||||||
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
|
|
||||||
case MCA_MISC: return MSR_AMD64_SMCA_MCx_MISC(bank);
|
|
||||||
case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (reg) {
|
|
||||||
case MCA_CTL: return MSR_IA32_MCx_CTL(bank);
|
|
||||||
case MCA_ADDR: return MSR_IA32_MCx_ADDR(bank);
|
|
||||||
case MCA_MISC: return MSR_IA32_MCx_MISC(bank);
|
|
||||||
case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __print_mce(struct mce *m)
|
static void __print_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
|
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
|
||||||
@ -769,7 +740,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||||||
goto clear_it;
|
goto clear_it;
|
||||||
|
|
||||||
mce_read_aux(&m, i);
|
mce_read_aux(&m, i);
|
||||||
m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
|
m.severity = mce_severity(&m, NULL, NULL, false);
|
||||||
/*
|
/*
|
||||||
* Don't get the IP here because it's unlikely to
|
* Don't get the IP here because it's unlikely to
|
||||||
* have anything to do with the actual error location.
|
* have anything to do with the actual error location.
|
||||||
@ -809,7 +780,8 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
|
|||||||
* the severity assessment code. Pretend that EIPV was set, and take the
|
* the severity assessment code. Pretend that EIPV was set, and take the
|
||||||
* ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
|
* ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
|
||||||
*/
|
*/
|
||||||
static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
|
static __always_inline void
|
||||||
|
quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
if (bank != 0)
|
if (bank != 0)
|
||||||
return;
|
return;
|
||||||
@ -829,11 +801,64 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
|
|||||||
m->cs = regs->cs;
|
m->cs = regs->cs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable fast string copy and return from the MCE handler upon the first SRAR
|
||||||
|
* MCE on bank 1 due to a CPU erratum on Intel Skylake/Cascade Lake/Cooper Lake
|
||||||
|
* CPUs.
|
||||||
|
* The fast string copy instructions ("REP; MOVS*") could consume an
|
||||||
|
* uncorrectable memory error in the cache line _right after_ the desired region
|
||||||
|
* to copy and raise an MCE with RIP pointing to the instruction _after_ the
|
||||||
|
* "REP; MOVS*".
|
||||||
|
* This mitigation addresses the issue completely with the caveat of performance
|
||||||
|
* degradation on the CPU affected. This is still better than the OS crashing on
|
||||||
|
* MCEs raised on an irrelevant process due to "REP; MOVS*" accesses from a
|
||||||
|
* kernel context (e.g., copy_page).
|
||||||
|
*
|
||||||
|
* Returns true when fast string copy on CPU has been disabled.
|
||||||
|
*/
|
||||||
|
static noinstr bool quirk_skylake_repmov(void)
|
||||||
|
{
|
||||||
|
u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||||
|
u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE);
|
||||||
|
u64 mc1_status;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Apply the quirk only to local machine checks, i.e., no broadcast
|
||||||
|
* sync is needed.
|
||||||
|
*/
|
||||||
|
if (!(mcgstatus & MCG_STATUS_LMCES) ||
|
||||||
|
!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
|
||||||
|
|
||||||
|
/* Check for a software-recoverable data fetch error. */
|
||||||
|
if ((mc1_status &
|
||||||
|
(MCI_STATUS_VAL | MCI_STATUS_OVER | MCI_STATUS_UC | MCI_STATUS_EN |
|
||||||
|
MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_PCC |
|
||||||
|
MCI_STATUS_AR | MCI_STATUS_S)) ==
|
||||||
|
(MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
|
||||||
|
MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
|
||||||
|
MCI_STATUS_AR | MCI_STATUS_S)) {
|
||||||
|
misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
|
||||||
|
mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
|
||||||
|
mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
pr_err_once("Erratum detected, disable fast string copy instructions.\n");
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Do a quick check if any of the events requires a panic.
|
* Do a quick check if any of the events requires a panic.
|
||||||
* This decides if we keep the events around or clear them.
|
* This decides if we keep the events around or clear them.
|
||||||
*/
|
*/
|
||||||
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
|
static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
|
||||||
struct pt_regs *regs)
|
struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
char *tmp = *msg;
|
char *tmp = *msg;
|
||||||
@ -844,12 +869,12 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
|
|||||||
if (!(m->status & MCI_STATUS_VAL))
|
if (!(m->status & MCI_STATUS_VAL))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
__set_bit(i, validp);
|
arch___set_bit(i, validp);
|
||||||
if (mce_flags.snb_ifu_quirk)
|
if (mce_flags.snb_ifu_quirk)
|
||||||
quirk_sandybridge_ifu(i, m, regs);
|
quirk_sandybridge_ifu(i, m, regs);
|
||||||
|
|
||||||
m->bank = i;
|
m->bank = i;
|
||||||
if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
|
if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
|
||||||
mce_read_aux(m, i);
|
mce_read_aux(m, i);
|
||||||
*msg = tmp;
|
*msg = tmp;
|
||||||
return 1;
|
return 1;
|
||||||
@ -897,12 +922,11 @@ static noinstr int mce_timed_out(u64 *t, const char *msg)
|
|||||||
if (!mca_cfg.monarch_timeout)
|
if (!mca_cfg.monarch_timeout)
|
||||||
goto out;
|
goto out;
|
||||||
if ((s64)*t < SPINUNIT) {
|
if ((s64)*t < SPINUNIT) {
|
||||||
if (mca_cfg.tolerant <= 1) {
|
|
||||||
if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
|
if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
|
||||||
pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
|
pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
|
||||||
cpumask_pr_args(&mce_missing_cpus));
|
cpumask_pr_args(&mce_missing_cpus));
|
||||||
mce_panic(msg, NULL, NULL);
|
mce_panic(msg, NULL, NULL);
|
||||||
}
|
|
||||||
ret = 1;
|
ret = 1;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -966,9 +990,9 @@ static void mce_reign(void)
|
|||||||
* This dumps all the mces in the log buffer and stops the
|
* This dumps all the mces in the log buffer and stops the
|
||||||
* other CPUs.
|
* other CPUs.
|
||||||
*/
|
*/
|
||||||
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
|
if (m && global_worst >= MCE_PANIC_SEVERITY) {
|
||||||
/* call mce_severity() to get "msg" for panic */
|
/* call mce_severity() to get "msg" for panic */
|
||||||
mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
|
mce_severity(m, NULL, &msg, true);
|
||||||
mce_panic("Fatal machine check", m, msg);
|
mce_panic("Fatal machine check", m, msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -982,7 +1006,7 @@ static void mce_reign(void)
|
|||||||
* No machine check event found. Must be some external
|
* No machine check event found. Must be some external
|
||||||
* source or one CPU is hung. Panic.
|
* source or one CPU is hung. Panic.
|
||||||
*/
|
*/
|
||||||
if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
|
if (global_worst <= MCE_KEEP_SEVERITY)
|
||||||
mce_panic("Fatal machine check from unknown source", NULL, NULL);
|
mce_panic("Fatal machine check from unknown source", NULL, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1010,13 +1034,13 @@ static noinstr int mce_start(int *no_way_out)
|
|||||||
if (!timeout)
|
if (!timeout)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
atomic_add(*no_way_out, &global_nwo);
|
arch_atomic_add(*no_way_out, &global_nwo);
|
||||||
/*
|
/*
|
||||||
* Rely on the implied barrier below, such that global_nwo
|
* Rely on the implied barrier below, such that global_nwo
|
||||||
* is updated before mce_callin.
|
* is updated before mce_callin.
|
||||||
*/
|
*/
|
||||||
order = atomic_inc_return(&mce_callin);
|
order = arch_atomic_inc_return(&mce_callin);
|
||||||
cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
|
arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
|
||||||
|
|
||||||
/* Enable instrumentation around calls to external facilities */
|
/* Enable instrumentation around calls to external facilities */
|
||||||
instrumentation_begin();
|
instrumentation_begin();
|
||||||
@ -1024,10 +1048,10 @@ static noinstr int mce_start(int *no_way_out)
|
|||||||
/*
|
/*
|
||||||
* Wait for everyone.
|
* Wait for everyone.
|
||||||
*/
|
*/
|
||||||
while (atomic_read(&mce_callin) != num_online_cpus()) {
|
while (arch_atomic_read(&mce_callin) != num_online_cpus()) {
|
||||||
if (mce_timed_out(&timeout,
|
if (mce_timed_out(&timeout,
|
||||||
"Timeout: Not all CPUs entered broadcast exception handler")) {
|
"Timeout: Not all CPUs entered broadcast exception handler")) {
|
||||||
atomic_set(&global_nwo, 0);
|
arch_atomic_set(&global_nwo, 0);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
ndelay(SPINUNIT);
|
ndelay(SPINUNIT);
|
||||||
@ -1042,7 +1066,7 @@ static noinstr int mce_start(int *no_way_out)
|
|||||||
/*
|
/*
|
||||||
* Monarch: Starts executing now, the others wait.
|
* Monarch: Starts executing now, the others wait.
|
||||||
*/
|
*/
|
||||||
atomic_set(&mce_executing, 1);
|
arch_atomic_set(&mce_executing, 1);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Subject: Now start the scanning loop one by one in
|
* Subject: Now start the scanning loop one by one in
|
||||||
@ -1050,10 +1074,10 @@ static noinstr int mce_start(int *no_way_out)
|
|||||||
* This way when there are any shared banks it will be
|
* This way when there are any shared banks it will be
|
||||||
* only seen by one CPU before cleared, avoiding duplicates.
|
* only seen by one CPU before cleared, avoiding duplicates.
|
||||||
*/
|
*/
|
||||||
while (atomic_read(&mce_executing) < order) {
|
while (arch_atomic_read(&mce_executing) < order) {
|
||||||
if (mce_timed_out(&timeout,
|
if (mce_timed_out(&timeout,
|
||||||
"Timeout: Subject CPUs unable to finish machine check processing")) {
|
"Timeout: Subject CPUs unable to finish machine check processing")) {
|
||||||
atomic_set(&global_nwo, 0);
|
arch_atomic_set(&global_nwo, 0);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
ndelay(SPINUNIT);
|
ndelay(SPINUNIT);
|
||||||
@ -1063,7 +1087,7 @@ static noinstr int mce_start(int *no_way_out)
|
|||||||
/*
|
/*
|
||||||
* Cache the global no_way_out state.
|
* Cache the global no_way_out state.
|
||||||
*/
|
*/
|
||||||
*no_way_out = atomic_read(&global_nwo);
|
*no_way_out = arch_atomic_read(&global_nwo);
|
||||||
|
|
||||||
ret = order;
|
ret = order;
|
||||||
|
|
||||||
@ -1148,12 +1172,12 @@ static noinstr int mce_end(int order)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mce_clear_state(unsigned long *toclear)
|
static __always_inline void mce_clear_state(unsigned long *toclear)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||||
if (test_bit(i, toclear))
|
if (arch_test_bit(i, toclear))
|
||||||
mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
|
mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1203,8 +1227,8 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
|
|||||||
int severity, i, taint = 0;
|
int severity, i, taint = 0;
|
||||||
|
|
||||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||||
__clear_bit(i, toclear);
|
arch___clear_bit(i, toclear);
|
||||||
if (!test_bit(i, valid_banks))
|
if (!arch_test_bit(i, valid_banks))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!mce_banks[i].ctl)
|
if (!mce_banks[i].ctl)
|
||||||
@ -1229,7 +1253,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
|
|||||||
/* Set taint even when machine check was not enabled. */
|
/* Set taint even when machine check was not enabled. */
|
||||||
taint++;
|
taint++;
|
||||||
|
|
||||||
severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
|
severity = mce_severity(m, regs, NULL, true);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When machine check was for corrected/deferred handler don't
|
* When machine check was for corrected/deferred handler don't
|
||||||
@ -1239,7 +1263,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
|
|||||||
severity == MCE_UCNA_SEVERITY) && !no_way_out)
|
severity == MCE_UCNA_SEVERITY) && !no_way_out)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
__set_bit(i, toclear);
|
arch___set_bit(i, toclear);
|
||||||
|
|
||||||
/* Machine check event was not enabled. Clear, but ignore. */
|
/* Machine check event was not enabled. Clear, but ignore. */
|
||||||
if (severity == MCE_NO_SEVERITY)
|
if (severity == MCE_NO_SEVERITY)
|
||||||
@ -1389,7 +1413,6 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
|
int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
|
||||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
|
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
|
||||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
|
DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
|
||||||
struct mca_config *cfg = &mca_cfg;
|
|
||||||
struct mce m, *final;
|
struct mce m, *final;
|
||||||
char *msg = NULL;
|
char *msg = NULL;
|
||||||
|
|
||||||
@ -1400,6 +1423,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
else if (unlikely(!mca_cfg.initialized))
|
else if (unlikely(!mca_cfg.initialized))
|
||||||
return unexpected_machine_check(regs);
|
return unexpected_machine_check(regs);
|
||||||
|
|
||||||
|
if (mce_flags.skx_repmov_quirk && quirk_skylake_repmov())
|
||||||
|
goto clear;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Establish sequential order between the CPUs entering the machine
|
* Establish sequential order between the CPUs entering the machine
|
||||||
* check handler.
|
* check handler.
|
||||||
@ -1408,7 +1434,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If no_way_out gets set, there is no safe way to recover from this
|
* If no_way_out gets set, there is no safe way to recover from this
|
||||||
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
|
* MCE.
|
||||||
*/
|
*/
|
||||||
no_way_out = 0;
|
no_way_out = 0;
|
||||||
|
|
||||||
@ -1442,7 +1468,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
* severity is MCE_AR_SEVERITY we have other options.
|
* severity is MCE_AR_SEVERITY we have other options.
|
||||||
*/
|
*/
|
||||||
if (!(m.mcgstatus & MCG_STATUS_RIPV))
|
if (!(m.mcgstatus & MCG_STATUS_RIPV))
|
||||||
kill_current_task = (cfg->tolerant == 3) ? 0 : 1;
|
kill_current_task = 1;
|
||||||
/*
|
/*
|
||||||
* Check if this MCE is signaled to only this logical processor,
|
* Check if this MCE is signaled to only this logical processor,
|
||||||
* on Intel, Zhaoxin only.
|
* on Intel, Zhaoxin only.
|
||||||
@ -1459,7 +1485,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
* to see it will clear it.
|
* to see it will clear it.
|
||||||
*/
|
*/
|
||||||
if (lmce) {
|
if (lmce) {
|
||||||
if (no_way_out && cfg->tolerant < 3)
|
if (no_way_out)
|
||||||
mce_panic("Fatal local machine check", &m, msg);
|
mce_panic("Fatal local machine check", &m, msg);
|
||||||
} else {
|
} else {
|
||||||
order = mce_start(&no_way_out);
|
order = mce_start(&no_way_out);
|
||||||
@ -1479,7 +1505,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
if (!no_way_out)
|
if (!no_way_out)
|
||||||
no_way_out = worst >= MCE_PANIC_SEVERITY;
|
no_way_out = worst >= MCE_PANIC_SEVERITY;
|
||||||
|
|
||||||
if (no_way_out && cfg->tolerant < 3)
|
if (no_way_out)
|
||||||
mce_panic("Fatal machine check on current CPU", &m, msg);
|
mce_panic("Fatal machine check on current CPU", &m, msg);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1491,8 +1517,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
* fatal error. We call "mce_severity()" again to
|
* fatal error. We call "mce_severity()" again to
|
||||||
* make sure we have the right "msg".
|
* make sure we have the right "msg".
|
||||||
*/
|
*/
|
||||||
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
|
if (worst >= MCE_PANIC_SEVERITY) {
|
||||||
mce_severity(&m, regs, cfg->tolerant, &msg, true);
|
mce_severity(&m, regs, &msg, true);
|
||||||
mce_panic("Local fatal machine check!", &m, msg);
|
mce_panic("Local fatal machine check!", &m, msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1542,6 +1568,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
|
|||||||
out:
|
out:
|
||||||
instrumentation_end();
|
instrumentation_end();
|
||||||
|
|
||||||
|
clear:
|
||||||
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(do_machine_check);
|
EXPORT_SYMBOL_GPL(do_machine_check);
|
||||||
@ -1855,6 +1882,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
|||||||
|
|
||||||
if (c->x86 == 6 && c->x86_model == 45)
|
if (c->x86 == 6 && c->x86_model == 45)
|
||||||
mce_flags.snb_ifu_quirk = 1;
|
mce_flags.snb_ifu_quirk = 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Skylake, Cascade Lake and Cooper Lake require a quirk on
|
||||||
|
* rep movs.
|
||||||
|
*/
|
||||||
|
if (c->x86 == 6 && c->x86_model == INTEL_FAM6_SKYLAKE_X)
|
||||||
|
mce_flags.skx_repmov_quirk = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
|
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
|
||||||
@ -2220,10 +2254,9 @@ static int __init mcheck_enable(char *str)
|
|||||||
cfg->bios_cmci_threshold = 1;
|
cfg->bios_cmci_threshold = 1;
|
||||||
else if (!strcmp(str, "recovery"))
|
else if (!strcmp(str, "recovery"))
|
||||||
cfg->recovery = 1;
|
cfg->recovery = 1;
|
||||||
else if (isdigit(str[0])) {
|
else if (isdigit(str[0]))
|
||||||
if (get_option(&str, &cfg->tolerant) == 2)
|
|
||||||
get_option(&str, &(cfg->monarch_timeout));
|
get_option(&str, &(cfg->monarch_timeout));
|
||||||
} else {
|
else {
|
||||||
pr_info("mce argument %s ignored. Please use /sys\n", str);
|
pr_info("mce argument %s ignored. Please use /sys\n", str);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -2473,7 +2506,6 @@ static ssize_t store_int_with_restart(struct device *s,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
|
|
||||||
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
|
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
|
||||||
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
|
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
|
||||||
static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);
|
static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);
|
||||||
@ -2494,7 +2526,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static struct device_attribute *mce_device_attrs[] = {
|
static struct device_attribute *mce_device_attrs[] = {
|
||||||
&dev_attr_tolerant.attr,
|
|
||||||
&dev_attr_check_interval.attr,
|
&dev_attr_check_interval.attr,
|
||||||
#ifdef CONFIG_X86_MCELOG_LEGACY
|
#ifdef CONFIG_X86_MCELOG_LEGACY
|
||||||
&dev_attr_trigger,
|
&dev_attr_trigger,
|
||||||
|
@ -35,7 +35,7 @@ int mce_gen_pool_add(struct mce *mce);
|
|||||||
int mce_gen_pool_init(void);
|
int mce_gen_pool_init(void);
|
||||||
struct llist_node *mce_gen_pool_prepare_records(void);
|
struct llist_node *mce_gen_pool_prepare_records(void);
|
||||||
|
|
||||||
int mce_severity(struct mce *a, struct pt_regs *regs, int tolerant, char **msg, bool is_excp);
|
int mce_severity(struct mce *a, struct pt_regs *regs, char **msg, bool is_excp);
|
||||||
struct dentry *mce_get_debugfs_dir(void);
|
struct dentry *mce_get_debugfs_dir(void);
|
||||||
|
|
||||||
extern mce_banks_t mce_banks_ce_disabled;
|
extern mce_banks_t mce_banks_ce_disabled;
|
||||||
@ -127,7 +127,6 @@ struct mca_config {
|
|||||||
bool ignore_ce;
|
bool ignore_ce;
|
||||||
bool print_all;
|
bool print_all;
|
||||||
|
|
||||||
int tolerant;
|
|
||||||
int monarch_timeout;
|
int monarch_timeout;
|
||||||
int panic_timeout;
|
int panic_timeout;
|
||||||
u32 rip_msr;
|
u32 rip_msr;
|
||||||
@ -170,7 +169,10 @@ struct mce_vendor_flags {
|
|||||||
/* SandyBridge IFU quirk */
|
/* SandyBridge IFU quirk */
|
||||||
snb_ifu_quirk : 1,
|
snb_ifu_quirk : 1,
|
||||||
|
|
||||||
__reserved_0 : 57;
|
/* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */
|
||||||
|
skx_repmov_quirk : 1,
|
||||||
|
|
||||||
|
__reserved_0 : 56;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct mce_vendor_flags mce_flags;
|
extern struct mce_vendor_flags mce_flags;
|
||||||
@ -182,8 +184,6 @@ enum mca_msr {
|
|||||||
MCA_MISC,
|
MCA_MISC,
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 mca_msr_reg(int bank, enum mca_msr reg);
|
|
||||||
|
|
||||||
/* Decide whether to add MCE record to MCE event pool or filter it out. */
|
/* Decide whether to add MCE record to MCE event pool or filter it out. */
|
||||||
extern bool filter_mce(struct mce *m);
|
extern bool filter_mce(struct mce *m);
|
||||||
|
|
||||||
@ -209,4 +209,25 @@ static inline void winchip_machine_check(struct pt_regs *regs) {}
|
|||||||
|
|
||||||
noinstr u64 mce_rdmsrl(u32 msr);
|
noinstr u64 mce_rdmsrl(u32 msr);
|
||||||
|
|
||||||
|
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
|
||||||
|
{
|
||||||
|
if (mce_flags.smca) {
|
||||||
|
switch (reg) {
|
||||||
|
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
|
||||||
|
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
|
||||||
|
case MCA_MISC: return MSR_AMD64_SMCA_MCx_MISC(bank);
|
||||||
|
case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (reg) {
|
||||||
|
case MCA_CTL: return MSR_IA32_MCx_CTL(bank);
|
||||||
|
case MCA_ADDR: return MSR_IA32_MCx_ADDR(bank);
|
||||||
|
case MCA_MISC: return MSR_IA32_MCx_MISC(bank);
|
||||||
|
case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* __X86_MCE_INTERNAL_H__ */
|
#endif /* __X86_MCE_INTERNAL_H__ */
|
||||||
|
@ -301,7 +301,7 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
|
static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
|
||||||
{
|
{
|
||||||
u64 mcx_cfg;
|
u64 mcx_cfg;
|
||||||
|
|
||||||
@ -330,8 +330,7 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
|
|||||||
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
|
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
|
||||||
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
|
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
|
||||||
*/
|
*/
|
||||||
static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
|
static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
|
||||||
char **msg, bool is_excp)
|
|
||||||
{
|
{
|
||||||
enum context ctx = error_context(m, regs);
|
enum context ctx = error_context(m, regs);
|
||||||
|
|
||||||
@ -383,8 +382,7 @@ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tol
|
|||||||
return MCE_KEEP_SEVERITY;
|
return MCE_KEEP_SEVERITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs,
|
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
|
||||||
int tolerant, char **msg, bool is_excp)
|
|
||||||
{
|
{
|
||||||
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
|
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
|
||||||
enum context ctx = error_context(m, regs);
|
enum context ctx = error_context(m, regs);
|
||||||
@ -412,22 +410,21 @@ static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs,
|
|||||||
if (msg)
|
if (msg)
|
||||||
*msg = s->msg;
|
*msg = s->msg;
|
||||||
s->covered = 1;
|
s->covered = 1;
|
||||||
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
|
|
||||||
if (tolerant < 1)
|
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
|
||||||
return MCE_PANIC_SEVERITY;
|
return MCE_PANIC_SEVERITY;
|
||||||
}
|
|
||||||
return s->sev;
|
return s->sev;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int noinstr mce_severity(struct mce *m, struct pt_regs *regs, int tolerant, char **msg,
|
int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
|
||||||
bool is_excp)
|
|
||||||
{
|
{
|
||||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
|
||||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
|
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
|
||||||
return mce_severity_amd(m, regs, tolerant, msg, is_excp);
|
return mce_severity_amd(m, regs, msg, is_excp);
|
||||||
else
|
else
|
||||||
return mce_severity_intel(m, regs, tolerant, msg, is_excp);
|
return mce_severity_intel(m, regs, msg, is_excp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_FS
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
Loading…
Reference in New Issue
Block a user