mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-19 12:00:00 +00:00
EDAC/ghes: Unify CPER memory error location reporting
Switch the GHES EDAC memory error reporting functions to use the common CPER ones and get rid of code duplication. [ bp: - rewrite commit message, remove useless text - rip out useless reformatting - align function params on the opening brace - rename function to a more descriptive name - drop useless function exports - handle buffer lengths properly when printing other detail - remove useless casting ] Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com> Signed-off-by: Borislav Petkov <bp@suse.de> Link: https://lore.kernel.org/r/20220308144053.49090-3-xueshuai@linux.alibaba.com
This commit is contained in:
parent
bdae796514
commit
ed27b5df38
@ -55,6 +55,7 @@ config EDAC_DECODE_MCE
|
||||
config EDAC_GHES
|
||||
bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
|
||||
depends on ACPI_APEI_GHES && (EDAC=y)
|
||||
select UEFI_CPER
|
||||
help
|
||||
Not all machines support hardware-driven error report. Some of those
|
||||
provide a BIOS-driven error report mechanism via ACPI, using the
|
||||
|
@ -15,11 +15,13 @@
|
||||
#include "edac_module.h"
|
||||
#include <ras/ras_event.h>
|
||||
|
||||
#define OTHER_DETAIL_LEN 400
|
||||
|
||||
struct ghes_pvt {
|
||||
struct mem_ctl_info *mci;
|
||||
|
||||
/* Buffers for the error handling routine */
|
||||
char other_detail[400];
|
||||
char other_detail[OTHER_DETAIL_LEN];
|
||||
char msg[80];
|
||||
};
|
||||
|
||||
@ -235,8 +237,34 @@ static void ghes_scan_system(void)
|
||||
system_scanned = true;
|
||||
}
|
||||
|
||||
static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char *msg,
|
||||
const char *location, unsigned int len)
|
||||
{
|
||||
u32 n;
|
||||
|
||||
if (!msg)
|
||||
return 0;
|
||||
|
||||
n = 0;
|
||||
len -= 1;
|
||||
|
||||
n += scnprintf(msg + n, len - n, "APEI location: %s ", location);
|
||||
|
||||
if (!(mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS))
|
||||
goto out;
|
||||
|
||||
n += scnprintf(msg + n, len - n, "status(0x%016llx): ", mem->error_status);
|
||||
n += scnprintf(msg + n, len - n, "%s ", cper_mem_err_status_str(mem->error_status));
|
||||
|
||||
out:
|
||||
msg[n] = '\0';
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
{
|
||||
struct cper_mem_err_compact cmem;
|
||||
struct edac_raw_error_desc *e;
|
||||
struct mem_ctl_info *mci;
|
||||
struct ghes_pvt *pvt;
|
||||
@ -292,60 +320,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
|
||||
/* Error type, mapped on e->msg */
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
|
||||
u8 etype = mem_err->error_type;
|
||||
|
||||
p = pvt->msg;
|
||||
switch (mem_err->error_type) {
|
||||
case 0:
|
||||
p += sprintf(p, "Unknown");
|
||||
break;
|
||||
case 1:
|
||||
p += sprintf(p, "No error");
|
||||
break;
|
||||
case 2:
|
||||
p += sprintf(p, "Single-bit ECC");
|
||||
break;
|
||||
case 3:
|
||||
p += sprintf(p, "Multi-bit ECC");
|
||||
break;
|
||||
case 4:
|
||||
p += sprintf(p, "Single-symbol ChipKill ECC");
|
||||
break;
|
||||
case 5:
|
||||
p += sprintf(p, "Multi-symbol ChipKill ECC");
|
||||
break;
|
||||
case 6:
|
||||
p += sprintf(p, "Master abort");
|
||||
break;
|
||||
case 7:
|
||||
p += sprintf(p, "Target abort");
|
||||
break;
|
||||
case 8:
|
||||
p += sprintf(p, "Parity Error");
|
||||
break;
|
||||
case 9:
|
||||
p += sprintf(p, "Watchdog timeout");
|
||||
break;
|
||||
case 10:
|
||||
p += sprintf(p, "Invalid address");
|
||||
break;
|
||||
case 11:
|
||||
p += sprintf(p, "Mirror Broken");
|
||||
break;
|
||||
case 12:
|
||||
p += sprintf(p, "Memory Sparing");
|
||||
break;
|
||||
case 13:
|
||||
p += sprintf(p, "Scrub corrected error");
|
||||
break;
|
||||
case 14:
|
||||
p += sprintf(p, "Scrub uncorrected error");
|
||||
break;
|
||||
case 15:
|
||||
p += sprintf(p, "Physical Memory Map-out event");
|
||||
break;
|
||||
default:
|
||||
p += sprintf(p, "reserved error (%d)",
|
||||
mem_err->error_type);
|
||||
}
|
||||
p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype));
|
||||
} else {
|
||||
strcpy(pvt->msg, "unknown error");
|
||||
}
|
||||
@ -362,52 +340,19 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
|
||||
/* Memory error location, mapped on e->location */
|
||||
p = e->location;
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
|
||||
p += sprintf(p, "node:%d ", mem_err->node);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
|
||||
p += sprintf(p, "card:%d ", mem_err->card);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
|
||||
p += sprintf(p, "module:%d ", mem_err->module);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
|
||||
p += sprintf(p, "rank:%d ", mem_err->rank);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
|
||||
p += sprintf(p, "bank:%d ", mem_err->bank);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
|
||||
p += sprintf(p, "bank_group:%d ",
|
||||
mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
|
||||
p += sprintf(p, "bank_address:%d ",
|
||||
mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
|
||||
if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
|
||||
u32 row = mem_err->row;
|
||||
cper_mem_err_pack(mem_err, &cmem);
|
||||
p += cper_mem_err_location(&cmem, p);
|
||||
|
||||
row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
|
||||
p += sprintf(p, "row:%d ", row);
|
||||
}
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
|
||||
p += sprintf(p, "col:%d ", mem_err->column);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
|
||||
p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
|
||||
const char *bank = NULL, *device = NULL;
|
||||
struct dimm_info *dimm;
|
||||
|
||||
dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
|
||||
if (bank != NULL && device != NULL)
|
||||
p += sprintf(p, "DIMM location:%s %s ", bank, device);
|
||||
else
|
||||
p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
|
||||
mem_err->mem_dev_handle);
|
||||
|
||||
p += cper_dimm_err_location(&cmem, p);
|
||||
dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
|
||||
if (dimm) {
|
||||
e->top_layer = dimm->idx;
|
||||
strcpy(e->label, dimm->label);
|
||||
}
|
||||
}
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
|
||||
p += sprintf(p, "chipID: %d ",
|
||||
mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
|
||||
if (p > e->location)
|
||||
*(p - 1) = '\0';
|
||||
|
||||
@ -416,78 +361,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
|
||||
/* All other fields are mapped on e->other_detail */
|
||||
p = pvt->other_detail;
|
||||
p += snprintf(p, sizeof(pvt->other_detail),
|
||||
"APEI location: %s ", e->location);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
|
||||
u64 status = mem_err->error_status;
|
||||
|
||||
p += sprintf(p, "status(0x%016llx): ", (long long)status);
|
||||
switch ((status >> 8) & 0xff) {
|
||||
case 1:
|
||||
p += sprintf(p, "Error detected internal to the component ");
|
||||
break;
|
||||
case 16:
|
||||
p += sprintf(p, "Error detected in the bus ");
|
||||
break;
|
||||
case 4:
|
||||
p += sprintf(p, "Storage error in DRAM memory ");
|
||||
break;
|
||||
case 5:
|
||||
p += sprintf(p, "Storage error in TLB ");
|
||||
break;
|
||||
case 6:
|
||||
p += sprintf(p, "Storage error in cache ");
|
||||
break;
|
||||
case 7:
|
||||
p += sprintf(p, "Error in one or more functional units ");
|
||||
break;
|
||||
case 8:
|
||||
p += sprintf(p, "component failed self test ");
|
||||
break;
|
||||
case 9:
|
||||
p += sprintf(p, "Overflow or undervalue of internal queue ");
|
||||
break;
|
||||
case 17:
|
||||
p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
|
||||
break;
|
||||
case 18:
|
||||
p += sprintf(p, "Improper access error ");
|
||||
break;
|
||||
case 19:
|
||||
p += sprintf(p, "Access to a memory address which is not mapped to any component ");
|
||||
break;
|
||||
case 20:
|
||||
p += sprintf(p, "Loss of Lockstep ");
|
||||
break;
|
||||
case 21:
|
||||
p += sprintf(p, "Response not associated with a request ");
|
||||
break;
|
||||
case 22:
|
||||
p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
|
||||
break;
|
||||
case 23:
|
||||
p += sprintf(p, "Detection of a PATH_ERROR ");
|
||||
break;
|
||||
case 25:
|
||||
p += sprintf(p, "Bus operation timeout ");
|
||||
break;
|
||||
case 26:
|
||||
p += sprintf(p, "A read was issued to data that has been poisoned ");
|
||||
break;
|
||||
default:
|
||||
p += sprintf(p, "reserved ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
|
||||
p += sprintf(p, "requestorID: 0x%016llx ",
|
||||
(long long)mem_err->requestor_id);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
|
||||
p += sprintf(p, "responderID: 0x%016llx ",
|
||||
(long long)mem_err->responder_id);
|
||||
if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
|
||||
p += sprintf(p, "targetID: 0x%016llx ",
|
||||
(long long)mem_err->responder_id);
|
||||
p += print_mem_error_other_detail(mem_err, p, e->location, OTHER_DETAIL_LEN);
|
||||
if (p > pvt->other_detail)
|
||||
*(p - 1) = '\0';
|
||||
|
||||
|
@ -237,7 +237,7 @@ const char *cper_mem_err_status_str(u64 status)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cper_mem_err_status_str);
|
||||
|
||||
static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||
int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||
{
|
||||
u32 len, n;
|
||||
|
||||
@ -291,7 +291,7 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||
return n;
|
||||
}
|
||||
|
||||
static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||
int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
|
||||
{
|
||||
u32 len, n;
|
||||
const char *bank = NULL, *device = NULL;
|
||||
|
@ -569,5 +569,7 @@ void cper_print_proc_arm(const char *pfx,
|
||||
const struct cper_sec_proc_arm *proc);
|
||||
void cper_print_proc_ia(const char *pfx,
|
||||
const struct cper_sec_proc_ia *proc);
|
||||
int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg);
|
||||
int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg);
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user