mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 21:53:44 +00:00
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (21 commits) MAINTAINERS: add an entry for Edac Sandy Bridge driver edac: tag sb_edac as EXPERIMENTAL, as it requires more testing EDAC: Fix incorrect edac mode reporting in sb_edac edac: sb_edac: Add it to the building system edac: Add an experimental new driver to support Sandy Bridge CPU's i7300_edac: Fix error cleanup logic i7core_edac: Initialize memory name with cpu, channel, bank i7core_edac: Fix compilation on 32 bits arch i7core_edac: scrubbing fixups EDAC: Correct Kconfig dependencies i7core_edac: return -ENODEV if no MC is found i7core_edac: use edac's own way to print errors MAINTAINERS: remove dropped edac_mce.* from the file i7core_edac: Drop the edac_mce facility x86, MCE: Use notifier chain only for MCE decoding EDAC i7core: Use mce socketid for better compatibility i7core_edac: Don't enable memory scrubbing for Xeon 35xx i7core_edac: Add scrubbing support edac: Move edac main structs to include/linux/edac.h i7core_edac: Fix oops when trying to inject errors ...
This commit is contained in:
commit
6681ba7ec4
@ -2467,8 +2467,6 @@ L: linux-edac@vger.kernel.org
|
||||
W: bluesmoke.sourceforge.net
|
||||
S: Maintained
|
||||
F: drivers/edac/i7core_edac.c
|
||||
F: drivers/edac/edac_mce.c
|
||||
F: include/linux/edac_mce.h
|
||||
|
||||
EDAC-I82975X
|
||||
M: Ranganathan Desikan <ravi@jetztechnologies.com>
|
||||
@ -2492,6 +2490,13 @@ W: bluesmoke.sourceforge.net
|
||||
S: Maintained
|
||||
F: drivers/edac/r82600_edac.c
|
||||
|
||||
EDAC-SBRIDGE
|
||||
M: Mauro Carvalho Chehab <mchehab@redhat.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
W: bluesmoke.sourceforge.net
|
||||
S: Maintained
|
||||
F: drivers/edac/sb_edac.c
|
||||
|
||||
EDIROL UA-101/UA-1000 DRIVER
|
||||
M: Clemens Ladisch <clemens@ladisch.de>
|
||||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
|
@ -36,7 +36,6 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/edac_mce.h>
|
||||
#include <linux/irq_work.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
@ -144,23 +143,20 @@ static struct mce_log mcelog = {
|
||||
void mce_log(struct mce *mce)
|
||||
{
|
||||
unsigned next, entry;
|
||||
int ret = 0;
|
||||
|
||||
/* Emit the trace record: */
|
||||
trace_mce_record(mce);
|
||||
|
||||
ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
|
||||
if (ret == NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
mce->finished = 0;
|
||||
wmb();
|
||||
for (;;) {
|
||||
entry = rcu_dereference_check_mce(mcelog.next);
|
||||
for (;;) {
|
||||
/*
|
||||
* If edac_mce is enabled, it will check the error type
|
||||
* and will process it, if it is a known error.
|
||||
* Otherwise, the error will be sent through mcelog
|
||||
* interface
|
||||
*/
|
||||
if (edac_mce_parse(mce))
|
||||
return;
|
||||
|
||||
/*
|
||||
* When the buffer fills up discard new entries.
|
||||
@ -556,10 +552,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
||||
* Don't get the IP here because it's unlikely to
|
||||
* have anything to do with the actual error location.
|
||||
*/
|
||||
if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
|
||||
if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
|
||||
mce_log(&m);
|
||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear state for this bank.
|
||||
|
@ -41,7 +41,7 @@ config EDAC_DEBUG
|
||||
|
||||
config EDAC_DECODE_MCE
|
||||
tristate "Decode MCEs in human-readable form (only on AMD for now)"
|
||||
depends on CPU_SUP_AMD && X86_MCE
|
||||
depends on CPU_SUP_AMD && X86_MCE_AMD
|
||||
default y
|
||||
---help---
|
||||
Enable this option if you want to decode Machine Check Exceptions
|
||||
@ -71,9 +71,6 @@ config EDAC_MM_EDAC
|
||||
occurred so that a particular failing memory module can be
|
||||
replaced. If unsure, select 'Y'.
|
||||
|
||||
config EDAC_MCE
|
||||
bool
|
||||
|
||||
config EDAC_AMD64
|
||||
tristate "AMD64 (Opteron, Athlon64) K8, F10h"
|
||||
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
|
||||
@ -173,8 +170,7 @@ config EDAC_I5400
|
||||
|
||||
config EDAC_I7CORE
|
||||
tristate "Intel i7 Core (Nehalem) processors"
|
||||
depends on EDAC_MM_EDAC && PCI && X86
|
||||
select EDAC_MCE
|
||||
depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
|
||||
help
|
||||
Support for error detection and correction on the Intel
|
||||
i7 Core (Nehalem) Integrated Memory Controller that exists on
|
||||
@ -216,6 +212,14 @@ config EDAC_I7300
|
||||
Support for error detection and correction on the Intel
|
||||
Clarksboro MCH (Intel 7300 chipset).
|
||||
|
||||
config EDAC_SBRIDGE
|
||||
tristate "Intel Sandy-Bridge Integrated MC"
|
||||
depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
|
||||
depends on EXPERIMENTAL
|
||||
help
|
||||
Support for error detection and correction on the Intel
|
||||
Sandy Bridge Integrated Memory Controller.
|
||||
|
||||
config EDAC_MPC85XX
|
||||
tristate "Freescale MPC83xx / MPC85xx"
|
||||
depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
obj-$(CONFIG_EDAC) := edac_stub.o
|
||||
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
|
||||
obj-$(CONFIG_EDAC_MCE) += edac_mce.o
|
||||
|
||||
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
|
||||
edac_core-y += edac_module.o edac_device_sysfs.o
|
||||
@ -29,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
|
||||
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
|
||||
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
|
||||
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
|
||||
obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
|
||||
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
|
||||
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
|
||||
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
|
||||
|
@ -34,11 +34,10 @@
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/edac.h>
|
||||
|
||||
#define EDAC_MC_LABEL_LEN 31
|
||||
#define EDAC_DEVICE_NAME_LEN 31
|
||||
#define EDAC_ATTRIB_VALUE_LEN 15
|
||||
#define MC_PROC_NAME_MAX_LEN 7
|
||||
|
||||
#if PAGE_SHIFT < 20
|
||||
#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
|
||||
@ -101,353 +100,6 @@ extern int edac_debug_level;
|
||||
|
||||
#define edac_dev_name(dev) (dev)->dev_name
|
||||
|
||||
/* memory devices */
|
||||
enum dev_type {
|
||||
DEV_UNKNOWN = 0,
|
||||
DEV_X1,
|
||||
DEV_X2,
|
||||
DEV_X4,
|
||||
DEV_X8,
|
||||
DEV_X16,
|
||||
DEV_X32, /* Do these parts exist? */
|
||||
DEV_X64 /* Do these parts exist? */
|
||||
};
|
||||
|
||||
#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
|
||||
#define DEV_FLAG_X1 BIT(DEV_X1)
|
||||
#define DEV_FLAG_X2 BIT(DEV_X2)
|
||||
#define DEV_FLAG_X4 BIT(DEV_X4)
|
||||
#define DEV_FLAG_X8 BIT(DEV_X8)
|
||||
#define DEV_FLAG_X16 BIT(DEV_X16)
|
||||
#define DEV_FLAG_X32 BIT(DEV_X32)
|
||||
#define DEV_FLAG_X64 BIT(DEV_X64)
|
||||
|
||||
/* memory types */
|
||||
enum mem_type {
|
||||
MEM_EMPTY = 0, /* Empty csrow */
|
||||
MEM_RESERVED, /* Reserved csrow type */
|
||||
MEM_UNKNOWN, /* Unknown csrow type */
|
||||
MEM_FPM, /* Fast page mode */
|
||||
MEM_EDO, /* Extended data out */
|
||||
MEM_BEDO, /* Burst Extended data out */
|
||||
MEM_SDR, /* Single data rate SDRAM */
|
||||
MEM_RDR, /* Registered single data rate SDRAM */
|
||||
MEM_DDR, /* Double data rate SDRAM */
|
||||
MEM_RDDR, /* Registered Double data rate SDRAM */
|
||||
MEM_RMBS, /* Rambus DRAM */
|
||||
MEM_DDR2, /* DDR2 RAM */
|
||||
MEM_FB_DDR2, /* fully buffered DDR2 */
|
||||
MEM_RDDR2, /* Registered DDR2 RAM */
|
||||
MEM_XDR, /* Rambus XDR */
|
||||
MEM_DDR3, /* DDR3 RAM */
|
||||
MEM_RDDR3, /* Registered DDR3 RAM */
|
||||
};
|
||||
|
||||
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
|
||||
#define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
|
||||
#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
|
||||
#define MEM_FLAG_FPM BIT(MEM_FPM)
|
||||
#define MEM_FLAG_EDO BIT(MEM_EDO)
|
||||
#define MEM_FLAG_BEDO BIT(MEM_BEDO)
|
||||
#define MEM_FLAG_SDR BIT(MEM_SDR)
|
||||
#define MEM_FLAG_RDR BIT(MEM_RDR)
|
||||
#define MEM_FLAG_DDR BIT(MEM_DDR)
|
||||
#define MEM_FLAG_RDDR BIT(MEM_RDDR)
|
||||
#define MEM_FLAG_RMBS BIT(MEM_RMBS)
|
||||
#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
|
||||
#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
|
||||
#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
|
||||
#define MEM_FLAG_XDR BIT(MEM_XDR)
|
||||
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
|
||||
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
|
||||
|
||||
/* chipset Error Detection and Correction capabilities and mode */
|
||||
enum edac_type {
|
||||
EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
|
||||
EDAC_NONE, /* Doesn't support ECC */
|
||||
EDAC_RESERVED, /* Reserved ECC type */
|
||||
EDAC_PARITY, /* Detects parity errors */
|
||||
EDAC_EC, /* Error Checking - no correction */
|
||||
EDAC_SECDED, /* Single bit error correction, Double detection */
|
||||
EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
|
||||
EDAC_S4ECD4ED, /* Chipkill x4 devices */
|
||||
EDAC_S8ECD8ED, /* Chipkill x8 devices */
|
||||
EDAC_S16ECD16ED, /* Chipkill x16 devices */
|
||||
};
|
||||
|
||||
#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
|
||||
#define EDAC_FLAG_NONE BIT(EDAC_NONE)
|
||||
#define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
|
||||
#define EDAC_FLAG_EC BIT(EDAC_EC)
|
||||
#define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
|
||||
#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
|
||||
#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
|
||||
#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
|
||||
#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
|
||||
|
||||
/* scrubbing capabilities */
|
||||
enum scrub_type {
|
||||
SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */
|
||||
SCRUB_NONE, /* No scrubber */
|
||||
SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */
|
||||
SCRUB_SW_SRC, /* Software scrub only errors */
|
||||
SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */
|
||||
SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */
|
||||
SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */
|
||||
SCRUB_HW_SRC, /* Hardware scrub only errors */
|
||||
SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */
|
||||
SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */
|
||||
};
|
||||
|
||||
#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
|
||||
#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
|
||||
#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
|
||||
/* Flag bit for SCRUB_SW_TUNABLE (software scrub frequency is tunable) in enum scrub_type */
#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_TUNABLE)
|
||||
#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
|
||||
#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
|
||||
#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
|
||||
#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
|
||||
|
||||
/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
|
||||
|
||||
/* EDAC internal operation states */
|
||||
#define OP_ALLOC 0x100
|
||||
#define OP_RUNNING_POLL 0x201
|
||||
#define OP_RUNNING_INTERRUPT 0x202
|
||||
#define OP_RUNNING_POLL_INTR 0x203
|
||||
#define OP_OFFLINE 0x300
|
||||
|
||||
/*
|
||||
* There are several things to be aware of that aren't at all obvious:
|
||||
*
|
||||
*
|
||||
* SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
|
||||
*
|
||||
* These are some of the many terms that are thrown about that don't always
|
||||
* mean what people think they mean (Inconceivable!). In the interest of
|
||||
* creating a common ground for discussion, terms and their definitions
|
||||
* will be established.
|
||||
*
|
||||
* Memory devices: The individual chip on a memory stick. These devices
|
||||
* commonly output 4 and 8 bits each. Grouping several
|
||||
* of these in parallel provides 64 bits which is common
|
||||
* for a memory stick.
|
||||
*
|
||||
* Memory Stick: A printed circuit board that aggregates multiple
|
||||
* memory devices in parallel. This is the atomic
|
||||
* memory component that is purchaseable by Joe consumer
|
||||
* and loaded into a memory socket.
|
||||
*
|
||||
* Socket: A physical connector on the motherboard that accepts
|
||||
* a single memory stick.
|
||||
*
|
||||
* Channel: Set of memory devices on a memory stick that must be
|
||||
* grouped in parallel with one or more additional
|
||||
* channels from other memory sticks. This parallel
|
||||
* grouping of the output from multiple channels is
|
||||
* necessary for the smallest granularity of memory access.
|
||||
* Some memory controllers are capable of single channel -
|
||||
* which means that memory sticks can be loaded
|
||||
* individually. Other memory controllers are only
|
||||
* capable of dual channel - which means that memory
|
||||
* sticks must be loaded as pairs (see "socket set").
|
||||
*
|
||||
* Chip-select row: All of the memory devices that are selected together
|
||||
* for a single, minimum grain of memory access.
|
||||
* This selects all of the parallel memory devices across
|
||||
* all of the parallel channels. Common chip-select rows
|
||||
* for single channel are 64 bits, for dual channel 128
|
||||
* bits.
|
||||
*
|
||||
* Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
|
||||
* Motherboards commonly drive two chip-select pins to
|
||||
* a memory stick. A single-ranked stick, will occupy
|
||||
* only one of those rows. The other will be unused.
|
||||
*
|
||||
* Double-Ranked stick: A double-ranked stick has two chip-select rows which
|
||||
* access different sets of memory devices. The two
|
||||
* rows cannot be accessed concurrently.
|
||||
*
|
||||
* Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
|
||||
* A double-sided stick has two chip-select rows which
|
||||
* access different sets of memory devices. The two
|
||||
* rows cannot be accessed concurrently. "Double-sided"
|
||||
* is irrespective of the memory devices being mounted
|
||||
* on both sides of the memory stick.
|
||||
*
|
||||
* Socket set: All of the memory sticks that are required for
|
||||
* a single memory access or all of the memory sticks
|
||||
* spanned by a chip-select row. A single socket set
|
||||
* has two chip-select rows and if double-sided sticks
|
||||
* are used these will occupy those chip-select rows.
|
||||
*
|
||||
* Bank: This term is avoided because it is unclear when
|
||||
* needing to distinguish between chip-select rows and
|
||||
* socket sets.
|
||||
*
|
||||
* Controller pages:
|
||||
*
|
||||
* Physical pages:
|
||||
*
|
||||
* Virtual pages:
|
||||
*
|
||||
*
|
||||
* STRUCTURE ORGANIZATION AND CHOICES
|
||||
*
|
||||
*
|
||||
*
|
||||
* PS - I enjoyed writing all that about as much as you enjoyed reading it.
|
||||
*/
|
||||
|
||||
struct channel_info {
|
||||
int chan_idx; /* channel index */
|
||||
u32 ce_count; /* Correctable Errors for this CHANNEL */
|
||||
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
|
||||
struct csrow_info *csrow; /* the parent */
|
||||
};
|
||||
|
||||
struct csrow_info {
|
||||
unsigned long first_page; /* first page number in dimm */
|
||||
unsigned long last_page; /* last page number in dimm */
|
||||
unsigned long page_mask; /* used for interleaving -
|
||||
* 0UL for non intlv
|
||||
*/
|
||||
u32 nr_pages; /* number of pages in csrow */
|
||||
u32 grain; /* granularity of reported error in bytes */
|
||||
int csrow_idx; /* the chip-select row */
|
||||
enum dev_type dtype; /* memory device type */
|
||||
u32 ue_count; /* Uncorrectable Errors for this csrow */
|
||||
u32 ce_count; /* Correctable Errors for this csrow */
|
||||
enum mem_type mtype; /* memory csrow type */
|
||||
enum edac_type edac_mode; /* EDAC mode for this csrow */
|
||||
struct mem_ctl_info *mci; /* the parent */
|
||||
|
||||
struct kobject kobj; /* sysfs kobject for this csrow */
|
||||
|
||||
/* channel information for this csrow */
|
||||
u32 nr_channels;
|
||||
struct channel_info *channels;
|
||||
};
|
||||
|
||||
struct mcidev_sysfs_group {
|
||||
const char *name; /* group name */
|
||||
const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
|
||||
};
|
||||
|
||||
struct mcidev_sysfs_group_kobj {
|
||||
struct list_head list; /* list for all instances within a mc */
|
||||
|
||||
struct kobject kobj; /* kobj for the group */
|
||||
|
||||
const struct mcidev_sysfs_group *grp; /* group description table */
|
||||
struct mem_ctl_info *mci; /* the parent */
|
||||
};
|
||||
|
||||
/* mcidev_sysfs_attribute structure
|
||||
* used for driver sysfs attributes and in mem_ctl_info
|
||||
* sysfs top level entries
|
||||
*/
|
||||
struct mcidev_sysfs_attribute {
|
||||
/* It should use either attr or grp */
|
||||
struct attribute attr;
|
||||
const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
|
||||
|
||||
/* Ops for show/store values at the attribute - not used on group */
|
||||
ssize_t (*show)(struct mem_ctl_info *,char *);
|
||||
ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
|
||||
};
|
||||
|
||||
/* MEMORY controller information structure
|
||||
*/
|
||||
struct mem_ctl_info {
|
||||
struct list_head link; /* for global list of mem_ctl_info structs */
|
||||
|
||||
struct module *owner; /* Module owner of this control struct */
|
||||
|
||||
unsigned long mtype_cap; /* memory types supported by mc */
|
||||
unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
|
||||
unsigned long edac_cap; /* configuration capabilities - this is
|
||||
* closely related to edac_ctl_cap. The
|
||||
* difference is that the controller may be
|
||||
* capable of s4ecd4ed which would be listed
|
||||
* in edac_ctl_cap, but if channels aren't
|
||||
* capable of s4ecd4ed then the edac_cap would
|
||||
* not have that capability.
|
||||
*/
|
||||
unsigned long scrub_cap; /* chipset scrub capabilities */
|
||||
enum scrub_type scrub_mode; /* current scrub mode */
|
||||
|
||||
/* Translates sdram memory scrub rate given in bytes/sec to the
|
||||
internal representation and configures whatever else needs
|
||||
to be configured.
|
||||
*/
|
||||
int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
|
||||
|
||||
/* Get the current sdram memory scrub rate from the internal
|
||||
representation and converts it to the closest matching
|
||||
bandwidth in bytes/sec.
|
||||
*/
|
||||
int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
|
||||
|
||||
|
||||
/* pointer to edac checking routine */
|
||||
void (*edac_check) (struct mem_ctl_info * mci);
|
||||
|
||||
/*
|
||||
* Remaps memory pages: controller pages to physical pages.
|
||||
* For most MC's, this will be NULL.
|
||||
*/
|
||||
/* FIXME - why not send the phys page to begin with? */
|
||||
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
|
||||
unsigned long page);
|
||||
int mc_idx;
|
||||
int nr_csrows;
|
||||
struct csrow_info *csrows;
|
||||
/*
|
||||
* FIXME - what about controllers on other busses? - IDs must be
|
||||
* unique. dev pointer should be sufficiently unique, but
|
||||
* BUS:SLOT.FUNC numbers may not be unique.
|
||||
*/
|
||||
struct device *dev;
|
||||
const char *mod_name;
|
||||
const char *mod_ver;
|
||||
const char *ctl_name;
|
||||
const char *dev_name;
|
||||
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
|
||||
void *pvt_info;
|
||||
u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
|
||||
u32 ce_noinfo_count; /* Correctable Errors w/o info */
|
||||
u32 ue_count; /* Total Uncorrectable Errors for this MC */
|
||||
u32 ce_count; /* Total Correctable Errors for this MC */
|
||||
unsigned long start_time; /* mci load start time (in jiffies) */
|
||||
|
||||
struct completion complete;
|
||||
|
||||
/* edac sysfs device control */
|
||||
struct kobject edac_mci_kobj;
|
||||
|
||||
/* list for all grp instances within a mc */
|
||||
struct list_head grp_kobj_list;
|
||||
|
||||
/* Additional top controller level attributes, but specified
|
||||
* by the low level driver.
|
||||
*
|
||||
* Set by the low level driver to provide attributes at the
|
||||
* controller level, same level as 'ue_count' and 'ce_count' above.
|
||||
* An array of structures, NULL terminated
|
||||
*
|
||||
* If attributes are desired, then set to array of attributes
|
||||
* If no attributes are desired, leave NULL
|
||||
*/
|
||||
const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
|
||||
|
||||
/* work struct for this MC */
|
||||
struct delayed_work work;
|
||||
|
||||
/* the internal state of this controller instance */
|
||||
int op_state;
|
||||
};
|
||||
|
||||
/*
|
||||
* The following are the structures to provide for a generic
|
||||
* or abstract 'edac_device'. This set of structures and the
|
||||
|
@ -1,61 +0,0 @@
|
||||
/* Provides edac interface to mcelog events
|
||||
*
|
||||
* This file may be distributed under the terms of the
|
||||
* GNU General Public License version 2.
|
||||
*
|
||||
* Copyright (c) 2009 by:
|
||||
* Mauro Carvalho Chehab <mchehab@redhat.com>
|
||||
*
|
||||
* Red Hat Inc. http://www.redhat.com
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/edac_mce.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
int edac_mce_enabled;
|
||||
EXPORT_SYMBOL_GPL(edac_mce_enabled);
|
||||
|
||||
|
||||
/*
|
||||
* Extension interface
|
||||
*/
|
||||
|
||||
static LIST_HEAD(edac_mce_list);
|
||||
static DEFINE_MUTEX(edac_mce_lock);
|
||||
|
||||
int edac_mce_register(struct edac_mce *edac_mce)
|
||||
{
|
||||
mutex_lock(&edac_mce_lock);
|
||||
list_add_tail(&edac_mce->list, &edac_mce_list);
|
||||
mutex_unlock(&edac_mce_lock);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(edac_mce_register);
|
||||
|
||||
void edac_mce_unregister(struct edac_mce *edac_mce)
|
||||
{
|
||||
mutex_lock(&edac_mce_lock);
|
||||
list_del(&edac_mce->list);
|
||||
mutex_unlock(&edac_mce_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(edac_mce_unregister);
|
||||
|
||||
int edac_mce_parse(struct mce *mce)
|
||||
{
|
||||
struct edac_mce *edac_mce;
|
||||
|
||||
list_for_each_entry(edac_mce, &edac_mce_list, list) {
|
||||
if (edac_mce->check_error(edac_mce->priv, mce))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Nobody queued the error */
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(edac_mce_parse);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
|
||||
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
|
||||
MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors");
|
@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos)
|
||||
static void i7300_process_error_global(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct i7300_pvt *pvt;
|
||||
u32 errnum, value;
|
||||
u32 errnum, error_reg;
|
||||
unsigned long errors;
|
||||
const char *specific;
|
||||
bool is_fatal;
|
||||
@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
|
||||
|
||||
/* read in the 1st FATAL error register */
|
||||
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
|
||||
FERR_GLOBAL_HI, &value);
|
||||
if (unlikely(value)) {
|
||||
errors = value;
|
||||
FERR_GLOBAL_HI, &error_reg);
|
||||
if (unlikely(error_reg)) {
|
||||
errors = error_reg;
|
||||
errnum = find_first_bit(&errors,
|
||||
ARRAY_SIZE(ferr_global_hi_name));
|
||||
specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
|
||||
@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
|
||||
|
||||
/* Clear the error bit */
|
||||
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
|
||||
FERR_GLOBAL_HI, value);
|
||||
FERR_GLOBAL_HI, error_reg);
|
||||
|
||||
goto error_global;
|
||||
}
|
||||
|
||||
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
|
||||
FERR_GLOBAL_LO, &value);
|
||||
if (unlikely(value)) {
|
||||
errors = value;
|
||||
FERR_GLOBAL_LO, &error_reg);
|
||||
if (unlikely(error_reg)) {
|
||||
errors = error_reg;
|
||||
errnum = find_first_bit(&errors,
|
||||
ARRAY_SIZE(ferr_global_lo_name));
|
||||
specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
|
||||
@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
|
||||
|
||||
/* Clear the error bit */
|
||||
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
|
||||
FERR_GLOBAL_LO, value);
|
||||
FERR_GLOBAL_LO, error_reg);
|
||||
|
||||
goto error_global;
|
||||
}
|
||||
@ -427,7 +427,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
|
||||
static void i7300_process_fbd_error(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct i7300_pvt *pvt;
|
||||
u32 errnum, value;
|
||||
u32 errnum, value, error_reg;
|
||||
u16 val16;
|
||||
unsigned branch, channel, bank, rank, cas, ras;
|
||||
u32 syndrome;
|
||||
@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
|
||||
|
||||
/* read in the 1st FATAL error register */
|
||||
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
FERR_FAT_FBD, &value);
|
||||
if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) {
|
||||
errors = value & FERR_FAT_FBD_ERR_MASK ;
|
||||
FERR_FAT_FBD, &error_reg);
|
||||
if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
|
||||
errors = error_reg & FERR_FAT_FBD_ERR_MASK ;
|
||||
errnum = find_first_bit(&errors,
|
||||
ARRAY_SIZE(ferr_fat_fbd_name));
|
||||
specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
|
||||
branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
|
||||
|
||||
branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
|
||||
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
NRECMEMA, &val16);
|
||||
bank = NRECMEMA_BANK(val16);
|
||||
@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
|
||||
|
||||
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
NRECMEMB, &value);
|
||||
|
||||
is_wr = NRECMEMB_IS_WR(value);
|
||||
cas = NRECMEMB_CAS(value);
|
||||
ras = NRECMEMB_RAS(value);
|
||||
|
||||
/* Clean the error register */
|
||||
pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
FERR_FAT_FBD, error_reg);
|
||||
|
||||
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
|
||||
"FATAL (Branch=%d DRAM-Bank=%d %s "
|
||||
"RAS=%d CAS=%d Err=0x%lx (%s))",
|
||||
@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
|
||||
|
||||
/* read in the 1st NON-FATAL error register */
|
||||
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
FERR_NF_FBD, &value);
|
||||
if (unlikely(value & FERR_NF_FBD_ERR_MASK)) {
|
||||
errors = value & FERR_NF_FBD_ERR_MASK;
|
||||
FERR_NF_FBD, &error_reg);
|
||||
if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
|
||||
errors = error_reg & FERR_NF_FBD_ERR_MASK;
|
||||
errnum = find_first_bit(&errors,
|
||||
ARRAY_SIZE(ferr_nf_fbd_name));
|
||||
specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
|
||||
|
||||
/* Clear the error bit */
|
||||
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
|
||||
FERR_GLOBAL_LO, value);
|
||||
branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
|
||||
|
||||
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
REDMEMA, &syndrome);
|
||||
|
||||
branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
|
||||
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
RECMEMA, &val16);
|
||||
bank = RECMEMA_BANK(val16);
|
||||
@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
|
||||
|
||||
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
RECMEMB, &value);
|
||||
|
||||
is_wr = RECMEMB_IS_WR(value);
|
||||
cas = RECMEMB_CAS(value);
|
||||
ras = RECMEMB_RAS(value);
|
||||
|
||||
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
REDMEMB, &value);
|
||||
|
||||
channel = (branch << 1);
|
||||
if (IS_SECOND_CH(value))
|
||||
channel++;
|
||||
|
||||
/* Clear the error bit */
|
||||
pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
|
||||
FERR_NF_FBD, error_reg);
|
||||
|
||||
/* Form out message */
|
||||
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
|
||||
"Corrected error (Branch=%d, Channel %d), "
|
||||
|
@ -31,11 +31,13 @@
|
||||
#include <linux/pci_ids.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/edac.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/edac_mce.h>
|
||||
#include <linux/smp.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/div64.h>
|
||||
|
||||
#include "edac_core.h"
|
||||
|
||||
@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
|
||||
/* OFFSETS for Device 0 Function 0 */
|
||||
|
||||
#define MC_CFG_CONTROL 0x90
|
||||
#define MC_CFG_UNLOCK 0x02
|
||||
#define MC_CFG_LOCK 0x00
|
||||
|
||||
/* OFFSETS for Device 3 Function 0 */
|
||||
|
||||
@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
|
||||
#define DIMM0_COR_ERR(r) ((r) & 0x7fff)
|
||||
|
||||
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
|
||||
#define MC_SSRCONTROL 0x48
|
||||
#define SSR_MODE_DISABLE 0x00
|
||||
#define SSR_MODE_ENABLE 0x01
|
||||
#define SSR_MODE_MASK 0x03
|
||||
|
||||
#define MC_SCRUB_CONTROL 0x4c
|
||||
#define STARTSCRUB (1 << 24)
|
||||
#define SCRUBINTERVAL_MASK 0xffffff
|
||||
|
||||
#define MC_COR_ECC_CNT_0 0x80
|
||||
#define MC_COR_ECC_CNT_1 0x84
|
||||
#define MC_COR_ECC_CNT_2 0x88
|
||||
@ -253,10 +266,7 @@ struct i7core_pvt {
|
||||
unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
|
||||
int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
|
||||
|
||||
unsigned int is_registered;
|
||||
|
||||
/* mcelog glue */
|
||||
struct edac_mce edac_mce;
|
||||
bool is_registered, enable_scrub;
|
||||
|
||||
/* Fifo double buffers */
|
||||
struct mce mce_entry[MCE_LOG_LEN];
|
||||
@ -268,6 +278,9 @@ struct i7core_pvt {
|
||||
/* Count indicator to show errors not got */
|
||||
unsigned mce_overrun;
|
||||
|
||||
/* DCLK Frequency used for computing scrub rate */
|
||||
int dclk_freq;
|
||||
|
||||
/* Struct to control EDAC polling */
|
||||
struct edac_pci_ctl_info *i7core_pci;
|
||||
};
|
||||
@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
|
||||
/* Memory controller */
|
||||
{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
|
||||
{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
|
||||
|
||||
/* Exists only for RDIMM */
|
||||
/* Exists only for RDIMM */
|
||||
{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
|
||||
{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
|
||||
|
||||
@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
|
||||
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
|
||||
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
|
||||
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
|
||||
|
||||
/* Generic Non-core registers */
|
||||
/*
|
||||
* This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
|
||||
* On Xeon 55xx, however, it has a different id (8086:2c40). So,
|
||||
* the probing code needs to test for the other address in case of
|
||||
* failure of this one
|
||||
*/
|
||||
{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
|
||||
|
||||
};
|
||||
|
||||
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
|
||||
@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
|
||||
{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
|
||||
{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
|
||||
{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
|
||||
|
||||
/*
|
||||
* This PCI device has an alternate address on some
|
||||
* processors like Core i7 860
|
||||
*/
|
||||
{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
|
||||
};
|
||||
|
||||
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
|
||||
@ -346,6 +374,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
|
||||
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
|
||||
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
|
||||
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
|
||||
|
||||
/* Generic Non-core registers */
|
||||
{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
|
||||
|
||||
};
|
||||
|
||||
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
|
||||
@ -714,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
|
||||
|
||||
csr->edac_mode = mode;
|
||||
csr->mtype = mtype;
|
||||
snprintf(csr->channels[0].label,
|
||||
sizeof(csr->channels[0].label),
|
||||
"CPU#%uChannel#%u_DIMM#%u",
|
||||
pvt->i7core_dev->socket, i, j);
|
||||
|
||||
csrow++;
|
||||
}
|
||||
@ -731,7 +767,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
|
||||
debugf1("\t\t%#x\t%#x\t%#x\n",
|
||||
(value[j] >> 27) & 0x1,
|
||||
(value[j] >> 24) & 0x7,
|
||||
(value[j] && ((1 << 24) - 1)));
|
||||
(value[j] & ((1 << 24) - 1)));
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -1324,6 +1360,20 @@ static int i7core_get_onedevice(struct pci_dev **prev,
|
||||
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
|
||||
dev_descr->dev_id, *prev);
|
||||
|
||||
/*
|
||||
* On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
|
||||
* is at addr 8086:2c40, instead of 8086:2c41. So, we need
|
||||
* to probe for the alternate address in case of failure
|
||||
*/
|
||||
if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
|
||||
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
|
||||
PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
|
||||
|
||||
if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
|
||||
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
|
||||
PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
|
||||
*prev);
|
||||
|
||||
if (!pdev) {
|
||||
if (*prev) {
|
||||
*prev = pdev;
|
||||
@ -1444,8 +1494,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
|
||||
struct i7core_pvt *pvt = mci->pvt_info;
|
||||
struct pci_dev *pdev;
|
||||
int i, func, slot;
|
||||
char *family;
|
||||
|
||||
pvt->is_registered = 0;
|
||||
pvt->is_registered = false;
|
||||
pvt->enable_scrub = false;
|
||||
for (i = 0; i < i7core_dev->n_devs; i++) {
|
||||
pdev = i7core_dev->pdev[i];
|
||||
if (!pdev)
|
||||
@ -1461,9 +1513,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
|
||||
if (unlikely(func > MAX_CHAN_FUNC))
|
||||
goto error;
|
||||
pvt->pci_ch[slot - 4][func] = pdev;
|
||||
} else if (!slot && !func)
|
||||
} else if (!slot && !func) {
|
||||
pvt->pci_noncore = pdev;
|
||||
else
|
||||
|
||||
/* Detect the processor family */
|
||||
switch (pdev->device) {
|
||||
case PCI_DEVICE_ID_INTEL_I7_NONCORE:
|
||||
family = "Xeon 35xx/ i7core";
|
||||
pvt->enable_scrub = false;
|
||||
break;
|
||||
case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
|
||||
family = "i7-800/i5-700";
|
||||
pvt->enable_scrub = false;
|
||||
break;
|
||||
case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
|
||||
family = "Xeon 34xx";
|
||||
pvt->enable_scrub = false;
|
||||
break;
|
||||
case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
|
||||
family = "Xeon 55xx";
|
||||
pvt->enable_scrub = true;
|
||||
break;
|
||||
case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
|
||||
family = "Xeon 56xx / i7-900";
|
||||
pvt->enable_scrub = true;
|
||||
break;
|
||||
default:
|
||||
family = "unknown";
|
||||
pvt->enable_scrub = false;
|
||||
}
|
||||
debugf0("Detected a processor type %s\n", family);
|
||||
} else
|
||||
goto error;
|
||||
|
||||
debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
|
||||
@ -1472,7 +1552,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
|
||||
|
||||
if (PCI_SLOT(pdev->devfn) == 3 &&
|
||||
PCI_FUNC(pdev->devfn) == 2)
|
||||
pvt->is_registered = 1;
|
||||
pvt->is_registered = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -1826,33 +1906,43 @@ static void i7core_check_error(struct mem_ctl_info *mci)
|
||||
* WARNING: As this routine should be called at NMI time, extra care should
|
||||
* be taken to avoid deadlocks, and to be as fast as possible.
|
||||
*/
|
||||
static int i7core_mce_check_error(void *priv, struct mce *mce)
|
||||
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
struct mem_ctl_info *mci = priv;
|
||||
struct i7core_pvt *pvt = mci->pvt_info;
|
||||
struct mce *mce = (struct mce *)data;
|
||||
struct i7core_dev *i7_dev;
|
||||
struct mem_ctl_info *mci;
|
||||
struct i7core_pvt *pvt;
|
||||
|
||||
i7_dev = get_i7core_dev(mce->socketid);
|
||||
if (!i7_dev)
|
||||
return NOTIFY_BAD;
|
||||
|
||||
mci = i7_dev->mci;
|
||||
pvt = mci->pvt_info;
|
||||
|
||||
/*
|
||||
* Just let mcelog handle it if the error is
|
||||
* outside the memory controller
|
||||
*/
|
||||
if (((mce->status & 0xffff) >> 7) != 1)
|
||||
return 0;
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/* Bank 8 registers are the only ones that we know how to handle */
|
||||
if (mce->bank != 8)
|
||||
return 0;
|
||||
return NOTIFY_DONE;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Only handle if it is the right mc controller */
|
||||
if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
|
||||
return 0;
|
||||
if (mce->socketid != pvt->i7core_dev->socket)
|
||||
return NOTIFY_DONE;
|
||||
#endif
|
||||
|
||||
smp_rmb();
|
||||
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
|
||||
smp_wmb();
|
||||
pvt->mce_overrun++;
|
||||
return 0;
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
/* Copy memory error at the ringbuffer */
|
||||
@ -1865,7 +1955,240 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)
|
||||
i7core_check_error(mci);
|
||||
|
||||
/* Advise mcelog that the errors were handled */
|
||||
return 1;
|
||||
return NOTIFY_STOP;
|
||||
}
|
||||
|
||||
static struct notifier_block i7_mce_dec = {
|
||||
.notifier_call = i7core_mce_check_error,
|
||||
};
|
||||
|
||||
struct memdev_dmi_entry {
|
||||
u8 type;
|
||||
u8 length;
|
||||
u16 handle;
|
||||
u16 phys_mem_array_handle;
|
||||
u16 mem_err_info_handle;
|
||||
u16 total_width;
|
||||
u16 data_width;
|
||||
u16 size;
|
||||
u8 form;
|
||||
u8 device_set;
|
||||
u8 device_locator;
|
||||
u8 bank_locator;
|
||||
u8 memory_type;
|
||||
u16 type_detail;
|
||||
u16 speed;
|
||||
u8 manufacturer;
|
||||
u8 serial_number;
|
||||
u8 asset_tag;
|
||||
u8 part_number;
|
||||
u8 attributes;
|
||||
u32 extended_size;
|
||||
u16 conf_mem_clk_speed;
|
||||
} __attribute__((__packed__));
|
||||
|
||||
|
||||
/*
|
||||
* Decode the DRAM Clock Frequency, be paranoid, make sure that all
|
||||
* memory devices show the same speed, and if they don't then consider
|
||||
* all speeds to be invalid.
|
||||
*/
|
||||
static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
|
||||
{
|
||||
int *dclk_freq = _dclk_freq;
|
||||
u16 dmi_mem_clk_speed;
|
||||
|
||||
if (*dclk_freq == -1)
|
||||
return;
|
||||
|
||||
if (dh->type == DMI_ENTRY_MEM_DEVICE) {
|
||||
struct memdev_dmi_entry *memdev_dmi_entry =
|
||||
(struct memdev_dmi_entry *)dh;
|
||||
unsigned long conf_mem_clk_speed_offset =
|
||||
(unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
|
||||
(unsigned long)&memdev_dmi_entry->type;
|
||||
unsigned long speed_offset =
|
||||
(unsigned long)&memdev_dmi_entry->speed -
|
||||
(unsigned long)&memdev_dmi_entry->type;
|
||||
|
||||
/* Check that a DIMM is present */
|
||||
if (memdev_dmi_entry->size == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Pick the configured speed if it's available, otherwise
|
||||
* pick the DIMM speed, or we don't have a speed.
|
||||
*/
|
||||
if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
|
||||
dmi_mem_clk_speed =
|
||||
memdev_dmi_entry->conf_mem_clk_speed;
|
||||
} else if (memdev_dmi_entry->length > speed_offset) {
|
||||
dmi_mem_clk_speed = memdev_dmi_entry->speed;
|
||||
} else {
|
||||
*dclk_freq = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (*dclk_freq == 0) {
|
||||
/* First pass, speed was 0 */
|
||||
if (dmi_mem_clk_speed > 0) {
|
||||
/* Set speed if a valid speed is read */
|
||||
*dclk_freq = dmi_mem_clk_speed;
|
||||
} else {
|
||||
/* Otherwise we don't have a valid speed */
|
||||
*dclk_freq = -1;
|
||||
}
|
||||
} else if (*dclk_freq > 0 &&
|
||||
*dclk_freq != dmi_mem_clk_speed) {
|
||||
/*
|
||||
* If we have a speed, check that all DIMMS are the same
|
||||
* speed, otherwise set the speed as invalid.
|
||||
*/
|
||||
*dclk_freq = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The default DCLK frequency is used as a fallback if we
|
||||
* fail to find anything reliable in the DMI. The value
|
||||
* is taken straight from the datasheet.
|
||||
*/
|
||||
#define DEFAULT_DCLK_FREQ 800
|
||||
|
||||
static int get_dclk_freq(void)
|
||||
{
|
||||
int dclk_freq = 0;
|
||||
|
||||
dmi_walk(decode_dclk, (void *)&dclk_freq);
|
||||
|
||||
if (dclk_freq < 1)
|
||||
return DEFAULT_DCLK_FREQ;
|
||||
|
||||
return dclk_freq;
|
||||
}
|
||||
|
||||
/*
|
||||
* set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate
|
||||
* to hardware according to SCRUBINTERVAL formula
|
||||
* found in datasheet.
|
||||
*/
|
||||
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
|
||||
{
|
||||
struct i7core_pvt *pvt = mci->pvt_info;
|
||||
struct pci_dev *pdev;
|
||||
u32 dw_scrub;
|
||||
u32 dw_ssr;
|
||||
|
||||
/* Get data from the MC register, function 2 */
|
||||
pdev = pvt->pci_mcr[2];
|
||||
if (!pdev)
|
||||
return -ENODEV;
|
||||
|
||||
pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
|
||||
|
||||
if (new_bw == 0) {
|
||||
/* Prepare to disable patrol scrub */
|
||||
dw_scrub &= ~STARTSCRUB;
|
||||
/* Stop the patrol scrub engine */
|
||||
write_and_test(pdev, MC_SCRUB_CONTROL,
|
||||
dw_scrub & ~SCRUBINTERVAL_MASK);
|
||||
|
||||
/* Get current status of scrub rate and set bit to disable */
|
||||
pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
|
||||
dw_ssr &= ~SSR_MODE_MASK;
|
||||
dw_ssr |= SSR_MODE_DISABLE;
|
||||
} else {
|
||||
const int cache_line_size = 64;
|
||||
const u32 freq_dclk_mhz = pvt->dclk_freq;
|
||||
unsigned long long scrub_interval;
|
||||
/*
|
||||
* Translate the desired scrub rate to a register value and
|
||||
* program the corresponding register value.
|
||||
*/
|
||||
scrub_interval = (unsigned long long)freq_dclk_mhz *
|
||||
cache_line_size * 1000000;
|
||||
do_div(scrub_interval, new_bw);
|
||||
|
||||
if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
|
||||
|
||||
/* Start the patrol scrub engine */
|
||||
pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
|
||||
STARTSCRUB | dw_scrub);
|
||||
|
||||
/* Get current status of scrub rate and set bit to enable */
|
||||
pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
|
||||
dw_ssr &= ~SSR_MODE_MASK;
|
||||
dw_ssr |= SSR_MODE_ENABLE;
|
||||
}
|
||||
/* Disable or enable scrubbing */
|
||||
pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
|
||||
|
||||
return new_bw;
|
||||
}
|
||||
|
||||
/*
|
||||
* get_sdram_scrub_rate This routine converts the current scrub rate value
|
||||
* into byte/sec bandwidth according to
|
||||
* SCRUBINTERVAL formula found in datasheet.
|
||||
*/
|
||||
static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct i7core_pvt *pvt = mci->pvt_info;
|
||||
struct pci_dev *pdev;
|
||||
const u32 cache_line_size = 64;
|
||||
const u32 freq_dclk_mhz = pvt->dclk_freq;
|
||||
unsigned long long scrub_rate;
|
||||
u32 scrubval;
|
||||
|
||||
/* Get data from the MC register, function 2 */
|
||||
pdev = pvt->pci_mcr[2];
|
||||
if (!pdev)
|
||||
return -ENODEV;
|
||||
|
||||
/* Get current scrub control data */
|
||||
pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
|
||||
|
||||
/* Mask highest 8-bits to 0 */
|
||||
scrubval &= SCRUBINTERVAL_MASK;
|
||||
if (!scrubval)
|
||||
return 0;
|
||||
|
||||
/* Calculate scrub rate value into byte/sec bandwidth */
|
||||
scrub_rate = (unsigned long long)freq_dclk_mhz *
|
||||
1000000 * cache_line_size;
|
||||
do_div(scrub_rate, scrubval);
|
||||
return (int)scrub_rate;
|
||||
}
|
||||
|
||||
static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct i7core_pvt *pvt = mci->pvt_info;
|
||||
u32 pci_lock;
|
||||
|
||||
/* Unlock writes to pci registers */
|
||||
pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
|
||||
pci_lock &= ~0x3;
|
||||
pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
|
||||
pci_lock | MC_CFG_UNLOCK);
|
||||
|
||||
mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
|
||||
mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
|
||||
}
|
||||
|
||||
static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct i7core_pvt *pvt = mci->pvt_info;
|
||||
u32 pci_lock;
|
||||
|
||||
/* Lock writes to pci registers */
|
||||
pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
|
||||
pci_lock &= ~0x3;
|
||||
pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
|
||||
pci_lock | MC_CFG_LOCK);
|
||||
}
|
||||
|
||||
static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
|
||||
@ -1874,7 +2197,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
|
||||
&pvt->i7core_dev->pdev[0]->dev,
|
||||
EDAC_MOD_STR);
|
||||
if (unlikely(!pvt->i7core_pci))
|
||||
pr_warn("Unable to setup PCI error report via EDAC\n");
|
||||
i7core_printk(KERN_WARNING,
|
||||
"Unable to setup PCI error report via EDAC\n");
|
||||
}
|
||||
|
||||
static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
|
||||
@ -1906,8 +2230,11 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
|
||||
debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
|
||||
__func__, mci, &i7core_dev->pdev[0]->dev);
|
||||
|
||||
/* Disable MCE NMI handler */
|
||||
edac_mce_unregister(&pvt->edac_mce);
|
||||
/* Disable scrubrate setting */
|
||||
if (pvt->enable_scrub)
|
||||
disable_sdram_scrub_setting(mci);
|
||||
|
||||
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
|
||||
|
||||
/* Disable EDAC polling */
|
||||
i7core_pci_ctl_release(pvt);
|
||||
@ -1979,6 +2306,10 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
|
||||
/* Set the function pointer to an actual operation function */
|
||||
mci->edac_check = i7core_check_error;
|
||||
|
||||
/* Enable scrubrate setting */
|
||||
if (pvt->enable_scrub)
|
||||
enable_sdram_scrub_setting(mci);
|
||||
|
||||
/* add this new MC control structure to EDAC's list of MCs */
|
||||
if (unlikely(edac_mc_add_mc(mci))) {
|
||||
debugf0("MC: " __FILE__
|
||||
@ -2002,21 +2333,13 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
|
||||
/* allocating generic PCI control info */
|
||||
i7core_pci_ctl_create(pvt);
|
||||
|
||||
/* Registers on edac_mce in order to receive memory errors */
|
||||
pvt->edac_mce.priv = mci;
|
||||
pvt->edac_mce.check_error = i7core_mce_check_error;
|
||||
rc = edac_mce_register(&pvt->edac_mce);
|
||||
if (unlikely(rc < 0)) {
|
||||
debugf0("MC: " __FILE__
|
||||
": %s(): failed edac_mce_register()\n", __func__);
|
||||
goto fail1;
|
||||
}
|
||||
/* DCLK for scrub rate setting */
|
||||
pvt->dclk_freq = get_dclk_freq();
|
||||
|
||||
atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
|
||||
|
||||
return 0;
|
||||
|
||||
fail1:
|
||||
i7core_pci_ctl_release(pvt);
|
||||
edac_mc_del_mc(mci->dev);
|
||||
fail0:
|
||||
kfree(mci->ctl_name);
|
||||
edac_mc_free(mci);
|
||||
@ -2035,7 +2358,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
|
||||
static int __devinit i7core_probe(struct pci_dev *pdev,
|
||||
const struct pci_device_id *id)
|
||||
{
|
||||
int rc;
|
||||
int rc, count = 0;
|
||||
struct i7core_dev *i7core_dev;
|
||||
|
||||
/* get the pci devices we want to reserve for our use */
|
||||
@ -2055,12 +2378,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
|
||||
goto fail0;
|
||||
|
||||
list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
|
||||
count++;
|
||||
rc = i7core_register_mci(i7core_dev);
|
||||
if (unlikely(rc < 0))
|
||||
goto fail1;
|
||||
}
|
||||
|
||||
i7core_printk(KERN_INFO, "Driver loaded.\n");
|
||||
/*
|
||||
* Nehalem-EX uses a different memory controller. However, as the
|
||||
* memory controller is not visible on some Nehalem/Nehalem-EP, we
|
||||
* need to indirectly probe via a X58 PCI device. The same devices
|
||||
* are found on (some) Nehalem-EX. So, on those machines, the
|
||||
* probe routine needs to return -ENODEV, as the actual Memory
|
||||
* Controller registers won't be detected.
|
||||
*/
|
||||
if (!count) {
|
||||
rc = -ENODEV;
|
||||
goto fail1;
|
||||
}
|
||||
|
||||
i7core_printk(KERN_INFO,
|
||||
"Driver loaded, %d memory controller(s) found.\n",
|
||||
count);
|
||||
|
||||
mutex_unlock(&i7core_edac_lock);
|
||||
return 0;
|
||||
|
1893
drivers/edac/sb_edac.c
Normal file
1893
drivers/edac/sb_edac.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -42,4 +42,354 @@ static inline void opstate_init(void)
|
||||
return;
|
||||
}
|
||||
|
||||
#define EDAC_MC_LABEL_LEN 31
|
||||
#define MC_PROC_NAME_MAX_LEN 7
|
||||
|
||||
/* memory devices */
|
||||
enum dev_type {
|
||||
DEV_UNKNOWN = 0,
|
||||
DEV_X1,
|
||||
DEV_X2,
|
||||
DEV_X4,
|
||||
DEV_X8,
|
||||
DEV_X16,
|
||||
DEV_X32, /* Do these parts exist? */
|
||||
DEV_X64 /* Do these parts exist? */
|
||||
};
|
||||
|
||||
#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
|
||||
#define DEV_FLAG_X1 BIT(DEV_X1)
|
||||
#define DEV_FLAG_X2 BIT(DEV_X2)
|
||||
#define DEV_FLAG_X4 BIT(DEV_X4)
|
||||
#define DEV_FLAG_X8 BIT(DEV_X8)
|
||||
#define DEV_FLAG_X16 BIT(DEV_X16)
|
||||
#define DEV_FLAG_X32 BIT(DEV_X32)
|
||||
#define DEV_FLAG_X64 BIT(DEV_X64)
|
||||
|
||||
/* memory types */
|
||||
enum mem_type {
|
||||
MEM_EMPTY = 0, /* Empty csrow */
|
||||
MEM_RESERVED, /* Reserved csrow type */
|
||||
MEM_UNKNOWN, /* Unknown csrow type */
|
||||
MEM_FPM, /* Fast page mode */
|
||||
MEM_EDO, /* Extended data out */
|
||||
MEM_BEDO, /* Burst Extended data out */
|
||||
MEM_SDR, /* Single data rate SDRAM */
|
||||
MEM_RDR, /* Registered single data rate SDRAM */
|
||||
MEM_DDR, /* Double data rate SDRAM */
|
||||
MEM_RDDR, /* Registered Double data rate SDRAM */
|
||||
MEM_RMBS, /* Rambus DRAM */
|
||||
MEM_DDR2, /* DDR2 RAM */
|
||||
MEM_FB_DDR2, /* fully buffered DDR2 */
|
||||
MEM_RDDR2, /* Registered DDR2 RAM */
|
||||
MEM_XDR, /* Rambus XDR */
|
||||
MEM_DDR3, /* DDR3 RAM */
|
||||
MEM_RDDR3, /* Registered DDR3 RAM */
|
||||
};
|
||||
|
||||
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
|
||||
#define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
|
||||
#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
|
||||
#define MEM_FLAG_FPM BIT(MEM_FPM)
|
||||
#define MEM_FLAG_EDO BIT(MEM_EDO)
|
||||
#define MEM_FLAG_BEDO BIT(MEM_BEDO)
|
||||
#define MEM_FLAG_SDR BIT(MEM_SDR)
|
||||
#define MEM_FLAG_RDR BIT(MEM_RDR)
|
||||
#define MEM_FLAG_DDR BIT(MEM_DDR)
|
||||
#define MEM_FLAG_RDDR BIT(MEM_RDDR)
|
||||
#define MEM_FLAG_RMBS BIT(MEM_RMBS)
|
||||
#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
|
||||
#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
|
||||
#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
|
||||
#define MEM_FLAG_XDR BIT(MEM_XDR)
|
||||
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
|
||||
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
|
||||
|
||||
/* chipset Error Detection and Correction capabilities and mode */
|
||||
enum edac_type {
|
||||
EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
|
||||
EDAC_NONE, /* Doesn't support ECC */
|
||||
EDAC_RESERVED, /* Reserved ECC type */
|
||||
EDAC_PARITY, /* Detects parity errors */
|
||||
EDAC_EC, /* Error Checking - no correction */
|
||||
EDAC_SECDED, /* Single bit error correction, Double detection */
|
||||
EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
|
||||
EDAC_S4ECD4ED, /* Chipkill x4 devices */
|
||||
EDAC_S8ECD8ED, /* Chipkill x8 devices */
|
||||
EDAC_S16ECD16ED, /* Chipkill x16 devices */
|
||||
};
|
||||
|
||||
#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
|
||||
#define EDAC_FLAG_NONE BIT(EDAC_NONE)
|
||||
#define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
|
||||
#define EDAC_FLAG_EC BIT(EDAC_EC)
|
||||
#define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
|
||||
#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
|
||||
#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
|
||||
#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
|
||||
#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
|
||||
|
||||
/* scrubbing capabilities */
|
||||
enum scrub_type {
|
||||
SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */
|
||||
SCRUB_NONE, /* No scrubber */
|
||||
SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */
|
||||
SCRUB_SW_SRC, /* Software scrub only errors */
|
||||
SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */
|
||||
SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */
|
||||
SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */
|
||||
SCRUB_HW_SRC, /* Hardware scrub only errors */
|
||||
SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */
|
||||
SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */
|
||||
};
|
||||
|
||||
#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
|
||||
#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
|
||||
#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
|
||||
/* Flag bit for SCRUB_SW_TUNABLE (software scrub frequency is tunable) */
#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_TUNABLE)
|
||||
#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
|
||||
#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
|
||||
#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
|
||||
#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
|
||||
|
||||
/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
|
||||
|
||||
/* EDAC internal operation states */
|
||||
#define OP_ALLOC 0x100
|
||||
#define OP_RUNNING_POLL 0x201
|
||||
#define OP_RUNNING_INTERRUPT 0x202
|
||||
#define OP_RUNNING_POLL_INTR 0x203
|
||||
#define OP_OFFLINE 0x300
|
||||
|
||||
/*
|
||||
* There are several things to be aware of that aren't at all obvious:
|
||||
*
|
||||
*
|
||||
* SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
|
||||
*
|
||||
* These are some of the many terms that are thrown about that don't always
|
||||
* mean what people think they mean (Inconceivable!). In the interest of
|
||||
* creating a common ground for discussion, terms and their definitions
|
||||
* will be established.
|
||||
*
|
||||
* Memory devices: The individual chip on a memory stick. These devices
|
||||
* commonly output 4 and 8 bits each. Grouping several
|
||||
* of these in parallel provides 64 bits which is common
|
||||
* for a memory stick.
|
||||
*
|
||||
* Memory Stick: A printed circuit board that aggregates multiple
|
||||
* memory devices in parallel. This is the atomic
|
||||
* memory component that is purchasable by Joe consumer
|
||||
* and loaded into a memory socket.
|
||||
*
|
||||
* Socket: A physical connector on the motherboard that accepts
|
||||
* a single memory stick.
|
||||
*
|
||||
* Channel: Set of memory devices on a memory stick that must be
|
||||
* grouped in parallel with one or more additional
|
||||
* channels from other memory sticks. This parallel
|
||||
* grouping of the output from multiple channels are
|
||||
* necessary for the smallest granularity of memory access.
|
||||
* Some memory controllers are capable of single channel -
|
||||
* which means that memory sticks can be loaded
|
||||
* individually. Other memory controllers are only
|
||||
* capable of dual channel - which means that memory
|
||||
* sticks must be loaded as pairs (see "socket set").
|
||||
*
|
||||
* Chip-select row: All of the memory devices that are selected together
|
||||
* for a single, minimum grain of memory access.
|
||||
* This selects all of the parallel memory devices across
|
||||
* all of the parallel channels. Common chip-select rows
|
||||
* for single channel are 64 bits, for dual channel 128
|
||||
* bits.
|
||||
*
|
||||
* Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
|
||||
* Motherboards commonly drive two chip-select pins to
|
||||
* a memory stick. A single-ranked stick, will occupy
|
||||
* only one of those rows. The other will be unused.
|
||||
*
|
||||
* Double-Ranked stick: A double-ranked stick has two chip-select rows which
|
||||
* access different sets of memory devices. The two
|
||||
* rows cannot be accessed concurrently.
|
||||
*
|
||||
* Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
|
||||
* A double-sided stick has two chip-select rows which
|
||||
* access different sets of memory devices. The two
|
||||
* rows cannot be accessed concurrently. "Double-sided"
|
||||
* is irrespective of the memory devices being mounted
|
||||
* on both sides of the memory stick.
|
||||
*
|
||||
* Socket set: All of the memory sticks that are required for
|
||||
* a single memory access or all of the memory sticks
|
||||
* spanned by a chip-select row. A single socket set
|
||||
* has two chip-select rows and if double-sided sticks
|
||||
* are used these will occupy those chip-select rows.
|
||||
*
|
||||
* Bank: This term is avoided because it is unclear when
|
||||
* needing to distinguish between chip-select rows and
|
||||
* socket sets.
|
||||
*
|
||||
* Controller pages:
|
||||
*
|
||||
* Physical pages:
|
||||
*
|
||||
* Virtual pages:
|
||||
*
|
||||
*
|
||||
* STRUCTURE ORGANIZATION AND CHOICES
|
||||
*
|
||||
*
|
||||
*
|
||||
* PS - I enjoyed writing all that about as much as you enjoyed reading it.
|
||||
*/
|
||||
|
||||
struct channel_info {
|
||||
int chan_idx; /* channel index */
|
||||
u32 ce_count; /* Correctable Errors for this CHANNEL */
|
||||
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
|
||||
struct csrow_info *csrow; /* the parent */
|
||||
};
|
||||
|
||||
struct csrow_info {
|
||||
unsigned long first_page; /* first page number in dimm */
|
||||
unsigned long last_page; /* last page number in dimm */
|
||||
unsigned long page_mask; /* used for interleaving -
|
||||
* 0UL for non intlv
|
||||
*/
|
||||
u32 nr_pages; /* number of pages in csrow */
|
||||
u32 grain; /* granularity of reported error in bytes */
|
||||
int csrow_idx; /* the chip-select row */
|
||||
enum dev_type dtype; /* memory device type */
|
||||
u32 ue_count; /* Uncorrectable Errors for this csrow */
|
||||
u32 ce_count; /* Correctable Errors for this csrow */
|
||||
enum mem_type mtype; /* memory csrow type */
|
||||
enum edac_type edac_mode; /* EDAC mode for this csrow */
|
||||
struct mem_ctl_info *mci; /* the parent */
|
||||
|
||||
struct kobject kobj; /* sysfs kobject for this csrow */
|
||||
|
||||
/* channel information for this csrow */
|
||||
u32 nr_channels;
|
||||
struct channel_info *channels;
|
||||
};
|
||||
|
||||
struct mcidev_sysfs_group {
|
||||
const char *name; /* group name */
|
||||
const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
|
||||
};
|
||||
|
||||
struct mcidev_sysfs_group_kobj {
|
||||
struct list_head list; /* list for all instances within a mc */
|
||||
|
||||
struct kobject kobj; /* kobj for the group */
|
||||
|
||||
const struct mcidev_sysfs_group *grp; /* group description table */
|
||||
struct mem_ctl_info *mci; /* the parent */
|
||||
};
|
||||
|
||||
/* mcidev_sysfs_attribute structure
|
||||
* used for driver sysfs attributes and in mem_ctl_info
|
||||
* sysfs top level entries
|
||||
*/
|
||||
struct mcidev_sysfs_attribute {
|
||||
/* It should use either attr or grp */
|
||||
struct attribute attr;
|
||||
const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
|
||||
|
||||
/* Ops for show/store values at the attribute - not used on group */
|
||||
ssize_t (*show)(struct mem_ctl_info *,char *);
|
||||
ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
|
||||
};
|
||||
|
||||
/* MEMORY controller information structure
|
||||
*/
|
||||
struct mem_ctl_info {
|
||||
struct list_head link; /* for global list of mem_ctl_info structs */
|
||||
|
||||
struct module *owner; /* Module owner of this control struct */
|
||||
|
||||
unsigned long mtype_cap; /* memory types supported by mc */
|
||||
unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
|
||||
unsigned long edac_cap; /* configuration capabilities - this is
|
||||
* closely related to edac_ctl_cap. The
|
||||
* difference is that the controller may be
|
||||
* capable of s4ecd4ed which would be listed
|
||||
* in edac_ctl_cap, but if channels aren't
|
||||
* capable of s4ecd4ed then the edac_cap would
|
||||
* not have that capability.
|
||||
*/
|
||||
unsigned long scrub_cap; /* chipset scrub capabilities */
|
||||
enum scrub_type scrub_mode; /* current scrub mode */
|
||||
|
||||
/* Translates sdram memory scrub rate given in bytes/sec to the
|
||||
internal representation and configures whatever else needs
|
||||
to be configured.
|
||||
*/
|
||||
int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
|
||||
|
||||
/* Get the current sdram memory scrub rate from the internal
|
||||
representation and converts it to the closest matching
|
||||
bandwidth in bytes/sec.
|
||||
*/
|
||||
int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
|
||||
|
||||
|
||||
/* pointer to edac checking routine */
|
||||
void (*edac_check) (struct mem_ctl_info * mci);
|
||||
|
||||
/*
|
||||
* Remaps memory pages: controller pages to physical pages.
|
||||
* For most MC's, this will be NULL.
|
||||
*/
|
||||
/* FIXME - why not send the phys page to begin with? */
|
||||
unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
|
||||
unsigned long page);
|
||||
int mc_idx;
|
||||
int nr_csrows;
|
||||
struct csrow_info *csrows;
|
||||
/*
|
||||
* FIXME - what about controllers on other busses? - IDs must be
|
||||
* unique. dev pointer should be sufficiently unique, but
|
||||
* BUS:SLOT.FUNC numbers may not be unique.
|
||||
*/
|
||||
struct device *dev;
|
||||
const char *mod_name;
|
||||
const char *mod_ver;
|
||||
const char *ctl_name;
|
||||
const char *dev_name;
|
||||
char proc_name[MC_PROC_NAME_MAX_LEN + 1];
|
||||
void *pvt_info;
|
||||
u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
|
||||
u32 ce_noinfo_count; /* Correctable Errors w/o info */
|
||||
u32 ue_count; /* Total Uncorrectable Errors for this MC */
|
||||
u32 ce_count; /* Total Correctable Errors for this MC */
|
||||
unsigned long start_time; /* mci load start time (in jiffies) */
|
||||
|
||||
struct completion complete;
|
||||
|
||||
/* edac sysfs device control */
|
||||
struct kobject edac_mci_kobj;
|
||||
|
||||
/* list for all grp instances within a mc */
|
||||
struct list_head grp_kobj_list;
|
||||
|
||||
/* Additional top controller level attributes, but specified
|
||||
* by the low level driver.
|
||||
*
|
||||
* Set by the low level driver to provide attributes at the
|
||||
* controller level, same level as 'ue_count' and 'ce_count' above.
|
||||
* An array of structures, NULL terminated
|
||||
*
|
||||
* If attributes are desired, then set to array of attributes
|
||||
* If no attributes are desired, leave NULL
|
||||
*/
|
||||
const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
|
||||
|
||||
/* work struct for this MC */
|
||||
struct delayed_work work;
|
||||
|
||||
/* the internal state of this controller instance */
|
||||
int op_state;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,31 +0,0 @@
|
||||
/* Provides edac interface to mcelog events
|
||||
*
|
||||
* This file may be distributed under the terms of the
|
||||
* GNU General Public License version 2.
|
||||
*
|
||||
* Copyright (c) 2009 by:
|
||||
* Mauro Carvalho Chehab <mchehab@redhat.com>
|
||||
*
|
||||
* Red Hat Inc. http://www.redhat.com
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_EDAC_MCE) || \
|
||||
(defined(CONFIG_EDAC_MCE_MODULE) && defined(MODULE))
|
||||
|
||||
#include <asm/mce.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
struct edac_mce {
|
||||
struct list_head list;
|
||||
|
||||
void *priv;
|
||||
int (*check_error)(void *priv, struct mce *mce);
|
||||
};
|
||||
|
||||
int edac_mce_register(struct edac_mce *edac_mce);
|
||||
void edac_mce_unregister(struct edac_mce *edac_mce);
|
||||
int edac_mce_parse(struct mce *mce);
|
||||
|
||||
#else
|
||||
#define edac_mce_parse(mce) (0)
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user