2011-10-20 19:18:01 -02:00
|
|
|
/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
|
|
|
|
*
|
|
|
|
* This driver supports the memory controllers found on the Intel
|
|
|
|
* processor family Sandy Bridge.
|
|
|
|
*
|
|
|
|
* This file may be distributed under the terms of the
|
|
|
|
* GNU General Public License version 2 only.
|
|
|
|
*
|
|
|
|
* Copyright (c) 2011 by:
|
2014-02-07 08:03:07 -02:00
|
|
|
* Mauro Carvalho Chehab
|
2011-10-20 19:18:01 -02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/pci.h>
|
|
|
|
#include <linux/pci_ids.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/edac.h>
|
|
|
|
#include <linux/mmzone.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/bitmap.h>
|
2011-11-07 18:26:53 -03:00
|
|
|
#include <linux/math64.h>
|
2011-10-20 19:18:01 -02:00
|
|
|
#include <asm/processor.h>
|
2011-10-20 19:33:46 -02:00
|
|
|
#include <asm/mce.h>
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
#include "edac_core.h"
|
|
|
|
|
|
|
|
/*
 * Static vars
 *
 * File-scope driver state: a list head, a mutex and an integer named
 * "probed".
 * NOTE(review): presumably the list links the struct sbridge_dev entries
 * (through their ->list member) and the mutex serializes access to it;
 * confirm against the users further down the file.
 */
static LIST_HEAD(sbridge_edac_list);
static DEFINE_MUTEX(sbridge_edac_lock);
static int probed;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Alter this version for the module when modifications are made
|
|
|
|
*/
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
/* Module revision string and module name string. */
#define SBRIDGE_REVISION	" Ver: 1.1.1 "
#define EDAC_MOD_STR		"sbridge_edac"
|
|
|
|
|
|
|
|
/*
 * Debug macros
 *
 * Both wrappers forward their arguments unchanged and insert the fixed
 * "sbridge" prefix string: sbridge_printk() expands to edac_printk() and
 * sbridge_mc_printk() expands to edac_mc_chipset_printk().
 */
#define sbridge_printk(level, fmt, arg...)			\
	edac_printk(level, "sbridge", fmt, ##arg)

#define sbridge_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg)
|
|
|
|
|
|
|
|
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 *
 * The value is masked with GENMASK_ULL(hi, lo) and then shifted right by
 * <lo>, so bit <lo> of <v> ends up as bit 0 of the result.
 */
#define GET_BITFIELD(v, lo, hi)	\
	(((v) & GENMASK_ULL(hi, lo)) >> (lo))
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
|
2013-10-30 13:27:00 -03:00
|
|
|
static const u32 sbridge_dram_rule[] = {
|
2011-10-20 19:18:01 -02:00
|
|
|
0x80, 0x88, 0x90, 0x98, 0xa0,
|
|
|
|
0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
|
|
|
|
};
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
/* Ivy Bridge DRAM rule register offsets: twenty entries, 8 bytes apart. */
static const u32 ibridge_dram_rule[] = {
	0x60, 0x68, 0x70, 0x78, 0x80,
	0x88, 0x90, 0x98, 0xa0, 0xa8,
	0xb0, 0xb8, 0xc0, 0xc8, 0xd0,
	0xd8, 0xe0, 0xe8, 0xf0, 0xf8,
};
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Single-bit fields of a register value: bit 0 and bit 26 respectively. */
#define DRAM_RULE_ENABLE(reg)	GET_BITFIELD(reg, 0,  0)
#define A7MODE(reg)		GET_BITFIELD(reg, 26, 26)
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2015-12-03 10:48:52 +01:00
|
|
|
/*
 * show_dram_attr() - map a DRAM attribute code to a printable name.
 * @attr: attribute code
 *
 * Return: "DRAM", "MMCFG" or "NXM" for the codes 0, 1 and 2, and "unknown"
 * for every other value.  The result points at a string literal, so callers
 * must neither modify nor free it.
 */
static char *show_dram_attr(u32 attr)
{
	switch (attr) {
	case 0:
		return "DRAM";
	case 1:
		return "MMCFG";
	case 2:
		return "NXM";
	default:
		return "unknown";
	}
}
|
|
|
|
|
2013-10-30 13:27:01 -03:00
|
|
|
/*
 * Sandy Bridge interleave list register offsets.  Each entry is 4 bytes
 * above the entry with the same index in sbridge_dram_rule[].
 */
static const u32 sbridge_interleave_list[] = {
	0x84, 0x8c, 0x94, 0x9c, 0xa4,
	0xac, 0xb4, 0xbc, 0xc4, 0xcc,
};
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
/*
 * Ivy Bridge interleave list register offsets.  Each entry is 4 bytes
 * above the entry with the same index in ibridge_dram_rule[].
 */
static const u32 ibridge_interleave_list[] = {
	0x64, 0x6c, 0x74, 0x7c, 0x84,
	0x8c, 0x94, 0x9c, 0xa4, 0xac,
	0xb4, 0xbc, 0xc4, 0xcc, 0xd4,
	0xdc, 0xe4, 0xec, 0xf4, 0xfc,
};
|
|
|
|
|
2013-10-30 13:27:02 -03:00
|
|
|
/*
 * Bit range of one field inside a register value: @start is the lowest bit
 * and @end the highest bit, both inclusive (this is how sad_pkg() feeds
 * them to GET_BITFIELD()).
 */
struct interleave_pkg {
	unsigned char start;
	unsigned char end;
};

/* Sandy Bridge: eight 3-bit fields, two per byte (bits 6-7 of each byte unused). */
static const struct interleave_pkg sbridge_interleave_pkg[] = {
	{ 0, 2 },
	{ 3, 5 },
	{ 8, 10 },
	{ 11, 13 },
	{ 16, 18 },
	{ 19, 21 },
	{ 24, 26 },
	{ 27, 29 },
};

/* Ivy Bridge: eight contiguous 4-bit fields covering bits 0..31. */
static const struct interleave_pkg ibridge_interleave_pkg[] = {
	{ 0, 3 },
	{ 4, 7 },
	{ 8, 11 },
	{ 12, 15 },
	{ 16, 19 },
	{ 20, 23 },
	{ 24, 27 },
	{ 28, 31 },
};
|
|
|
|
|
2013-10-30 13:27:02 -03:00
|
|
|
/*
 * sad_pkg() - extract one interleave field from a register value.
 * @table:      table of bit ranges to use
 * @reg:        raw register value
 * @interleave: index into @table; it is not range-checked here, so the
 *              caller must pass an index that is valid for @table
 *
 * Return: bits table[interleave].start .. table[interleave].end of @reg,
 * shifted down so that the field starts at bit 0.
 */
static inline int sad_pkg(const struct interleave_pkg *table, u32 reg,
			  int interleave)
{
	const struct interleave_pkg *field = &table[interleave];

	return GET_BITFIELD(reg, field->start, field->end);
}
|
|
|
|
|
|
|
|
/* Devices 12 Function 7 */

/* Register offsets of TOLM and TOHM. */
#define TOLM		0x80
#define TOHM		0x84
|
sb_edac: Fix discovery of top-of-low-memory for Haswell
Haswell moved the TOLM/TOHM registers to a different device and offset.
The sb_edac driver accounted for the change of device, but not for the
new offset. There was also a typo in the constant to fill in the low
26 bits (was 0x1ffffff, should be 0x3ffffff).
This resulted in a bogus value for the top of low memory:
EDAC DEBUG: get_memory_layout: TOLM: 0.032 GB (0x0000000001ffffff)
which would result in EDAC refusing to translate addresses for
errors above the bogus value and below 4GB:
sbridge MC3: HANDLING MCE MEMORY ERROR
sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
sbridge MC3: TSC 0
sbridge MC3: ADDR 2000000
sbridge MC3: MISC 523eac86
sbridge MC3: PROCESSOR 0:306f3 TIME 1414600951 SOCKET 0 APIC 0
MC3: 1 CE Error at TOLM area, on addr 0x02000000 on any memory ( page:0x0 offset:0x0 grain:32 syndrome:0x0)
With the fix we see the correct TOLM value:
DEBUG: get_memory_layout: TOLM: 2.048 GB (0x000000007fffffff)
and we decode address 2000000 correctly:
sbridge MC3: HANDLING MCE MEMORY ERROR
sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
sbridge MC3: TSC 0
sbridge MC3: ADDR 2000000
sbridge MC3: MISC 523e1086
sbridge MC3: PROCESSOR 0:306f3 TIME 1414601319 SOCKET 0 APIC 0
DEBUG: get_memory_error_data: SAD interleave package: 0 = CPU socket 0, HA 0, shiftup: 0
DEBUG: get_memory_error_data: TAD#0: address 0x0000000002000000 < 0x000000007fffffff, socket interleave 1, channel interleave 4 (offset 0x00000000), index 0, base ch: 0, ch mask: 0x01
DEBUG: get_memory_error_data: RIR#0, limit: 4.095 GB (0x00000000ffffffff), way: 1
DEBUG: get_memory_error_data: RIR#0: channel address 0x00200000 < 0xffffffff, RIR interleave 0, index 0
DEBUG: sbridge_mce_output_error: area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0
MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x2000 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2014-10-29 10:36:50 -07:00
|
|
|
/* Haswell keeps TOLM/TOHM at different offsets; TOHM is split in two. */
#define HASWELL_TOLM	0xd0
#define HASWELL_TOHM_0	0xd4
#define HASWELL_TOHM_1	0xd8

/*
 * Convert raw register values into an address limit:
 *  - GET_TOLM(): bits 0..3 of @reg shifted left by 28, low 26 bits set.
 *  - GET_TOHM(): bits 0..20 of @reg shifted left by 25, low 26 bits set.
 */
#define GET_TOLM(reg)		((GET_BITFIELD(reg, 0,  3) << 28) | 0x3ffffff)
#define GET_TOHM(reg)		((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)
|
|
|
|
|
|
|
|
/* Device 13 Function 6 */

#define SAD_TARGET	0xf0

/* Bits 9..11 of a register value. */
#define SOURCE_ID(reg)		GET_BITFIELD(reg, 9, 11)

#define SAD_CONTROL	0xf4
|
|
|
|
|
|
|
|
/* Device 14 function 0 */
|
|
|
|
|
|
|
|
static const u32 tad_dram_rule[] = {
|
|
|
|
0x40, 0x44, 0x48, 0x4c,
|
|
|
|
0x50, 0x54, 0x58, 0x5c,
|
|
|
|
0x60, 0x64, 0x68, 0x6c,
|
|
|
|
};
|
|
|
|
#define MAX_TAD ARRAY_SIZE(tad_dram_rule)
|
|
|
|
|
|
|
|
#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
|
|
|
|
#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11)
|
|
|
|
#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9)
|
|
|
|
#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7)
|
|
|
|
#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5)
|
|
|
|
#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3)
|
|
|
|
#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1)
|
|
|
|
|
|
|
|
/* Device 15, function 0 */

#define MCMTR			0x7c

/* Single-bit flags of an MCMTR value: bit 2, bit 1 and bit 0. */
#define IS_ECC_ENABLED(mcmtr)		GET_BITFIELD(mcmtr, 2, 2)
#define IS_LOCKSTEP_ENABLED(mcmtr)	GET_BITFIELD(mcmtr, 1, 1)
#define IS_CLOSE_PG(mcmtr)		GET_BITFIELD(mcmtr, 0, 0)

/* Device 15, function 1 */

#define RASENABLES		0xac
/* Bit 0 of a RASENABLES value. */
#define IS_MIRROR_ENABLED(reg)		GET_BITFIELD(reg, 0, 0)
|
|
|
|
|
|
|
|
/* Device 15, functions 2-5 */

/* MTR register offsets: three entries, 4 bytes apart. */
static const int mtr_regs[] = {
	0x80, 0x84, 0x88,
};

/* Field decoders for an MTR value. */
#define RANK_DISABLE(mtr)		GET_BITFIELD(mtr, 16, 19)
#define IS_DIMM_PRESENT(mtr)		GET_BITFIELD(mtr, 14, 14)
#define RANK_CNT_BITS(mtr)		GET_BITFIELD(mtr, 12, 13)
#define RANK_WIDTH_BITS(mtr)		GET_BITFIELD(mtr, 2, 4)
#define COL_WIDTH_BITS(mtr)		GET_BITFIELD(mtr, 0, 1)
|
|
|
|
|
|
|
|
/* Register offsets, twelve entries 4 bytes apart (same count as tad_dram_rule[]). */
static const u32 tad_ch_nilv_offset[] = {
	0x90, 0x94, 0x98, 0x9c,
	0xa0, 0xa4, 0xa8, 0xac,
	0xb0, 0xb4, 0xb8, 0xbc,
};
/* CHN_IDX_OFFSET(): bits 28..29.  TAD_OFFSET(): bits 6..25 shifted left by 26. */
#define CHN_IDX_OFFSET(reg)		GET_BITFIELD(reg, 28, 29)
#define TAD_OFFSET(reg)			(GET_BITFIELD(reg,  6, 25) << 26)
|
|
|
|
|
|
|
|
static const u32 rir_way_limit[] = {
|
|
|
|
0x108, 0x10c, 0x110, 0x114, 0x118,
|
|
|
|
};
|
|
|
|
#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
|
|
|
|
|
|
|
|
#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31)
|
|
|
|
#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29)
|
|
|
|
|
|
|
|
#define MAX_RIR_WAY 8
|
|
|
|
|
|
|
|
static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
|
|
|
|
{ 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
|
|
|
|
{ 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
|
|
|
|
{ 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
|
|
|
|
{ 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
|
|
|
|
{ 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
|
|
|
|
};
|
|
|
|
|
|
|
|
#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
|
|
|
|
#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
|
|
|
|
|
|
|
|
/* Device 16, functions 2-7 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* FIXME: Implement the error count reads directly
|
|
|
|
*/
|
|
|
|
|
|
|
|
static const u32 correrrcnt[] = {
|
|
|
|
0x104, 0x108, 0x10c, 0x110,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
|
|
|
|
#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
|
|
|
|
#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
|
|
|
|
#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
|
|
|
|
|
|
|
|
static const u32 correrrthrsld[] = {
|
|
|
|
0x11c, 0x120, 0x124, 0x128,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
|
|
|
|
#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
|
|
|
|
|
|
|
|
|
|
|
|
/* Device 17, function 0 */

/* RANK_CFG_A register offset; the SB_ and IB_ variants differ. */
#define SB_RANK_CFG_A		0x0328

#define IB_RANK_CFG_A		0x0320
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge structs
|
|
|
|
*/
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
#define NUM_CHANNELS		8	/* 2MC per socket, four chan per MC */
#define MAX_DIMMS		3	/* Max DIMMS per channel */
#define CHANNEL_UNSPECIFIED	0xf	/* Intel IA32 SDM 15-14 */
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
/*
 * Processor generation handled by this driver.  The enumerators take the
 * default values 0..3 in the order listed.
 */
enum type {
	SANDY_BRIDGE,
	IVY_BRIDGE,
	HASWELL,
	BROADWELL,
};
|
|
|
|
|
2013-10-30 13:26:57 -03:00
|
|
|
struct sbridge_pvt;
|
2011-10-20 19:18:01 -02:00
|
|
|
struct sbridge_info {
|
2013-10-30 13:27:06 -03:00
|
|
|
enum type type;
|
2013-10-30 13:27:00 -03:00
|
|
|
u32 mcmtr;
|
|
|
|
u32 rankcfgr;
|
|
|
|
u64 (*get_tolm)(struct sbridge_pvt *pvt);
|
|
|
|
u64 (*get_tohm)(struct sbridge_pvt *pvt);
|
2014-06-02 15:15:24 -03:00
|
|
|
u64 (*rir_limit)(u32 reg);
|
2015-12-03 10:48:52 +01:00
|
|
|
u64 (*sad_limit)(u32 reg);
|
|
|
|
u32 (*interleave_mode)(u32 reg);
|
|
|
|
char* (*show_interleave_mode)(u32 reg);
|
|
|
|
u32 (*dram_attr)(u32 reg);
|
2013-10-30 13:27:00 -03:00
|
|
|
const u32 *dram_rule;
|
2013-10-30 13:27:01 -03:00
|
|
|
const u32 *interleave_list;
|
2013-10-30 13:27:02 -03:00
|
|
|
const struct interleave_pkg *interleave_pkg;
|
2013-10-30 13:27:00 -03:00
|
|
|
u8 max_sad;
|
2013-10-30 13:27:01 -03:00
|
|
|
u8 max_interleave;
|
2014-06-02 15:15:23 -03:00
|
|
|
u8 (*get_node_id)(struct sbridge_pvt *pvt);
|
2014-06-02 15:15:22 -03:00
|
|
|
enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt);
|
2015-06-12 15:08:17 -04:00
|
|
|
enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr);
|
2014-06-20 10:27:54 -03:00
|
|
|
struct pci_dev *pci_vtd;
|
2011-10-20 19:18:01 -02:00
|
|
|
};
|
|
|
|
|
|
|
|
struct sbridge_channel {
|
|
|
|
u32 ranks;
|
|
|
|
u32 dimms;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * One PCI device the driver looks for.
 * @dev_id:   PCI device id
 * @optional: flag set from the second PCI_DESCR() argument
 *            NOTE(review): presumably non-zero means the device may be
 *            absent without failing the probe; confirm against the lookup
 *            code.
 */
struct pci_id_descr {
	int			dev_id;
	int			optional;
};

/*
 * A group of device descriptors.
 * @descr:  first element of the descriptor array
 * @n_devs: number of elements in @descr
 */
struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;
};
|
|
|
|
|
|
|
|
struct sbridge_dev {
|
|
|
|
struct list_head list;
|
|
|
|
u8 bus, mc;
|
|
|
|
u8 node_id, source_id;
|
|
|
|
struct pci_dev **pdev;
|
|
|
|
int n_devs;
|
|
|
|
struct mem_ctl_info *mci;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sbridge_pvt {
|
|
|
|
struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
|
2013-10-30 13:27:06 -03:00
|
|
|
struct pci_dev *pci_sad0, *pci_sad1;
|
|
|
|
struct pci_dev *pci_ha0, *pci_ha1;
|
|
|
|
struct pci_dev *pci_br0, *pci_br1;
|
2014-06-20 10:27:54 -03:00
|
|
|
struct pci_dev *pci_ha1_ta;
|
2011-10-20 19:18:01 -02:00
|
|
|
struct pci_dev *pci_tad[NUM_CHANNELS];
|
|
|
|
|
|
|
|
struct sbridge_dev *sbridge_dev;
|
|
|
|
|
|
|
|
struct sbridge_info info;
|
|
|
|
struct sbridge_channel channel[NUM_CHANNELS];
|
|
|
|
|
|
|
|
/* Memory type detection */
|
|
|
|
bool is_mirrored, is_lockstep, is_close_pg;
|
|
|
|
|
|
|
|
/* Fifo double buffers */
|
|
|
|
struct mce mce_entry[MCE_LOG_LEN];
|
|
|
|
struct mce mce_outentry[MCE_LOG_LEN];
|
|
|
|
|
|
|
|
/* Fifo in/out counters */
|
|
|
|
unsigned mce_in, mce_out;
|
|
|
|
|
|
|
|
/* Count indicator to show errors not got */
|
|
|
|
unsigned mce_overrun;
|
|
|
|
|
|
|
|
/* Memory description */
|
|
|
|
u64 tolm, tohm;
|
|
|
|
};
|
|
|
|
|
2014-06-02 15:15:25 -03:00
|
|
|
#define PCI_DESCR(device_id, opt) \
|
|
|
|
.dev_id = (device_id), \
|
2013-03-28 09:59:15 -07:00
|
|
|
.optional = opt
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
static const struct pci_id_descr pci_dev_descr_sbridge[] = {
|
|
|
|
/* Processor Home Agent */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) },
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Memory controller */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) },
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* System Address Decoder */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) },
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Broadcast Registers */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) },
|
2011-10-20 19:18:01 -02:00
|
|
|
};
|
|
|
|
|
|
|
|
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
|
|
|
|
static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
|
|
|
|
PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
|
|
|
|
{0,} /* 0 terminated list. */
|
|
|
|
};
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
/* This changes depending if 1HA or 2HA:
 * 1HA:
 *	0x0eb8 (17.0) is DDRIO0
 * 2HA:
 *	0x0ebc (17.4) is DDRIO0
 */
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0	0x0eb8
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0	0x0ebc

/* pci ids */
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0		0x0ea0
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA		0x0ea8
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS		0x0e71
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0	0x0eaa
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1	0x0eab
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2	0x0eac
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3	0x0ead
#define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD			0x0ec8
#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0			0x0ec9
#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1			0x0eca
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1		0x0e60
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA		0x0e68
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS		0x0e79
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0	0x0e6a
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1	0x0e6b
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2	0x0e6c
#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3	0x0e6d
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
static const struct pci_id_descr pci_dev_descr_ibridge[] = {
|
|
|
|
/* Processor Home Agent */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) },
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
/* Memory controller */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) },
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
/* System Address Decoder */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) },
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
/* Broadcast Registers */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) },
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
/* Optional, mode 2HA */
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) },
|
2013-10-30 13:27:06 -03:00
|
|
|
#if 0
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) },
|
2013-10-30 13:27:06 -03:00
|
|
|
#endif
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) },
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1) },
|
2013-10-30 13:27:06 -03:00
|
|
|
|
2014-06-02 15:15:25 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) },
|
2013-10-30 13:27:06 -03:00
|
|
|
};
|
|
|
|
|
|
|
|
static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
|
|
|
|
PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge),
|
|
|
|
{0,} /* 0 terminated list. */
|
|
|
|
};
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
/* Haswell support */
/* EN processor:
 *	- 1 IMC
 *	- 3 DDR3 channels, 2 DPC per channel
 * EP processor:
 *	- 1 or 2 IMC
 *	- 4 DDR4 channels, 3 DPC per channel
 * EP 4S processor:
 *	- 2 IMC
 *	- 4 DDR4 channels, 3 DPC per channel
 * EX processor:
 *	- 2 IMC
 *	- each IMC interfaces with a SMI 2 channel
 *	- each SMI channel interfaces with a scalable memory buffer
 *	- each scalable memory buffer supports 4 DDR3/DDR4 channels, 3 DPC
 */
#define HASWELL_DDRCRCLKCONTROLS 0xa10 /* Ditto on Broadwell */
#define HASWELL_HASYSDEFEATURE2 0x84
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_VTD_MISC 0x2f28
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0	0x2fa0
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1	0x2f60
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA	0x2fa8
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL 0x2f71
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA	0x2f68
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL 0x2f79
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1 0x2fab
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2 0x2fac
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3 0x2fad
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 0x2f6a
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1 0x2f6b
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2 0x2f6c
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3 0x2f6d
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0 0x2fbd
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1 0x2fbf
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2 0x2fb9
#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb
|
2014-06-20 10:27:54 -03:00
|
|
|
static const struct pci_id_descr pci_dev_descr_haswell[] = {
|
|
|
|
/* first item must be the HA */
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0) },
|
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0) },
|
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1) },
|
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1) },
|
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1) },
|
2015-06-12 09:44:52 -04:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1) },
|
2014-06-20 10:27:54 -03:00
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1) },
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct pci_id_table pci_dev_descr_haswell_table[] = {
|
|
|
|
PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell),
|
|
|
|
{0,} /* 0 terminated list. */
|
|
|
|
};
|
|
|
|
|
2014-12-02 09:27:30 -08:00
|
|
|
/*
 * Broadwell support
 *
 * DE processor:
 *	- 1 IMC
 *	- 2 DDR3 channels, 2 DPC per channel
 * EP processor:
 *	- 1 or 2 IMC
 *	- 4 DDR4 channels, 3 DPC per channel
 * EP 4S processor:
 *	- 2 IMC
 *	- 4 DDR4 channels, 3 DPC per channel
 * EX processor:
 *	- 2 IMC
 *	- each IMC interfaces with a SMI 2 channel
 *	- each SMI channel interfaces with a scalable memory buffer
 *	- each scalable memory buffer supports 4 DDR3/DDR4 channels, 3 DPC
 */
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_VTD_MISC 0x6f28
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0	0x6fa0
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1	0x6f60
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA	0x6fa8
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL 0x6f71
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA	0x6f68
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL 0x6f79
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1 0x6fab
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2 0x6fac
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3 0x6fad
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 0x6f6a
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1 0x6f6b
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2 0x6f6c
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3 0x6f6d
#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0 0x6faf
|
|
|
|
|
|
|
|
static const struct pci_id_descr pci_dev_descr_broadwell[] = {
|
|
|
|
/* first item must be the HA */
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0) },
|
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0) },
|
|
|
|
|
2015-05-20 19:10:35 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1) },
|
|
|
|
|
2014-12-02 09:27:30 -08:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0) },
|
2015-05-20 19:10:35 -03:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1) },
|
|
|
|
|
2014-12-02 09:27:30 -08:00
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1) },
|
2015-05-20 19:10:35 -03:00
|
|
|
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1) },
|
|
|
|
{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1) },
|
2014-12-02 09:27:30 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
|
|
|
|
PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell),
|
|
|
|
{0,} /* 0 terminated list. */
|
|
|
|
};
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
/*
|
|
|
|
* pci_device_id table for which devices we are looking for
|
|
|
|
*/
|
2013-12-06 10:23:08 +01:00
|
|
|
static const struct pci_device_id sbridge_pci_tbl[] = {
|
2014-08-14 14:45:41 -07:00
|
|
|
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)},
|
2013-10-30 13:27:06 -03:00
|
|
|
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
|
2014-06-20 10:27:54 -03:00
|
|
|
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)},
|
2014-12-02 09:27:30 -08:00
|
|
|
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)},
|
2011-10-20 19:18:01 -02:00
|
|
|
{0,} /* 0 terminated list. */
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
2012-04-17 11:30:52 -07:00
|
|
|
Ancillary status routines
|
2011-10-20 19:18:01 -02:00
|
|
|
****************************************************************************/
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
/*
 * Decode the number of ranks from an MTR register value.
 *
 * The rank count is 1 << RANK_CNT_BITS(mtr).  Haswell and Broadwell accept
 * up to 8 ranks, every other type up to 4.
 *
 * Returns the rank count, or -EINVAL when it exceeds the per-type maximum.
 */
static inline int numrank(enum type type, u32 mtr)
{
	const int nranks = (1 << RANK_CNT_BITS(mtr));
	const int limit = (type == HASWELL || type == BROADWELL) ? 8 : 4;

	if (nranks <= limit)
		return nranks;

	edac_dbg(0, "Invalid number of ranks: %d (max = %i) raw value = %x (%04x)\n",
		 nranks, limit, (unsigned int)RANK_CNT_BITS(mtr), mtr);
	return -EINVAL;
}
|
|
|
|
|
|
|
|
/*
 * Decode the number of rows from an MTR register value.
 *
 * The row-address width is RANK_WIDTH_BITS(mtr) + 12 and must lie in the
 * range 13..18 inclusive.
 *
 * Returns 1 << width, or -EINVAL when the width is out of range.
 */
static inline int numrow(u32 mtr)
{
	int rows = (RANK_WIDTH_BITS(mtr) + 12);

	if (rows < 13 || rows > 18) {
		/* Keep the reported range in sync with the check above. */
		edac_dbg(0, "Invalid number of rows: %d (should be between 13 and 18) raw value = %x (%04x)\n",
			 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
		return -EINVAL;
	}

	return 1 << rows;
}
|
|
|
|
|
|
|
|
/*
 * Decode the number of columns from an MTR register value.
 *
 * The column-address width is COL_WIDTH_BITS(mtr) + 10 and may not
 * exceed 12.
 *
 * Returns 1 << width, or -EINVAL when the width is too large.
 */
static inline int numcol(u32 mtr)
{
	int cols = (COL_WIDTH_BITS(mtr) + 10);

	if (cols > 12) {
		/* Report the limit that is actually enforced above. */
		edac_dbg(0, "Invalid number of cols: %d (max = 12) raw value = %x (%04x)\n",
			 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
		return -EINVAL;
	}

	return 1 << cols;
}
|
|
|
|
|
2015-12-03 10:48:53 +01:00
|
|
|
/*
 * Look up an already-allocated sbridge_dev on sbridge_edac_list.
 *
 * @bus:       PCI bus number to match
 * @multi_bus: non-zero when the devices of one memory controller are
 *             spread over several buses
 *
 * Returns the matching entry, or NULL if there is none.
 */
static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
{
	struct sbridge_dev *entry;

	/*
	 * Devices scattered across several buses but belonging to the same
	 * memory controller are lumped together in the first list entry.
	 */
	if (multi_bus)
		return list_first_entry_or_null(&sbridge_edac_list,
						struct sbridge_dev, list);

	list_for_each_entry(entry, &sbridge_edac_list, list)
		if (entry->bus == bus)
			return entry;

	return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Allocate a zeroed sbridge_dev together with a pdev array holding
 * table->n_devs entries, record @bus and the device count, and append the
 * new entry to sbridge_edac_list.
 *
 * Returns the new entry, or NULL if either allocation fails (nothing is
 * leaked or linked in that case).
 */
static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
					   const struct pci_id_table *table)
{
	struct sbridge_dev *sbridge_dev;

	sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
	if (!sbridge_dev)
		return NULL;

	/* kcalloc() zeroes the array and checks n * size for overflow */
	sbridge_dev->pdev = kcalloc(table->n_devs, sizeof(*sbridge_dev->pdev),
				    GFP_KERNEL);
	if (!sbridge_dev->pdev) {
		kfree(sbridge_dev);
		return NULL;
	}

	sbridge_dev->bus = bus;
	sbridge_dev->n_devs = table->n_devs;
	list_add_tail(&sbridge_dev->list, &sbridge_edac_list);

	return sbridge_dev;
}
|
|
|
|
|
|
|
|
static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
|
|
|
|
{
|
|
|
|
list_del(&sbridge_dev->list);
|
|
|
|
kfree(sbridge_dev->pdev);
|
|
|
|
kfree(sbridge_dev);
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:26:57 -03:00
|
|
|
static u64 sbridge_get_tolm(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
|
|
|
|
/* Address range is 32:28 */
|
|
|
|
pci_read_config_dword(pvt->pci_sad1, TOLM, ®);
|
|
|
|
return GET_TOLM(reg);
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:26:59 -03:00
|
|
|
static u64 sbridge_get_tohm(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_sad1, TOHM, ®);
|
|
|
|
return GET_TOHM(reg);
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
static u64 ibridge_get_tolm(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_br1, TOLM, ®);
|
|
|
|
|
|
|
|
return GET_TOLM(reg);
|
|
|
|
}
|
|
|
|
|
|
|
|
static u64 ibridge_get_tohm(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_br1, TOHM, ®);
|
|
|
|
|
|
|
|
return GET_TOHM(reg);
|
|
|
|
}
|
|
|
|
|
2014-06-02 15:15:24 -03:00
|
|
|
/*
 * Decode a RIR limit: bits 1..10 of @reg give the limit in units of
 * 1 << 29 bytes; the low 29 bits of the result are all ones.
 */
static u64 rir_limit(u32 reg)
{
	u64 field = (u64)GET_BITFIELD(reg, 1, 10);

	return (field << 29) | 0x1fffffff;
}
|
|
|
|
|
2015-12-03 10:48:52 +01:00
|
|
|
/*
 * Decode a SAD limit: bits 6..25 of @reg shifted up by 26, with the low
 * 26 bits of the result all ones.
 */
static u64 sad_limit(u32 reg)
{
	return 0x3ffffff | (GET_BITFIELD(reg, 6, 25) << 26);
}
|
|
|
|
|
|
|
|
/* Extract the interleave-mode flag, bit 1 of @reg. */
static u32 interleave_mode(u32 reg)
{
	u32 mode = GET_BITFIELD(reg, 1, 1);

	return mode;
}
|
|
|
|
|
|
|
|
/*
 * Human-readable name of the interleave mode encoded in @reg:
 * "8:6" when interleave_mode() is non-zero, "[8:6]XOR[18:16]" otherwise.
 * The returned string is a literal and must not be modified or freed.
 */
char *show_interleave_mode(u32 reg)
{
	if (interleave_mode(reg))
		return "8:6";

	return "[8:6]XOR[18:16]";
}
|
|
|
|
|
|
|
|
/* Extract the DRAM attribute field, bits 2..3 of @reg. */
static u32 dram_attr(u32 reg)
{
	u32 attr = GET_BITFIELD(reg, 2, 3);

	return attr;
}
|
|
|
|
|
2014-06-02 15:15:22 -03:00
|
|
|
static enum mem_type get_memory_type(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
enum mem_type mtype;
|
|
|
|
|
|
|
|
if (pvt->pci_ddrio) {
|
|
|
|
pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr,
|
|
|
|
®);
|
|
|
|
if (GET_BITFIELD(reg, 11, 11))
|
|
|
|
/* FIXME: Can also be LRDIMM */
|
|
|
|
mtype = MEM_RDDR3;
|
|
|
|
else
|
|
|
|
mtype = MEM_DDR3;
|
|
|
|
} else
|
|
|
|
mtype = MEM_UNKNOWN;
|
|
|
|
|
|
|
|
return mtype;
|
|
|
|
}
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
static enum mem_type haswell_get_memory_type(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
bool registered = false;
|
|
|
|
enum mem_type mtype = MEM_UNKNOWN;
|
|
|
|
|
|
|
|
if (!pvt->pci_ddrio)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_ddrio,
|
|
|
|
HASWELL_DDRCRCLKCONTROLS, ®);
|
|
|
|
/* Is_Rdimm */
|
|
|
|
if (GET_BITFIELD(reg, 16, 16))
|
|
|
|
registered = true;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_ta, MCMTR, ®);
|
|
|
|
if (GET_BITFIELD(reg, 14, 14)) {
|
|
|
|
if (registered)
|
|
|
|
mtype = MEM_RDDR4;
|
|
|
|
else
|
|
|
|
mtype = MEM_DDR4;
|
|
|
|
} else {
|
|
|
|
if (registered)
|
|
|
|
mtype = MEM_RDDR3;
|
|
|
|
else
|
|
|
|
mtype = MEM_DDR3;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
return mtype;
|
|
|
|
}
|
|
|
|
|
2015-06-12 15:08:17 -04:00
|
|
|
/*
 * Device width for Sandy Bridge.  Neither argument is used: the width
 * cannot be determined here, so the result is always DEV_UNKNOWN.
 */
static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr)
{
	/* there's no way to figure out */
	return DEV_UNKNOWN;
}
|
|
|
|
|
|
|
|
/*
 * Map an already-extracted width field to a device type.
 *
 * @mtr: the raw width field value (callers pass a two-bit field)
 *
 * 0 -> DEV_X4, 1 -> DEV_X8, 2 -> DEV_X16; 3 and anything else yield
 * DEV_UNKNOWN.
 */
static enum dev_type __ibridge_get_width(u32 mtr)
{
	/* Initialized so that no path can return an indeterminate value. */
	enum dev_type type = DEV_UNKNOWN;

	switch (mtr) {
	case 2:
		type = DEV_X16;
		break;
	case 1:
		type = DEV_X8;
		break;
	case 0:
		type = DEV_X4;
		break;
	case 3:
	default:
		type = DEV_UNKNOWN;
		break;
	}

	return type;
}
|
|
|
|
|
|
|
|
/*
 * Device width from bits 7..8 of the MTR register.  @pvt is unused.
 */
static enum dev_type ibridge_get_width(struct sbridge_pvt *pvt, u32 mtr)
{
	/*
	 * The documentation calls this field ddr3_width, but it is also
	 * valid for DDR4 on Haswell.
	 */
	u32 width = GET_BITFIELD(mtr, 7, 8);

	return __ibridge_get_width(width);
}
|
|
|
|
|
|
|
|
/*
 * Device width from bits 8..9 of the MTR register.  @pvt is unused.
 */
static enum dev_type broadwell_get_width(struct sbridge_pvt *pvt, u32 mtr)
{
	/* ddr3_width on the documentation but also valid for DDR4 */
	u32 width = GET_BITFIELD(mtr, 8, 9);

	return __ibridge_get_width(width);
}
|
|
|
|
|
2014-06-02 15:15:23 -03:00
|
|
|
static u8 get_node_id(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, ®);
|
|
|
|
return GET_BITFIELD(reg, 0, 2);
|
|
|
|
}
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
static u8 haswell_get_node_id(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_sad1, SAD_CONTROL, ®);
|
|
|
|
return GET_BITFIELD(reg, 0, 3);
|
|
|
|
}
|
|
|
|
|
|
|
|
static u64 haswell_get_tolm(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u32 reg;
|
|
|
|
|
sb_edac: Fix discovery of top-of-low-memory for Haswell
Haswell moved the TOLM/TOHM registers to a different device and offset.
The sb_edac driver accounted for the change of device, but not for the
new offset. There was also a typo in the constant to fill in the low
26 bits (was 0x1ffffff, should be 0x3ffffff).
This resulted in a bogus value for the top of low memory:
EDAC DEBUG: get_memory_layout: TOLM: 0.032 GB (0x0000000001ffffff)
which would result in EDAC refusing to translate addresses for
errors above the bogus value and below 4GB:
sbridge MC3: HANDLING MCE MEMORY ERROR
sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
sbridge MC3: TSC 0
sbridge MC3: ADDR 2000000
sbridge MC3: MISC 523eac86
sbridge MC3: PROCESSOR 0:306f3 TIME 1414600951 SOCKET 0 APIC 0
MC3: 1 CE Error at TOLM area, on addr 0x02000000 on any memory ( page:0x0 offset:0x0 grain:32 syndrome:0x0)
With the fix we see the correct TOLM value:
DEBUG: get_memory_layout: TOLM: 2.048 GB (0x000000007fffffff)
and we decode address 2000000 correctly:
sbridge MC3: HANDLING MCE MEMORY ERROR
sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
sbridge MC3: TSC 0
sbridge MC3: ADDR 2000000
sbridge MC3: MISC 523e1086
sbridge MC3: PROCESSOR 0:306f3 TIME 1414601319 SOCKET 0 APIC 0
DEBUG: get_memory_error_data: SAD interleave package: 0 = CPU socket 0, HA 0, shiftup: 0
DEBUG: get_memory_error_data: TAD#0: address 0x0000000002000000 < 0x000000007fffffff, socket interleave 1, channel interleave 4 (offset 0x00000000), index 0, base ch: 0, ch mask: 0x01
DEBUG: get_memory_error_data: RIR#0, limit: 4.095 GB (0x00000000ffffffff), way: 1
DEBUG: get_memory_error_data: RIR#0: channel address 0x00200000 < 0xffffffff, RIR interleave 0, index 0
DEBUG: sbridge_mce_output_error: area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0
MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x2000 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2014-10-29 10:36:50 -07:00
|
|
|
pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOLM, ®);
|
|
|
|
return (GET_BITFIELD(reg, 26, 31) << 26) | 0x3ffffff;
|
2014-06-20 10:27:54 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
static u64 haswell_get_tohm(struct sbridge_pvt *pvt)
|
|
|
|
{
|
|
|
|
u64 rc;
|
|
|
|
u32 reg;
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_0, ®);
|
|
|
|
rc = GET_BITFIELD(reg, 26, 31);
|
|
|
|
pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, ®);
|
|
|
|
rc = ((reg << 6) | rc) << 26;
|
|
|
|
|
|
|
|
return rc | 0x1ffffff;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Decode a Haswell RIR limit: (field + 1) units of 1 << 29 bytes, minus
 * one, where field is bits 1..11 of @reg.
 */
static u64 haswell_rir_limit(u32 reg)
{
	u64 units = (u64)GET_BITFIELD(reg, 1, 11) + 1;

	return (units << 29) - 1;
}
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
/*
 * Extract the socket number from a SAD package value by removing bit 2:
 * bits 3 and up move down one position, bits 0..1 stay in place.
 */
static inline u8 sad_pkg_socket(u8 pkg)
{
	/* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */
	u8 upper = (pkg >> 3) << 2;
	u8 lower = pkg & 0x3;

	return upper | lower;
}
|
|
|
|
|
|
|
|
/* Extract the home-agent bit (bit 2) from a SAD package value. */
static inline u8 sad_pkg_ha(u8 pkg)
{
	u8 shifted = pkg >> 2;

	return shifted & 0x1;
}
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
/****************************************************************************
|
|
|
|
Memory check routines
|
|
|
|
****************************************************************************/
|
2014-06-02 15:15:25 -03:00
|
|
|
/*
 * Find the Intel PCI device with device ID @id that sits on bus @bus.
 *
 * Walks all matching devices with pci_get_device() and stops at the first
 * one whose bus number equals @bus.  Returns that device, or NULL when
 * the search is exhausted.
 */
static struct pci_dev *get_pdev_same_bus(u8 bus, u32 id)
{
	struct pci_dev *pdev = NULL;

	while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, pdev)) != NULL) {
		if (pdev->bus->number == bus)
			break;
	}

	return pdev;
}
|
|
|
|
|
|
|
|
/**
|
2012-04-16 15:12:22 -03:00
|
|
|
* check_if_ecc_is_active() - Checks if ECC is active
|
2014-06-20 10:27:54 -03:00
|
|
|
* @bus: Device bus
|
|
|
|
* @type: Memory controller type
|
|
|
|
* returns: 0 in case ECC is active, -ENODEV if it can't be determined or
|
|
|
|
* disabled
|
2011-10-20 19:18:01 -02:00
|
|
|
*/
|
2014-06-02 15:15:25 -03:00
|
|
|
static int check_if_ecc_is_active(const u8 bus, enum type type)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct pci_dev *pdev = NULL;
|
2014-06-02 15:15:25 -03:00
|
|
|
u32 mcmtr, id;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2014-12-02 09:27:30 -08:00
|
|
|
switch (type) {
|
|
|
|
case IVY_BRIDGE:
|
2014-06-02 15:15:25 -03:00
|
|
|
id = PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA;
|
2014-12-02 09:27:30 -08:00
|
|
|
break;
|
|
|
|
case HASWELL:
|
2014-06-20 10:27:54 -03:00
|
|
|
id = PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA;
|
2014-12-02 09:27:30 -08:00
|
|
|
break;
|
|
|
|
case SANDY_BRIDGE:
|
2014-06-02 15:15:25 -03:00
|
|
|
id = PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA;
|
2014-12-02 09:27:30 -08:00
|
|
|
break;
|
|
|
|
case BROADWELL:
|
|
|
|
id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
2014-06-02 15:15:25 -03:00
|
|
|
|
|
|
|
pdev = get_pdev_same_bus(bus, id);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (!pdev) {
|
|
|
|
sbridge_printk(KERN_ERR, "Couldn't find PCI device "
|
2014-06-02 15:15:25 -03:00
|
|
|
"%04x:%04x! on bus %02d\n",
|
|
|
|
PCI_VENDOR_ID_INTEL, id, bus);
|
2011-10-20 19:18:01 -02:00
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
|
|
|
pci_read_config_dword(pdev, MCMTR, &mcmtr);
|
|
|
|
if (!IS_ECC_ENABLED(mcmtr)) {
|
|
|
|
sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-27 18:38:08 -03:00
|
|
|
static int get_dimm_config(struct mem_ctl_info *mci)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
2012-04-16 15:12:22 -03:00
|
|
|
struct dimm_info *dimm;
|
sb_edac: Avoid overflow errors at memory size calculation
Sandy bridge EDAC is calculating the memory size with overflow.
Basically, the size field and the integer calculation is using 32 bits.
More bits are needed, when the DIMM memories have high density.
The net result is that memories are improperly reported there, when
high-density DIMMs are used:
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 591: mc#0: channel 0, dimm 0, -16384 Mb (-4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 591: mc#0: channel 1, dimm 0, -16384 Mb (-4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
As the number of pages value is handled at the EDAC core as unsigned
ints, the driver shows the 16 GB memories at sysfs interface as 16760832
MB! The fix is simple: calculate the number of pages as unsigned 64-bits
integer.
After the patch, the memory size (16 GB) is properly detected:
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 592: mc#0: channel 0, dimm 0, 16384 Mb (4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 592: mc#0: channel 1, dimm 0, 16384 Mb (4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
Cc: stable@kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-09-20 12:09:30 -03:00
|
|
|
unsigned i, j, banks, ranks, rows, cols, npages;
|
|
|
|
u64 size;
|
2011-10-20 19:18:01 -02:00
|
|
|
u32 reg;
|
|
|
|
enum edac_type mode;
|
2011-10-18 11:02:58 -02:00
|
|
|
enum mem_type mtype;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2014-12-02 09:27:30 -08:00
|
|
|
if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL)
|
2014-06-20 10:27:54 -03:00
|
|
|
pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, ®);
|
|
|
|
else
|
|
|
|
pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®);
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
pvt->sbridge_dev->source_id = SOURCE_ID(reg);
|
|
|
|
|
2014-06-02 15:15:23 -03:00
|
|
|
pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
|
|
|
|
pvt->sbridge_dev->mc,
|
|
|
|
pvt->sbridge_dev->node_id,
|
|
|
|
pvt->sbridge_dev->source_id);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_ras, RASENABLES, ®);
|
|
|
|
if (IS_MIRROR_ENABLED(reg)) {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "Memory mirror is enabled\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
pvt->is_mirrored = true;
|
|
|
|
} else {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "Memory mirror is disabled\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
pvt->is_mirrored = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
|
|
|
|
if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "Lockstep is enabled\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
mode = EDAC_S8ECD8ED;
|
|
|
|
pvt->is_lockstep = true;
|
|
|
|
} else {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "Lockstep is disabled\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
mode = EDAC_S4ECD4ED;
|
|
|
|
pvt->is_lockstep = false;
|
|
|
|
}
|
|
|
|
if (IS_CLOSE_PG(pvt->info.mcmtr)) {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "address map is on closed page mode\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
pvt->is_close_pg = true;
|
|
|
|
} else {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "address map is on open page mode\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
pvt->is_close_pg = false;
|
|
|
|
}
|
|
|
|
|
2014-06-02 15:15:22 -03:00
|
|
|
mtype = pvt->info.get_memory_type(pvt);
|
2014-06-20 10:27:54 -03:00
|
|
|
if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4)
|
2014-06-02 15:15:22 -03:00
|
|
|
edac_dbg(0, "Memory is registered\n");
|
|
|
|
else if (mtype == MEM_UNKNOWN)
|
2013-03-28 09:59:15 -07:00
|
|
|
edac_dbg(0, "Cannot determine memory type\n");
|
2014-06-02 15:15:22 -03:00
|
|
|
else
|
|
|
|
edac_dbg(0, "Memory is unregistered\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2014-12-02 09:41:58 -08:00
|
|
|
if (mtype == MEM_DDR4 || mtype == MEM_RDDR4)
|
2014-06-20 10:27:54 -03:00
|
|
|
banks = 16;
|
|
|
|
else
|
|
|
|
banks = 8;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) {
|
|
|
|
u32 mtr;
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
if (!pvt->pci_tad[i])
|
|
|
|
continue;
|
2011-10-20 19:18:01 -02:00
|
|
|
for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
|
2012-04-16 15:12:22 -03:00
|
|
|
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
|
|
|
|
i, j, 0);
|
2011-10-20 19:18:01 -02:00
|
|
|
pci_read_config_dword(pvt->pci_tad[i],
|
|
|
|
mtr_regs[j], &mtr);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (IS_DIMM_PRESENT(mtr)) {
|
|
|
|
pvt->channel[i].dimms++;
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
ranks = numrank(pvt->info.type, mtr);
|
2011-10-20 19:18:01 -02:00
|
|
|
rows = numrow(mtr);
|
|
|
|
cols = numcol(mtr);
|
|
|
|
|
sb_edac: Avoid overflow errors at memory size calculation
Sandy bridge EDAC is calculating the memory size with overflow.
Basically, the size field and the integer calculation is using 32 bits.
More bits are needed, when the DIMM memories have high density.
The net result is that memories are improperly reported there, when
high-density DIMMs are used:
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 591: mc#0: channel 0, dimm 0, -16384 Mb (-4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 591: mc#0: channel 1, dimm 0, -16384 Mb (-4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
As the number of pages value is handled at the EDAC core as unsigned
ints, the driver shows the 16 GB memories at sysfs interface as 16760832
MB! The fix is simple: calculate the number of pages as unsigned 64-bits
integer.
After the patch, the memory size (16 GB) is properly detected:
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 592: mc#0: channel 0, dimm 0, 16384 Mb (4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
EDAC DEBUG: in drivers/edac/sb_edac.c, line at 592: mc#0: channel 1, dimm 0, 16384 Mb (4194304 pages) bank: 8, rank: 2, row: 0x10000, col: 0x800
Cc: stable@kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-09-20 12:09:30 -03:00
|
|
|
size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
|
2011-10-20 19:18:01 -02:00
|
|
|
npages = MiB_TO_PAGES(size);
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
|
|
|
|
pvt->sbridge_dev->mc, i/4, i%4, j,
|
2012-04-29 17:08:39 -03:00
|
|
|
size, npages,
|
|
|
|
banks, ranks, rows, cols);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2012-01-28 09:09:38 -03:00
|
|
|
dimm->nr_pages = npages;
|
2012-01-27 18:38:08 -03:00
|
|
|
dimm->grain = 32;
|
2015-06-12 15:08:17 -04:00
|
|
|
dimm->dtype = pvt->info.get_width(pvt, mtr);
|
2012-01-27 18:38:08 -03:00
|
|
|
dimm->mtype = mtype;
|
|
|
|
dimm->edac_mode = mode;
|
|
|
|
snprintf(dimm->label, sizeof(dimm->label),
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
"CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
|
|
|
|
pvt->sbridge_dev->source_id, i/4, i%4, j);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void get_memory_layout(const struct mem_ctl_info *mci)
|
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
|
|
|
int i, j, k, n_sads, n_tads, sad_interl;
|
|
|
|
u32 reg;
|
|
|
|
u64 limit, prv = 0;
|
|
|
|
u64 tmp_mb;
|
2014-11-18 14:51:09 +01:00
|
|
|
u32 gb, mb;
|
2011-10-20 19:18:01 -02:00
|
|
|
u32 rir_way;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 1) Get TOLM/TOHM ranges
|
|
|
|
*/
|
|
|
|
|
2013-10-30 13:26:57 -03:00
|
|
|
pvt->tolm = pvt->info.get_tolm(pvt);
|
2011-10-20 19:18:01 -02:00
|
|
|
tmp_mb = (1 + pvt->tolm) >> 20;
|
|
|
|
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
|
|
|
edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n",
|
|
|
|
gb, (mb*1000)/1024, (u64)pvt->tolm);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Address range is already 45:25 */
|
2013-10-30 13:26:59 -03:00
|
|
|
pvt->tohm = pvt->info.get_tohm(pvt);
|
2011-10-20 19:18:01 -02:00
|
|
|
tmp_mb = (1 + pvt->tohm) >> 20;
|
|
|
|
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
|
|
|
edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n",
|
|
|
|
gb, (mb*1000)/1024, (u64)pvt->tohm);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 2) Get SAD range and SAD Interleave list
|
|
|
|
* TAD registers contain the interleave wayness. However, it
|
|
|
|
* seems simpler to just discover it indirectly, with the
|
|
|
|
* algorithm bellow.
|
|
|
|
*/
|
|
|
|
prv = 0;
|
2013-10-30 13:27:00 -03:00
|
|
|
for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
|
2011-10-20 19:18:01 -02:00
|
|
|
/* SAD_LIMIT Address range is 45:26 */
|
2013-10-30 13:27:00 -03:00
|
|
|
pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
|
2011-10-20 19:18:01 -02:00
|
|
|
®);
|
2015-12-03 10:48:52 +01:00
|
|
|
limit = pvt->info.sad_limit(reg);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
if (!DRAM_RULE_ENABLE(reg))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (limit <= prv)
|
|
|
|
break;
|
|
|
|
|
|
|
|
tmp_mb = (limit + 1) >> 20;
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
|
|
|
|
n_sads,
|
2015-12-03 10:48:52 +01:00
|
|
|
show_dram_attr(pvt->info.dram_attr(reg)),
|
2014-11-18 14:51:09 +01:00
|
|
|
gb, (mb*1000)/1024,
|
2012-04-29 17:08:39 -03:00
|
|
|
((u64)tmp_mb) << 20L,
|
2015-12-03 10:48:52 +01:00
|
|
|
pvt->info.show_interleave_mode(reg),
|
2012-04-29 17:08:39 -03:00
|
|
|
reg);
|
2011-10-20 19:18:01 -02:00
|
|
|
prv = limit;
|
|
|
|
|
2013-10-30 13:27:01 -03:00
|
|
|
pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
|
2011-10-20 19:18:01 -02:00
|
|
|
®);
|
2013-10-30 13:27:02 -03:00
|
|
|
sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0);
|
2011-10-20 19:18:01 -02:00
|
|
|
for (j = 0; j < 8; j++) {
|
2013-10-30 13:27:02 -03:00
|
|
|
u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j);
|
|
|
|
if (j > 0 && sad_interl == pkg)
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
|
2013-10-30 13:27:02 -03:00
|
|
|
n_sads, j, pkg);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 3) Get TAD range
|
|
|
|
*/
|
|
|
|
prv = 0;
|
|
|
|
for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
|
|
|
|
pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
|
|
|
|
®);
|
|
|
|
limit = TAD_LIMIT(reg);
|
|
|
|
if (limit <= prv)
|
|
|
|
break;
|
|
|
|
tmp_mb = (limit + 1) >> 20;
|
|
|
|
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
|
2014-11-18 14:51:09 +01:00
|
|
|
n_tads, gb, (mb*1000)/1024,
|
2012-04-29 17:08:39 -03:00
|
|
|
((u64)tmp_mb) << 20L,
|
|
|
|
(u32)TAD_SOCK(reg),
|
|
|
|
(u32)TAD_CH(reg),
|
|
|
|
(u32)TAD_TGT0(reg),
|
|
|
|
(u32)TAD_TGT1(reg),
|
|
|
|
(u32)TAD_TGT2(reg),
|
|
|
|
(u32)TAD_TGT3(reg),
|
|
|
|
reg);
|
2012-02-06 04:11:01 -03:00
|
|
|
prv = limit;
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 4) Get TAD offsets, per each channel
|
|
|
|
*/
|
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) {
|
|
|
|
if (!pvt->channel[i].dimms)
|
|
|
|
continue;
|
|
|
|
for (j = 0; j < n_tads; j++) {
|
|
|
|
pci_read_config_dword(pvt->pci_tad[i],
|
|
|
|
tad_ch_nilv_offset[j],
|
|
|
|
®);
|
|
|
|
tmp_mb = TAD_OFFSET(reg) >> 20;
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
|
|
|
|
i, j,
|
2014-11-18 14:51:09 +01:00
|
|
|
gb, (mb*1000)/1024,
|
2012-04-29 17:08:39 -03:00
|
|
|
((u64)tmp_mb) << 20L,
|
|
|
|
reg);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 6) Get RIR Wayness/Limit, per each channel
|
|
|
|
*/
|
|
|
|
for (i = 0; i < NUM_CHANNELS; i++) {
|
|
|
|
if (!pvt->channel[i].dimms)
|
|
|
|
continue;
|
|
|
|
for (j = 0; j < MAX_RIR_RANGES; j++) {
|
|
|
|
pci_read_config_dword(pvt->pci_tad[i],
|
|
|
|
rir_way_limit[j],
|
|
|
|
®);
|
|
|
|
|
|
|
|
if (!IS_RIR_VALID(reg))
|
|
|
|
continue;
|
|
|
|
|
2014-06-02 15:15:24 -03:00
|
|
|
tmp_mb = pvt->info.rir_limit(reg) >> 20;
|
2011-10-20 19:18:01 -02:00
|
|
|
rir_way = 1 << RIR_WAY(reg);
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
|
|
|
|
i, j,
|
2014-11-18 14:51:09 +01:00
|
|
|
gb, (mb*1000)/1024,
|
2012-04-29 17:08:39 -03:00
|
|
|
((u64)tmp_mb) << 20L,
|
|
|
|
rir_way,
|
|
|
|
reg);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
for (k = 0; k < rir_way; k++) {
|
|
|
|
pci_read_config_dword(pvt->pci_tad[i],
|
|
|
|
rir_offset[j][k],
|
|
|
|
®);
|
|
|
|
tmp_mb = RIR_OFFSET(reg) << 6;
|
|
|
|
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(tmp_mb, 1024, &mb);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
|
|
|
|
i, j, k,
|
2014-11-18 14:51:09 +01:00
|
|
|
gb, (mb*1000)/1024,
|
2012-04-29 17:08:39 -03:00
|
|
|
((u64)tmp_mb) << 20L,
|
|
|
|
(u32)RIR_RNK_TGT(reg),
|
|
|
|
reg);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-14 19:32:09 +05:30
|
|
|
/*
 * get_mci_for_node_id() - find the memory controller registered for a node
 * @node_id:	node id to look for
 *
 * Walks sbridge_edac_list and returns the mci of the first entry whose
 * node_id matches, or NULL when no entry matches.
 */
static struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
{
	struct mem_ctl_info *found = NULL;
	struct sbridge_dev *entry;

	list_for_each_entry(entry, &sbridge_edac_list, list) {
		if (entry->node_id != node_id)
			continue;

		found = entry->mci;
		break;
	}

	return found;
}
|
|
|
|
|
|
|
|
static int get_memory_error_data(struct mem_ctl_info *mci,
|
|
|
|
u64 addr,
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
u8 *socket, u8 *ha,
|
2011-10-20 19:18:01 -02:00
|
|
|
long *channel_mask,
|
|
|
|
u8 *rank,
|
2012-05-11 11:41:45 -03:00
|
|
|
char **area_type, char *msg)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct mem_ctl_info *new_mci;
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
2013-10-30 13:27:06 -03:00
|
|
|
struct pci_dev *pci_ha;
|
2014-06-26 15:35:14 -03:00
|
|
|
int n_rir, n_sads, n_tads, sad_way, sck_xch;
|
2011-10-20 19:18:01 -02:00
|
|
|
int sad_interl, idx, base_ch;
|
2014-06-20 10:27:54 -03:00
|
|
|
int interleave_mode, shiftup = 0;
|
2013-10-30 13:27:01 -03:00
|
|
|
unsigned sad_interleave[pvt->info.max_interleave];
|
2014-06-20 10:27:54 -03:00
|
|
|
u32 reg, dram_rule;
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
u8 ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0;
|
2011-10-20 19:18:01 -02:00
|
|
|
u32 tad_offset;
|
|
|
|
u32 rir_way;
|
2014-11-18 14:51:09 +01:00
|
|
|
u32 mb, gb;
|
2013-11-21 09:08:03 -05:00
|
|
|
u64 ch_addr, offset, limit = 0, prv = 0;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 0) Check if the address is at special memory ranges
|
|
|
|
* The check bellow is probably enough to fill all cases where
|
|
|
|
* the error is not inside a memory, except for the legacy
|
|
|
|
* range (e. g. VGA addresses). It is unlikely, however, that the
|
|
|
|
* memory controller would generate an error on that range.
|
|
|
|
*/
|
2011-11-07 18:26:53 -03:00
|
|
|
if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
|
2011-10-20 19:18:01 -02:00
|
|
|
sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (addr >= (u64)pvt->tohm) {
|
|
|
|
sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 1) Get socket
|
|
|
|
*/
|
2013-10-30 13:27:00 -03:00
|
|
|
for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
|
|
|
|
pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
|
2011-10-20 19:18:01 -02:00
|
|
|
®);
|
|
|
|
|
|
|
|
if (!DRAM_RULE_ENABLE(reg))
|
|
|
|
continue;
|
|
|
|
|
2015-12-03 10:48:52 +01:00
|
|
|
limit = pvt->info.sad_limit(reg);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (limit <= prv) {
|
|
|
|
sprintf(msg, "Can't discover the memory socket");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (addr <= limit)
|
|
|
|
break;
|
|
|
|
prv = limit;
|
|
|
|
}
|
2013-10-30 13:27:00 -03:00
|
|
|
if (n_sads == pvt->info.max_sad) {
|
2011-10-20 19:18:01 -02:00
|
|
|
sprintf(msg, "Can't discover the memory socket");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2014-06-20 10:27:54 -03:00
|
|
|
dram_rule = reg;
|
2015-12-03 10:48:52 +01:00
|
|
|
*area_type = show_dram_attr(pvt->info.dram_attr(dram_rule));
|
|
|
|
interleave_mode = pvt->info.interleave_mode(dram_rule);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2013-10-30 13:27:01 -03:00
|
|
|
pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
|
2011-10-20 19:18:01 -02:00
|
|
|
®);
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
if (pvt->info.type == SANDY_BRIDGE) {
|
|
|
|
sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0);
|
|
|
|
for (sad_way = 0; sad_way < 8; sad_way++) {
|
|
|
|
u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way);
|
|
|
|
if (sad_way > 0 && sad_interl == pkg)
|
|
|
|
break;
|
|
|
|
sad_interleave[sad_way] = pkg;
|
|
|
|
edac_dbg(0, "SAD interleave #%d: %d\n",
|
|
|
|
sad_way, sad_interleave[sad_way]);
|
|
|
|
}
|
|
|
|
edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
|
|
|
|
pvt->sbridge_dev->mc,
|
|
|
|
n_sads,
|
|
|
|
addr,
|
|
|
|
limit,
|
|
|
|
sad_way + 7,
|
|
|
|
!interleave_mode ? "" : "XOR[18:16]");
|
|
|
|
if (interleave_mode)
|
|
|
|
idx = ((addr >> 6) ^ (addr >> 16)) & 7;
|
|
|
|
else
|
|
|
|
idx = (addr >> 6) & 7;
|
|
|
|
switch (sad_way) {
|
|
|
|
case 1:
|
|
|
|
idx = 0;
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
2013-10-30 13:27:06 -03:00
|
|
|
case 2:
|
|
|
|
idx = idx & 1;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
idx = idx & 3;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
sprintf(msg, "Can't discover socket interleave");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
*socket = sad_interleave[idx];
|
|
|
|
edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
|
|
|
|
idx, sad_way, *socket);
|
2014-12-02 09:27:30 -08:00
|
|
|
} else if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
|
2014-06-20 10:27:54 -03:00
|
|
|
int bits, a7mode = A7MODE(dram_rule);
|
|
|
|
|
|
|
|
if (a7mode) {
|
|
|
|
/* A7 mode swaps P9 with P6 */
|
|
|
|
bits = GET_BITFIELD(addr, 7, 8) << 1;
|
|
|
|
bits |= GET_BITFIELD(addr, 9, 9);
|
|
|
|
} else
|
sb_edac: Fix a typo and a thinko in address handling for Haswell
typo: "a7mode" chooses whether to use bits {8, 7, 9} or {8, 7, 6}
in the algorithm to spread access between memory resources. But
the non-a7mode path was incorrectly using GET_BITFIELD(addr, 7, 9)
and so picking bits {9, 8, 7}
thinko: BIT(1) of the dram_rule registers chooses whether to just
use the {8, 7, 6} (or {8, 7, 9}) bits mentioned above as they are,
or to XOR them with bits {18, 17, 16} but the code inverted the
test. We need the additional XOR when dram_rule{1} == 0.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:39:06 -03:00
|
|
|
bits = GET_BITFIELD(addr, 6, 8);
|
2014-06-20 10:27:54 -03:00
|
|
|
|
sb_edac: Fix a typo and a thinko in address handling for Haswell
typo: "a7mode" chooses whether to use bits {8, 7, 9} or {8, 7, 6}
in the algorithm to spread access between memory resources. But
the non-a7mode path was incorrectly using GET_BITFIELD(addr, 7, 9)
and so picking bits {9, 8, 7}
thinko: BIT(1) of the dram_rule registers chooses whether to just
use the {8, 7, 6} (or {8, 7, 9}) bits mentioned above as they are,
or to XOR them with bits {18, 17, 16} but the code inverted the
test. We need the additional XOR when dram_rule{1} == 0.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:39:06 -03:00
|
|
|
if (interleave_mode == 0) {
|
2014-06-20 10:27:54 -03:00
|
|
|
/* interleave mode will XOR {8,7,6} with {18,17,16} */
|
|
|
|
idx = GET_BITFIELD(addr, 16, 18);
|
|
|
|
idx ^= bits;
|
|
|
|
} else
|
|
|
|
idx = bits;
|
|
|
|
|
|
|
|
pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
|
|
|
|
*socket = sad_pkg_socket(pkg);
|
|
|
|
sad_ha = sad_pkg_ha(pkg);
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
if (sad_ha)
|
|
|
|
ch_add = 4;
|
2014-06-20 10:27:54 -03:00
|
|
|
|
|
|
|
if (a7mode) {
|
|
|
|
/* MCChanShiftUpEnable */
|
|
|
|
pci_read_config_dword(pvt->pci_ha0,
|
|
|
|
HASWELL_HASYSDEFEATURE2, ®);
|
|
|
|
shiftup = GET_BITFIELD(reg, 22, 22);
|
|
|
|
}
|
|
|
|
|
|
|
|
edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %i, shiftup: %i\n",
|
|
|
|
idx, *socket, sad_ha, shiftup);
|
2013-10-30 13:27:06 -03:00
|
|
|
} else {
|
|
|
|
/* Ivy Bridge's SAD mode doesn't support XOR interleave mode */
|
2011-10-20 19:18:01 -02:00
|
|
|
idx = (addr >> 6) & 7;
|
2013-10-30 13:27:06 -03:00
|
|
|
pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
|
|
|
|
*socket = sad_pkg_socket(pkg);
|
|
|
|
sad_ha = sad_pkg_ha(pkg);
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
if (sad_ha)
|
|
|
|
ch_add = 4;
|
2013-10-30 13:27:06 -03:00
|
|
|
edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n",
|
|
|
|
idx, *socket, sad_ha);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
*ha = sad_ha;
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
/*
|
|
|
|
* Move to the proper node structure, in order to access the
|
|
|
|
* right PCI registers
|
|
|
|
*/
|
|
|
|
new_mci = get_mci_for_node_id(*socket);
|
|
|
|
if (!new_mci) {
|
|
|
|
sprintf(msg, "Struct for socket #%u wasn't initialized",
|
|
|
|
*socket);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
mci = new_mci;
|
|
|
|
pvt = mci->pvt_info;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 2) Get memory channel
|
|
|
|
*/
|
|
|
|
prv = 0;
|
2013-10-30 13:27:06 -03:00
|
|
|
if (pvt->info.type == SANDY_BRIDGE)
|
|
|
|
pci_ha = pvt->pci_ha0;
|
|
|
|
else {
|
|
|
|
if (sad_ha)
|
|
|
|
pci_ha = pvt->pci_ha1;
|
|
|
|
else
|
|
|
|
pci_ha = pvt->pci_ha0;
|
|
|
|
}
|
2011-10-20 19:18:01 -02:00
|
|
|
for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
|
2013-10-30 13:27:06 -03:00
|
|
|
pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®);
|
2011-10-20 19:18:01 -02:00
|
|
|
limit = TAD_LIMIT(reg);
|
|
|
|
if (limit <= prv) {
|
|
|
|
sprintf(msg, "Can't discover the memory channel");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (addr <= limit)
|
|
|
|
break;
|
|
|
|
prv = limit;
|
|
|
|
}
|
2013-10-30 13:27:06 -03:00
|
|
|
if (n_tads == MAX_TAD) {
|
|
|
|
sprintf(msg, "Can't discover the memory channel");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
ch_way = TAD_CH(reg) + 1;
|
|
|
|
sck_way = TAD_SOCK(reg) + 1;
|
|
|
|
|
|
|
|
if (ch_way == 3)
|
|
|
|
idx = addr >> 6;
|
|
|
|
else
|
2014-06-20 10:27:54 -03:00
|
|
|
idx = (addr >> (6 + sck_way + shiftup)) & 0x3;
|
2011-10-20 19:18:01 -02:00
|
|
|
idx = idx % ch_way;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
|
|
|
|
*/
|
|
|
|
switch (idx) {
|
|
|
|
case 0:
|
|
|
|
base_ch = TAD_TGT0(reg);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
base_ch = TAD_TGT1(reg);
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
base_ch = TAD_TGT2(reg);
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
base_ch = TAD_TGT3(reg);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
sprintf(msg, "Can't discover the TAD target");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
*channel_mask = 1 << base_ch;
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
|
2013-10-30 13:27:06 -03:00
|
|
|
tad_ch_nilv_offset[n_tads],
|
|
|
|
&tad_offset);
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
if (pvt->is_mirrored) {
|
|
|
|
*channel_mask |= 1 << ((base_ch + 2) % 4);
|
|
|
|
switch(ch_way) {
|
|
|
|
case 2:
|
|
|
|
case 4:
|
|
|
|
sck_xch = 1 << sck_way * (ch_way >> 1);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
sprintf(msg, "Invalid mirror set. Can't decode addr");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
sck_xch = (1 << sck_way) * ch_way;
|
|
|
|
|
|
|
|
if (pvt->is_lockstep)
|
|
|
|
*channel_mask |= 1 << ((base_ch + 1) % 4);
|
|
|
|
|
|
|
|
offset = TAD_OFFSET(tad_offset);
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
|
|
|
|
n_tads,
|
|
|
|
addr,
|
|
|
|
limit,
|
|
|
|
(u32)TAD_SOCK(reg),
|
|
|
|
ch_way,
|
|
|
|
offset,
|
|
|
|
idx,
|
|
|
|
base_ch,
|
|
|
|
*channel_mask);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Calculate channel address */
|
|
|
|
/* Remove the TAD offset */
|
|
|
|
|
|
|
|
if (offset > addr) {
|
|
|
|
sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
|
|
|
|
offset, addr);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
addr -= offset;
|
|
|
|
/* Store the low bits [0:6] of the addr */
|
|
|
|
ch_addr = addr & 0x7f;
|
|
|
|
/* Remove socket wayness and remove 6 bits */
|
|
|
|
addr >>= 6;
|
2011-11-07 18:26:53 -03:00
|
|
|
addr = div_u64(addr, sck_xch);
|
2011-10-20 19:18:01 -02:00
|
|
|
#if 0
|
|
|
|
/* Divide by channel way */
|
|
|
|
addr = addr / ch_way;
|
|
|
|
#endif
|
|
|
|
/* Recover the last 6 bits */
|
|
|
|
ch_addr |= addr << 6;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 3) Decode rank
|
|
|
|
*/
|
|
|
|
for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
|
2011-10-20 19:18:01 -02:00
|
|
|
rir_way_limit[n_rir],
|
|
|
|
®);
|
|
|
|
|
|
|
|
if (!IS_RIR_VALID(reg))
|
|
|
|
continue;
|
|
|
|
|
2014-06-02 15:15:24 -03:00
|
|
|
limit = pvt->info.rir_limit(reg);
|
2014-11-18 14:51:09 +01:00
|
|
|
gb = div_u64_rem(limit >> 20, 1024, &mb);
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
|
|
|
|
n_rir,
|
2014-11-18 14:51:09 +01:00
|
|
|
gb, (mb*1000)/1024,
|
2012-04-29 17:08:39 -03:00
|
|
|
limit,
|
|
|
|
1 << RIR_WAY(reg));
|
2011-10-20 19:18:01 -02:00
|
|
|
if (ch_addr <= limit)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (n_rir == MAX_RIR_RANGES) {
|
|
|
|
sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
|
|
|
|
ch_addr);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
rir_way = RIR_WAY(reg);
|
2014-06-20 10:27:54 -03:00
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
if (pvt->is_close_pg)
|
|
|
|
idx = (ch_addr >> 6);
|
|
|
|
else
|
|
|
|
idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
|
|
|
|
idx %= 1 << rir_way;
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
|
2011-10-20 19:18:01 -02:00
|
|
|
rir_offset[n_rir][idx],
|
|
|
|
®);
|
|
|
|
*rank = RIR_RNK_TGT(reg);
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
|
|
|
|
n_rir,
|
|
|
|
ch_addr,
|
|
|
|
limit,
|
|
|
|
rir_way,
|
|
|
|
idx);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
Device initialization routines: put/get, init/exit
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_put_all_devices 'put' all the devices that we have
|
|
|
|
* reserved via 'get'
|
|
|
|
*/
|
|
|
|
static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
for (i = 0; i < sbridge_dev->n_devs; i++) {
|
|
|
|
struct pci_dev *pdev = sbridge_dev->pdev[i];
|
|
|
|
if (!pdev)
|
|
|
|
continue;
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "Removing dev %02x:%02x.%d\n",
|
|
|
|
pdev->bus->number,
|
|
|
|
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
|
2011-10-20 19:18:01 -02:00
|
|
|
pci_dev_put(pdev);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sbridge_put_all_devices(void)
|
|
|
|
{
|
|
|
|
struct sbridge_dev *sbridge_dev, *tmp;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
|
|
|
|
sbridge_put_devices(sbridge_dev);
|
|
|
|
free_sbridge_dev(sbridge_dev);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sbridge_get_onedevice(struct pci_dev **prev,
|
|
|
|
u8 *num_mc,
|
|
|
|
const struct pci_id_table *table,
|
2015-12-03 10:48:53 +01:00
|
|
|
const unsigned devno,
|
|
|
|
const int multi_bus)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct sbridge_dev *sbridge_dev;
|
|
|
|
const struct pci_id_descr *dev_descr = &table->descr[devno];
|
|
|
|
struct pci_dev *pdev = NULL;
|
|
|
|
u8 bus = 0;
|
|
|
|
|
2014-02-17 13:10:23 +08:00
|
|
|
sbridge_printk(KERN_DEBUG,
|
2014-06-02 15:15:25 -03:00
|
|
|
"Seeking for: PCI ID %04x:%04x\n",
|
2011-10-20 19:18:01 -02:00
|
|
|
PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
|
|
|
|
|
|
|
|
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
|
|
|
|
dev_descr->dev_id, *prev);
|
|
|
|
|
|
|
|
if (!pdev) {
|
|
|
|
if (*prev) {
|
|
|
|
*prev = pdev;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dev_descr->optional)
|
|
|
|
return 0;
|
|
|
|
|
2014-06-02 15:15:25 -03:00
|
|
|
/* if the HA wasn't found */
|
2011-10-20 19:18:01 -02:00
|
|
|
if (devno == 0)
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
sbridge_printk(KERN_INFO,
|
2014-06-02 15:15:25 -03:00
|
|
|
"Device not found: %04x:%04x\n",
|
2011-10-20 19:18:01 -02:00
|
|
|
PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
|
|
|
|
|
|
|
|
/* End of list, leave */
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
bus = pdev->bus->number;
|
|
|
|
|
2015-12-03 10:48:53 +01:00
|
|
|
sbridge_dev = get_sbridge_dev(bus, multi_bus);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (!sbridge_dev) {
|
|
|
|
sbridge_dev = alloc_sbridge_dev(bus, table);
|
|
|
|
if (!sbridge_dev) {
|
|
|
|
pci_dev_put(pdev);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
(*num_mc)++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sbridge_dev->pdev[devno]) {
|
|
|
|
sbridge_printk(KERN_ERR,
|
2014-06-02 15:15:25 -03:00
|
|
|
"Duplicated device for %04x:%04x\n",
|
2011-10-20 19:18:01 -02:00
|
|
|
PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
|
|
|
|
pci_dev_put(pdev);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
|
|
|
sbridge_dev->pdev[devno] = pdev;
|
|
|
|
|
|
|
|
/* Be sure that the device is enabled */
|
|
|
|
if (unlikely(pci_enable_device(pdev) < 0)) {
|
|
|
|
sbridge_printk(KERN_ERR,
|
2014-06-02 15:15:25 -03:00
|
|
|
"Couldn't enable %04x:%04x\n",
|
2011-10-20 19:18:01 -02:00
|
|
|
PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
2014-06-02 15:15:25 -03:00
|
|
|
edac_dbg(0, "Detected %04x:%04x\n",
|
2012-04-29 17:08:39 -03:00
|
|
|
PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* As stated on drivers/pci/search.c, the reference count for
|
|
|
|
* @from is always decremented if it is not %NULL. So, as we need
|
|
|
|
* to get all devices up to null, we need to do a get for the device
|
|
|
|
*/
|
|
|
|
pci_dev_get(pdev);
|
|
|
|
|
|
|
|
*prev = pdev;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:27:03 -03:00
|
|
|
/*
|
|
|
|
* sbridge_get_all_devices - Find and perform 'get' operation on the MCH's
|
2014-06-02 15:15:25 -03:00
|
|
|
* devices we want to reference for this driver.
|
2013-10-30 13:27:03 -03:00
|
|
|
* @num_mc: pointer to the memory controllers count, to be incremented in case
|
2014-06-26 15:35:14 -03:00
|
|
|
* of success.
|
2013-10-30 13:27:03 -03:00
|
|
|
* @table: model specific table
|
2015-12-03 10:48:53 +01:00
|
|
|
* @allow_dups: allow for multiple devices to exist with the same device id
|
|
|
|
* (as implemented, this isn't expected to work correctly in the
|
|
|
|
* multi-socket case).
|
|
|
|
* @multi_bus: don't assume devices on different buses belong to different
|
|
|
|
* memory controllers.
|
2013-10-30 13:27:03 -03:00
|
|
|
*
|
|
|
|
* returns 0 in case of success or error code
|
|
|
|
*/
|
2015-12-03 10:48:53 +01:00
|
|
|
static int sbridge_get_all_devices_full(u8 *num_mc,
|
|
|
|
const struct pci_id_table *table,
|
|
|
|
int allow_dups,
|
|
|
|
int multi_bus)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
int i, rc;
|
|
|
|
struct pci_dev *pdev = NULL;
|
|
|
|
|
|
|
|
while (table && table->descr) {
|
|
|
|
for (i = 0; i < table->n_devs; i++) {
|
2015-12-03 10:48:53 +01:00
|
|
|
if (!allow_dups || i == 0 ||
|
|
|
|
table->descr[i].dev_id !=
|
|
|
|
table->descr[i-1].dev_id) {
|
|
|
|
pdev = NULL;
|
|
|
|
}
|
2011-10-20 19:18:01 -02:00
|
|
|
do {
|
|
|
|
rc = sbridge_get_onedevice(&pdev, num_mc,
|
2015-12-03 10:48:53 +01:00
|
|
|
table, i, multi_bus);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (rc < 0) {
|
|
|
|
if (i == 0) {
|
|
|
|
i = table->n_devs;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
sbridge_put_all_devices();
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
2015-12-03 10:48:53 +01:00
|
|
|
} while (pdev && !allow_dups);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
table++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-12-03 10:48:53 +01:00
|
|
|
#define sbridge_get_all_devices(num_mc, table) \
|
|
|
|
sbridge_get_all_devices_full(num_mc, table, 0, 0)
|
|
|
|
|
2013-10-30 13:27:04 -03:00
|
|
|
static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
|
|
|
|
struct sbridge_dev *sbridge_dev)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
|
|
|
struct pci_dev *pdev;
|
2015-08-05 13:16:01 -05:00
|
|
|
u8 saw_chan_mask = 0;
|
2014-06-02 15:15:25 -03:00
|
|
|
int i;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
for (i = 0; i < sbridge_dev->n_devs; i++) {
|
|
|
|
pdev = sbridge_dev->pdev[i];
|
|
|
|
if (!pdev)
|
|
|
|
continue;
|
2014-06-02 15:15:25 -03:00
|
|
|
|
|
|
|
switch (pdev->device) {
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0:
|
|
|
|
pvt->pci_sad0 = pdev;
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
2014-06-02 15:15:25 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1:
|
|
|
|
pvt->pci_sad1 = pdev;
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
2014-06-02 15:15:25 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_BR:
|
|
|
|
pvt->pci_br0 = pdev;
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
2014-06-02 15:15:25 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
|
|
|
|
pvt->pci_ha0 = pdev;
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
2014-06-02 15:15:25 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA:
|
|
|
|
pvt->pci_ta = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS:
|
|
|
|
pvt->pci_ras = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1:
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3:
|
|
|
|
{
|
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0;
|
|
|
|
pvt->pci_tad[id] = pdev;
|
2015-08-05 13:16:01 -05:00
|
|
|
saw_chan_mask |= 1 << id;
|
2014-06-02 15:15:25 -03:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO:
|
|
|
|
pvt->pci_ddrio = pdev;
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2014-06-02 15:15:25 -03:00
|
|
|
edac_dbg(0, "Associated PCI %02x:%02x, bus %d with dev = %p\n",
|
|
|
|
pdev->vendor, pdev->device,
|
2012-04-29 17:08:39 -03:00
|
|
|
sbridge_dev->bus,
|
|
|
|
pdev);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if everything were registered */
|
|
|
|
if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
|
2013-03-28 09:59:15 -07:00
|
|
|
!pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta)
|
2011-10-20 19:18:01 -02:00
|
|
|
goto enodev;
|
|
|
|
|
2015-08-05 13:16:01 -05:00
|
|
|
if (saw_chan_mask != 0x0f)
|
|
|
|
goto enodev;
|
2011-10-20 19:18:01 -02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
enodev:
|
|
|
|
sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
error:
|
2014-06-02 15:15:25 -03:00
|
|
|
sbridge_printk(KERN_ERR, "Unexpected device %02x:%02x\n",
|
|
|
|
PCI_VENDOR_ID_INTEL, pdev->device);
|
2011-10-20 19:18:01 -02:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
|
|
|
|
struct sbridge_dev *sbridge_dev)
|
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
struct pci_dev *pdev;
|
|
|
|
u8 saw_chan_mask = 0;
|
2014-06-02 15:15:25 -03:00
|
|
|
int i;
|
2013-10-30 13:27:06 -03:00
|
|
|
|
|
|
|
for (i = 0; i < sbridge_dev->n_devs; i++) {
|
|
|
|
pdev = sbridge_dev->pdev[i];
|
|
|
|
if (!pdev)
|
|
|
|
continue;
|
|
|
|
|
2014-06-02 15:15:25 -03:00
|
|
|
switch (pdev->device) {
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0:
|
|
|
|
pvt->pci_ha0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
|
|
|
|
pvt->pci_ta = pdev;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
|
|
|
|
pvt->pci_ras = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1:
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3:
|
2014-06-02 15:15:25 -03:00
|
|
|
{
|
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0;
|
|
|
|
pvt->pci_tad[id] = pdev;
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
saw_chan_mask |= 1 << id;
|
2014-06-02 15:15:25 -03:00
|
|
|
}
|
2013-10-30 13:27:06 -03:00
|
|
|
break;
|
2014-06-02 15:15:25 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0:
|
|
|
|
pvt->pci_ddrio = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0:
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
pvt->pci_ddrio = pdev;
|
2013-10-30 13:27:06 -03:00
|
|
|
break;
|
2014-06-02 15:15:25 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_SAD:
|
|
|
|
pvt->pci_sad0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_BR0:
|
|
|
|
pvt->pci_br0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1:
|
|
|
|
pvt->pci_br1 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
|
|
|
|
pvt->pci_ha1 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
|
2014-06-02 15:15:25 -03:00
|
|
|
{
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4;
|
2014-06-02 15:15:25 -03:00
|
|
|
pvt->pci_tad[id] = pdev;
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
saw_chan_mask |= 1 << id;
|
2014-06-02 15:15:25 -03:00
|
|
|
}
|
|
|
|
break;
|
2013-10-30 13:27:06 -03:00
|
|
|
default:
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
|
|
|
|
sbridge_dev->bus,
|
|
|
|
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
|
|
|
|
pdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if everything were registered */
|
|
|
|
if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
|
|
|
|
!pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras ||
|
|
|
|
!pvt->pci_ta)
|
|
|
|
goto enodev;
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
if (saw_chan_mask != 0x0f && /* -EN */
|
|
|
|
saw_chan_mask != 0x33 && /* -EP */
|
|
|
|
saw_chan_mask != 0xff) /* -EX */
|
|
|
|
goto enodev;
|
2013-10-30 13:27:06 -03:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
enodev:
|
|
|
|
sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
error:
|
|
|
|
sbridge_printk(KERN_ERR,
|
2014-06-02 15:15:25 -03:00
|
|
|
"Unexpected device %02x:%02x\n", PCI_VENDOR_ID_INTEL,
|
|
|
|
pdev->device);
|
2013-10-30 13:27:06 -03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
|
|
|
|
struct sbridge_dev *sbridge_dev)
|
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
struct pci_dev *pdev;
|
|
|
|
u8 saw_chan_mask = 0;
|
2014-06-20 10:27:54 -03:00
|
|
|
int i;
|
|
|
|
|
|
|
|
/* there's only one device per system; not tied to any bus */
|
|
|
|
if (pvt->info.pci_vtd == NULL)
|
|
|
|
/* result will be checked later */
|
|
|
|
pvt->info.pci_vtd = pci_get_device(PCI_VENDOR_ID_INTEL,
|
|
|
|
PCI_DEVICE_ID_INTEL_HASWELL_IMC_VTD_MISC,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
for (i = 0; i < sbridge_dev->n_devs; i++) {
|
|
|
|
pdev = sbridge_dev->pdev[i];
|
|
|
|
if (!pdev)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (pdev->device) {
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0:
|
|
|
|
pvt->pci_sad0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1:
|
|
|
|
pvt->pci_sad1 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
|
|
|
|
pvt->pci_ha0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA:
|
|
|
|
pvt->pci_ta = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL:
|
|
|
|
pvt->pci_ras = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3:
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
{
|
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0;
|
|
|
|
|
|
|
|
pvt->pci_tad[id] = pdev;
|
|
|
|
saw_chan_mask |= 1 << id;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3:
|
|
|
|
{
|
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4;
|
|
|
|
|
|
|
|
pvt->pci_tad[id] = pdev;
|
|
|
|
saw_chan_mask |= 1 << id;
|
|
|
|
}
|
2014-06-20 10:27:54 -03:00
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0:
|
2015-06-12 09:44:52 -04:00
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3:
|
|
|
|
if (!pvt->pci_ddrio)
|
|
|
|
pvt->pci_ddrio = pdev;
|
2014-06-20 10:27:54 -03:00
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
|
|
|
|
pvt->pci_ha1 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
|
|
|
|
pvt->pci_ha1_ta = pdev;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
|
|
|
|
sbridge_dev->bus,
|
|
|
|
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
|
|
|
|
pdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if everything were registered */
|
|
|
|
if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
|
|
|
|
!pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd)
|
|
|
|
goto enodev;
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
if (saw_chan_mask != 0x0f && /* -EN */
|
|
|
|
saw_chan_mask != 0x33 && /* -EP */
|
|
|
|
saw_chan_mask != 0xff) /* -EX */
|
|
|
|
goto enodev;
|
2014-06-20 10:27:54 -03:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
enodev:
|
|
|
|
sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
2014-12-02 09:27:30 -08:00
|
|
|
static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
|
|
|
|
struct sbridge_dev *sbridge_dev)
|
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
|
|
|
struct pci_dev *pdev;
|
2015-05-20 19:10:35 -03:00
|
|
|
u8 saw_chan_mask = 0;
|
2014-12-02 09:27:30 -08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
/* there's only one device per system; not tied to any bus */
|
|
|
|
if (pvt->info.pci_vtd == NULL)
|
|
|
|
/* result will be checked later */
|
|
|
|
pvt->info.pci_vtd = pci_get_device(PCI_VENDOR_ID_INTEL,
|
|
|
|
PCI_DEVICE_ID_INTEL_BROADWELL_IMC_VTD_MISC,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
for (i = 0; i < sbridge_dev->n_devs; i++) {
|
|
|
|
pdev = sbridge_dev->pdev[i];
|
|
|
|
if (!pdev)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (pdev->device) {
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0:
|
|
|
|
pvt->pci_sad0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1:
|
|
|
|
pvt->pci_sad1 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
|
|
|
|
pvt->pci_ha0 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA:
|
|
|
|
pvt->pci_ta = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL:
|
|
|
|
pvt->pci_ras = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1:
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3:
|
2015-05-20 19:10:35 -03:00
|
|
|
{
|
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0;
|
|
|
|
pvt->pci_tad[id] = pdev;
|
|
|
|
saw_chan_mask |= 1 << id;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0:
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1:
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2:
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3:
|
|
|
|
{
|
|
|
|
int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 + 4;
|
|
|
|
pvt->pci_tad[id] = pdev;
|
|
|
|
saw_chan_mask |= 1 << id;
|
|
|
|
}
|
2014-12-02 09:27:30 -08:00
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0:
|
|
|
|
pvt->pci_ddrio = pdev;
|
|
|
|
break;
|
2015-05-20 19:10:35 -03:00
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
|
|
|
|
pvt->pci_ha1 = pdev;
|
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
|
|
|
|
pvt->pci_ha1_ta = pdev;
|
|
|
|
break;
|
2014-12-02 09:27:30 -08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
|
|
|
|
sbridge_dev->bus,
|
|
|
|
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
|
|
|
|
pdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if everything were registered */
|
|
|
|
if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
|
|
|
|
!pvt->pci_ras || !pvt->pci_ta || !pvt->info.pci_vtd)
|
|
|
|
goto enodev;
|
|
|
|
|
2015-05-20 19:10:35 -03:00
|
|
|
if (saw_chan_mask != 0x0f && /* -EN */
|
|
|
|
saw_chan_mask != 0x33 && /* -EP */
|
|
|
|
saw_chan_mask != 0xff) /* -EX */
|
|
|
|
goto enodev;
|
2014-12-02 09:27:30 -08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
enodev:
|
|
|
|
sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
/****************************************************************************
|
|
|
|
Error check routines
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* While Sandy Bridge has error count registers, SMI BIOS reads values from
|
|
|
|
* and resets the counters. So, they are not reliable for the OS to read
|
|
|
|
* from them. So, we have no option but to just trust whatever MCE is
|
|
|
|
* telling us about the errors.
|
|
|
|
*/
|
|
|
|
static void sbridge_mce_output_error(struct mem_ctl_info *mci,
|
|
|
|
const struct mce *m)
|
|
|
|
{
|
|
|
|
struct mem_ctl_info *new_mci;
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
2012-04-16 15:12:22 -03:00
|
|
|
enum hw_event_mc_err_type tp_event;
|
2012-05-11 11:41:45 -03:00
|
|
|
char *type, *optype, msg[256];
|
2011-10-20 19:18:01 -02:00
|
|
|
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
|
|
|
|
bool overflow = GET_BITFIELD(m->status, 62, 62);
|
|
|
|
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
|
2013-10-30 13:27:06 -03:00
|
|
|
bool recoverable;
|
2011-10-20 19:18:01 -02:00
|
|
|
u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
|
|
|
|
u32 mscod = GET_BITFIELD(m->status, 16, 31);
|
|
|
|
u32 errcode = GET_BITFIELD(m->status, 0, 15);
|
|
|
|
u32 channel = GET_BITFIELD(m->status, 0, 3);
|
|
|
|
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
|
|
|
|
long channel_mask, first_channel;
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 sockect ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
u8 rank, socket, ha;
|
2012-04-16 15:12:22 -03:00
|
|
|
int rc, dimm;
|
2012-05-11 11:41:45 -03:00
|
|
|
char *area_type = NULL;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2015-05-20 19:10:35 -03:00
|
|
|
if (pvt->info.type != SANDY_BRIDGE)
|
2013-10-30 13:27:06 -03:00
|
|
|
recoverable = true;
|
|
|
|
else
|
|
|
|
recoverable = GET_BITFIELD(m->status, 56, 56);
|
|
|
|
|
2012-04-16 15:12:22 -03:00
|
|
|
if (uncorrected_error) {
|
|
|
|
if (ripv) {
|
|
|
|
type = "FATAL";
|
|
|
|
tp_event = HW_EVENT_ERR_FATAL;
|
|
|
|
} else {
|
|
|
|
type = "NON_FATAL";
|
|
|
|
tp_event = HW_EVENT_ERR_UNCORRECTED;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
type = "CORRECTED";
|
|
|
|
tp_event = HW_EVENT_ERR_CORRECTED;
|
|
|
|
}
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/*
|
2012-04-17 11:30:52 -07:00
|
|
|
* According with Table 15-9 of the Intel Architecture spec vol 3A,
|
2011-10-20 19:18:01 -02:00
|
|
|
* memory errors should fit in this mask:
|
|
|
|
* 000f 0000 1mmm cccc (binary)
|
|
|
|
* where:
|
|
|
|
* f = Correction Report Filtering Bit. If 1, subsequent errors
|
|
|
|
* won't be shown
|
|
|
|
* mmm = error type
|
|
|
|
* cccc = channel
|
|
|
|
* If the mask doesn't match, report an error to the parsing logic
|
|
|
|
*/
|
|
|
|
if (! ((errcode & 0xef80) == 0x80)) {
|
|
|
|
optype = "Can't parse: it is not a mem";
|
|
|
|
} else {
|
|
|
|
switch (optypenum) {
|
|
|
|
case 0:
|
2012-04-16 15:12:22 -03:00
|
|
|
optype = "generic undef request error";
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
case 1:
|
2012-04-16 15:12:22 -03:00
|
|
|
optype = "memory read error";
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
case 2:
|
2012-04-16 15:12:22 -03:00
|
|
|
optype = "memory write error";
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
case 3:
|
2012-04-16 15:12:22 -03:00
|
|
|
optype = "addr/cmd error";
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
case 4:
|
2012-04-16 15:12:22 -03:00
|
|
|
optype = "memory scrubbing error";
|
2011-10-20 19:18:01 -02:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
optype = "reserved";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:27:05 -03:00
|
|
|
/* Only decode errors with an valid address (ADDRV) */
|
|
|
|
if (!GET_BITFIELD(m->status, 58, 58))
|
|
|
|
return;
|
|
|
|
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
|
2012-05-11 11:41:45 -03:00
|
|
|
&channel_mask, &rank, &area_type, msg);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (rc < 0)
|
2012-04-16 15:12:22 -03:00
|
|
|
goto err_parsing;
|
2011-10-20 19:18:01 -02:00
|
|
|
new_mci = get_mci_for_node_id(socket);
|
|
|
|
if (!new_mci) {
|
2012-04-16 15:12:22 -03:00
|
|
|
strcpy(msg, "Error: socket got corrupted!");
|
|
|
|
goto err_parsing;
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
mci = new_mci;
|
|
|
|
pvt = mci->pvt_info;
|
|
|
|
|
|
|
|
first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
|
|
|
|
|
|
|
|
if (rank < 4)
|
|
|
|
dimm = 0;
|
|
|
|
else if (rank < 8)
|
|
|
|
dimm = 1;
|
|
|
|
else
|
|
|
|
dimm = 2;
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2012-05-11 11:41:45 -03:00
|
|
|
* FIXME: On some memory configurations (mirror, lockstep), the
|
|
|
|
* Memory Controller can't point the error to a single DIMM. The
|
|
|
|
* EDAC core should be handling the channel mask, in order to point
|
|
|
|
* to the group of dimm's where the error may be happening.
|
2011-10-20 19:18:01 -02:00
|
|
|
*/
|
2014-06-02 15:15:28 -03:00
|
|
|
if (!pvt->is_lockstep && !pvt->is_mirrored && !pvt->is_close_pg)
|
|
|
|
channel = first_channel;
|
|
|
|
|
2012-04-16 15:12:22 -03:00
|
|
|
snprintf(msg, sizeof(msg),
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
"%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d",
|
2012-05-11 11:41:45 -03:00
|
|
|
overflow ? " OVERFLOW" : "",
|
|
|
|
(uncorrected_error && recoverable) ? " recoverable" : "",
|
|
|
|
area_type,
|
|
|
|
mscod, errcode,
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
socket, ha,
|
2012-05-11 11:41:45 -03:00
|
|
|
channel_mask,
|
|
|
|
rank);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "%s\n", msg);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2012-04-16 15:12:22 -03:00
|
|
|
/* FIXME: need support for channel mask */
|
|
|
|
|
2014-09-05 14:28:47 -05:00
|
|
|
if (channel == CHANNEL_UNSPECIFIED)
|
|
|
|
channel = -1;
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
/* Call the helper to output message */
|
2012-06-04 13:40:05 -03:00
|
|
|
edac_mc_handle_error(tp_event, mci, core_err_cnt,
|
2012-04-16 15:12:22 -03:00
|
|
|
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
|
sb_edac: Fix support for systems with two home agents per socket
First noticed a problem on a 4 socket machine where EDAC only reported
half the DIMMS. Tracked this down to the code that assumes that systems
with two home agents only have two memory channels on each agent. This
is true on 2 socket ("-EP") machines. But four socket ("-EX") machines
have four memory channels on each home agent.
The old code would have had problems on two socket systems as it did
a shuffling trick to make the internals of the code think that the
channels from the first agent were '0' and '1', with the second agent
providing '2' and '3'. But the code didn't uniformly convert from
{ha,channel} tuples to this internal representation.
New code always considers up to eight channels.
On a machine with a single home agent these map easily to edac channels
0, 1, 2, 3. On machines with two home agents we map using:
edac_channel = 4*ha# + channel
So on a -EP machine where each home agent supports only two channels
we'll fill in channels 0, 1, 4, 5, and on a -EX machine we use all of 0,
1, 2, 3, 4, 5, 6, 7.
[mchehab@osg.samsung.com: fold a fixup patch as per Tony's request and fixed
a few CodingStyle issues]
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2015-05-18 17:50:42 -03:00
|
|
|
4*ha+channel, dimm, -1,
|
2012-06-04 11:29:25 -03:00
|
|
|
optype, msg);
|
2012-04-16 15:12:22 -03:00
|
|
|
return;
|
|
|
|
err_parsing:
|
2012-06-04 13:40:05 -03:00
|
|
|
edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
|
2012-04-16 15:12:22 -03:00
|
|
|
-1, -1, -1,
|
2012-06-04 11:29:25 -03:00
|
|
|
msg, "");
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_check_error Retrieve and process errors reported by the
|
|
|
|
* hardware. Called by the Core module.
|
|
|
|
*/
|
|
|
|
static void sbridge_check_error(struct mem_ctl_info *mci)
|
|
|
|
{
|
|
|
|
struct sbridge_pvt *pvt = mci->pvt_info;
|
|
|
|
int i;
|
|
|
|
unsigned count = 0;
|
|
|
|
struct mce *m;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* MCE first step: Copy all mce errors into a temporary buffer
|
|
|
|
* We use a double buffering here, to reduce the risk of
|
|
|
|
* loosing an error.
|
|
|
|
*/
|
|
|
|
smp_rmb();
|
|
|
|
count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
|
|
|
|
% MCE_LOG_LEN;
|
|
|
|
if (!count)
|
|
|
|
return;
|
|
|
|
|
|
|
|
m = pvt->mce_outentry;
|
|
|
|
if (pvt->mce_in + count > MCE_LOG_LEN) {
|
|
|
|
unsigned l = MCE_LOG_LEN - pvt->mce_in;
|
|
|
|
|
|
|
|
memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
|
|
|
|
smp_wmb();
|
|
|
|
pvt->mce_in = 0;
|
|
|
|
count -= l;
|
|
|
|
m += l;
|
|
|
|
}
|
|
|
|
memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
|
|
|
|
smp_wmb();
|
|
|
|
pvt->mce_in += count;
|
|
|
|
|
|
|
|
smp_rmb();
|
|
|
|
if (pvt->mce_overrun) {
|
|
|
|
sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
|
|
|
|
pvt->mce_overrun);
|
|
|
|
smp_wmb();
|
|
|
|
pvt->mce_overrun = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* MCE second step: parse errors and display
|
|
|
|
*/
|
|
|
|
for (i = 0; i < count; i++)
|
|
|
|
sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_mce_check_error Replicates mcelog routine to get errors
|
|
|
|
* This routine simply queues mcelog errors, and
|
|
|
|
* return. The error itself should be handled later
|
|
|
|
* by sbridge_check_error.
|
|
|
|
* WARNING: As this routine should be called at NMI time, extra care should
|
|
|
|
* be taken to avoid deadlocks, and to be as fast as possible.
|
|
|
|
*/
|
2011-10-20 19:33:46 -02:00
|
|
|
static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
|
|
|
|
void *data)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
2011-10-20 19:33:46 -02:00
|
|
|
struct mce *mce = (struct mce *)data;
|
|
|
|
struct mem_ctl_info *mci;
|
|
|
|
struct sbridge_pvt *pvt;
|
2014-03-11 15:45:41 -04:00
|
|
|
char *type;
|
2011-10-20 19:33:46 -02:00
|
|
|
|
2013-12-06 01:17:09 -05:00
|
|
|
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
2011-10-20 19:33:46 -02:00
|
|
|
mci = get_mci_for_node_id(mce->socketid);
|
|
|
|
if (!mci)
|
|
|
|
return NOTIFY_BAD;
|
|
|
|
pvt = mci->pvt_info;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Just let mcelog handle it if the error is
|
|
|
|
* outside the memory controller. A memory error
|
|
|
|
* is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0.
|
|
|
|
* bit 12 has an special meaning.
|
|
|
|
*/
|
|
|
|
if ((mce->status & 0xefff) >> 7 != 1)
|
2011-10-20 19:33:46 -02:00
|
|
|
return NOTIFY_DONE;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2014-03-11 15:45:41 -04:00
|
|
|
if (mce->mcgstatus & MCG_STATUS_MCIP)
|
|
|
|
type = "Exception";
|
|
|
|
else
|
|
|
|
type = "Event";
|
|
|
|
|
2014-03-11 15:45:42 -04:00
|
|
|
sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2014-03-11 15:45:42 -04:00
|
|
|
sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
|
|
|
|
"Bank %d: %016Lx\n", mce->extcpu, type,
|
|
|
|
mce->mcgstatus, mce->bank, mce->status);
|
|
|
|
sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
|
|
|
|
sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
|
|
|
|
sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2014-03-11 15:45:42 -04:00
|
|
|
sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
|
|
|
|
"%u APIC %x\n", mce->cpuvendor, mce->cpuid,
|
|
|
|
mce->time, mce->socketid, mce->apicid);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
smp_rmb();
|
|
|
|
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
|
|
|
|
smp_wmb();
|
|
|
|
pvt->mce_overrun++;
|
2011-10-20 19:33:46 -02:00
|
|
|
return NOTIFY_DONE;
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Copy memory error at the ringbuffer */
|
|
|
|
memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
|
|
|
|
smp_wmb();
|
|
|
|
pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
|
|
|
|
|
|
|
|
/* Handle fatal errors immediately */
|
|
|
|
if (mce->mcgstatus & 1)
|
|
|
|
sbridge_check_error(mci);
|
|
|
|
|
|
|
|
/* Advice mcelog that the error were handled */
|
2011-10-20 19:33:46 -02:00
|
|
|
return NOTIFY_STOP;
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
|
2011-10-20 19:33:46 -02:00
|
|
|
static struct notifier_block sbridge_mce_dec = {
|
|
|
|
.notifier_call = sbridge_mce_check_error,
|
|
|
|
};
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
/****************************************************************************
|
|
|
|
EDAC register/unregister logic
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
|
|
|
|
{
|
|
|
|
struct mem_ctl_info *mci = sbridge_dev->mci;
|
|
|
|
struct sbridge_pvt *pvt;
|
|
|
|
|
|
|
|
if (unlikely(!mci || !mci->pvt_info)) {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
pvt = mci->pvt_info;
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "MC: mci = %p, dev = %p\n",
|
|
|
|
mci, &sbridge_dev->pdev[0]->dev);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Remove MC sysfs nodes */
|
2012-03-16 07:44:18 -03:00
|
|
|
edac_mc_del_mc(mci->pdev);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
|
2011-10-20 19:18:01 -02:00
|
|
|
kfree(mci->ctl_name);
|
|
|
|
edac_mc_free(mci);
|
|
|
|
sbridge_dev->mci = NULL;
|
|
|
|
}
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct mem_ctl_info *mci;
|
2012-04-16 15:12:22 -03:00
|
|
|
struct edac_mc_layer layers[2];
|
2011-10-20 19:18:01 -02:00
|
|
|
struct sbridge_pvt *pvt;
|
2013-10-30 13:27:06 -03:00
|
|
|
struct pci_dev *pdev = sbridge_dev->pdev[0];
|
2012-04-16 15:12:22 -03:00
|
|
|
int rc;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Check the number of active and not disabled channels */
|
2014-06-02 15:15:25 -03:00
|
|
|
rc = check_if_ecc_is_active(sbridge_dev->bus, type);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (unlikely(rc < 0))
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
/* allocate a new MC control structure */
|
2012-04-16 15:12:22 -03:00
|
|
|
layers[0].type = EDAC_MC_LAYER_CHANNEL;
|
|
|
|
layers[0].size = NUM_CHANNELS;
|
|
|
|
layers[0].is_virt_csrow = false;
|
|
|
|
layers[1].type = EDAC_MC_LAYER_SLOT;
|
|
|
|
layers[1].size = MAX_DIMMS;
|
|
|
|
layers[1].is_virt_csrow = true;
|
2012-05-02 14:37:00 -03:00
|
|
|
mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
|
2012-04-16 15:12:22 -03:00
|
|
|
sizeof(*pvt));
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
if (unlikely(!mci))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "MC: mci = %p, dev = %p\n",
|
2013-10-30 13:27:06 -03:00
|
|
|
mci, &pdev->dev);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
pvt = mci->pvt_info;
|
|
|
|
memset(pvt, 0, sizeof(*pvt));
|
|
|
|
|
|
|
|
/* Associate sbridge_dev and mci for future usage */
|
|
|
|
pvt->sbridge_dev = sbridge_dev;
|
|
|
|
sbridge_dev->mci = mci;
|
|
|
|
|
|
|
|
mci->mtype_cap = MEM_FLAG_DDR3;
|
|
|
|
mci->edac_ctl_cap = EDAC_FLAG_NONE;
|
|
|
|
mci->edac_cap = EDAC_FLAG_NONE;
|
|
|
|
mci->mod_name = "sbridge_edac.c";
|
|
|
|
mci->mod_ver = SBRIDGE_REVISION;
|
2013-10-30 13:27:06 -03:00
|
|
|
mci->dev_name = pci_name(pdev);
|
2011-10-20 19:18:01 -02:00
|
|
|
mci->ctl_page_to_phys = NULL;
|
|
|
|
|
|
|
|
/* Set the function pointer to an actual operation function */
|
|
|
|
mci->edac_check = sbridge_check_error;
|
|
|
|
|
2013-10-30 13:27:06 -03:00
|
|
|
pvt->info.type = type;
|
2014-06-20 10:27:54 -03:00
|
|
|
switch (type) {
|
|
|
|
case IVY_BRIDGE:
|
2013-10-30 13:27:06 -03:00
|
|
|
pvt->info.rankcfgr = IB_RANK_CFG_A;
|
|
|
|
pvt->info.get_tolm = ibridge_get_tolm;
|
|
|
|
pvt->info.get_tohm = ibridge_get_tohm;
|
|
|
|
pvt->info.dram_rule = ibridge_dram_rule;
|
2014-06-02 15:15:22 -03:00
|
|
|
pvt->info.get_memory_type = get_memory_type;
|
2014-06-02 15:15:23 -03:00
|
|
|
pvt->info.get_node_id = get_node_id;
|
2014-06-02 15:15:24 -03:00
|
|
|
pvt->info.rir_limit = rir_limit;
|
2015-12-03 10:48:52 +01:00
|
|
|
pvt->info.sad_limit = sad_limit;
|
|
|
|
pvt->info.interleave_mode = interleave_mode;
|
|
|
|
pvt->info.show_interleave_mode = show_interleave_mode;
|
|
|
|
pvt->info.dram_attr = dram_attr;
|
2013-10-30 13:27:06 -03:00
|
|
|
pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule);
|
|
|
|
pvt->info.interleave_list = ibridge_interleave_list;
|
|
|
|
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
|
|
|
|
pvt->info.interleave_pkg = ibridge_interleave_pkg;
|
2015-06-12 15:08:17 -04:00
|
|
|
pvt->info.get_width = ibridge_get_width;
|
2013-10-30 13:27:06 -03:00
|
|
|
mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
|
|
|
|
|
|
|
|
/* Store pci devices at mci for faster access */
|
|
|
|
rc = ibridge_mci_bind_devs(mci, sbridge_dev);
|
|
|
|
if (unlikely(rc < 0))
|
|
|
|
goto fail0;
|
2014-06-20 10:27:54 -03:00
|
|
|
break;
|
|
|
|
case SANDY_BRIDGE:
|
2013-10-30 13:27:06 -03:00
|
|
|
pvt->info.rankcfgr = SB_RANK_CFG_A;
|
|
|
|
pvt->info.get_tolm = sbridge_get_tolm;
|
|
|
|
pvt->info.get_tohm = sbridge_get_tohm;
|
|
|
|
pvt->info.dram_rule = sbridge_dram_rule;
|
2014-06-02 15:15:22 -03:00
|
|
|
pvt->info.get_memory_type = get_memory_type;
|
2014-06-02 15:15:23 -03:00
|
|
|
pvt->info.get_node_id = get_node_id;
|
2014-06-02 15:15:24 -03:00
|
|
|
pvt->info.rir_limit = rir_limit;
|
2015-12-03 10:48:52 +01:00
|
|
|
pvt->info.sad_limit = sad_limit;
|
|
|
|
pvt->info.interleave_mode = interleave_mode;
|
|
|
|
pvt->info.show_interleave_mode = show_interleave_mode;
|
|
|
|
pvt->info.dram_attr = dram_attr;
|
2013-10-30 13:27:06 -03:00
|
|
|
pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule);
|
|
|
|
pvt->info.interleave_list = sbridge_interleave_list;
|
|
|
|
pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
|
|
|
|
pvt->info.interleave_pkg = sbridge_interleave_pkg;
|
2015-06-12 15:08:17 -04:00
|
|
|
pvt->info.get_width = sbridge_get_width;
|
2013-10-30 13:27:06 -03:00
|
|
|
mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
|
|
|
|
|
|
|
|
/* Store pci devices at mci for faster access */
|
|
|
|
rc = sbridge_mci_bind_devs(mci, sbridge_dev);
|
|
|
|
if (unlikely(rc < 0))
|
|
|
|
goto fail0;
|
2014-06-20 10:27:54 -03:00
|
|
|
break;
|
|
|
|
case HASWELL:
|
|
|
|
/* rankcfgr isn't used */
|
|
|
|
pvt->info.get_tolm = haswell_get_tolm;
|
|
|
|
pvt->info.get_tohm = haswell_get_tohm;
|
|
|
|
pvt->info.dram_rule = ibridge_dram_rule;
|
|
|
|
pvt->info.get_memory_type = haswell_get_memory_type;
|
|
|
|
pvt->info.get_node_id = haswell_get_node_id;
|
|
|
|
pvt->info.rir_limit = haswell_rir_limit;
|
2015-12-03 10:48:52 +01:00
|
|
|
pvt->info.sad_limit = sad_limit;
|
|
|
|
pvt->info.interleave_mode = interleave_mode;
|
|
|
|
pvt->info.show_interleave_mode = show_interleave_mode;
|
|
|
|
pvt->info.dram_attr = dram_attr;
|
2014-06-20 10:27:54 -03:00
|
|
|
pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule);
|
|
|
|
pvt->info.interleave_list = ibridge_interleave_list;
|
|
|
|
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
|
|
|
|
pvt->info.interleave_pkg = ibridge_interleave_pkg;
|
2015-06-12 15:08:17 -04:00
|
|
|
pvt->info.get_width = ibridge_get_width;
|
2014-06-20 10:27:54 -03:00
|
|
|
mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx);
|
2013-10-30 13:27:06 -03:00
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
/* Store pci devices at mci for faster access */
|
|
|
|
rc = haswell_mci_bind_devs(mci, sbridge_dev);
|
|
|
|
if (unlikely(rc < 0))
|
|
|
|
goto fail0;
|
|
|
|
break;
|
2014-12-02 09:27:30 -08:00
|
|
|
case BROADWELL:
|
|
|
|
/* rankcfgr isn't used */
|
|
|
|
pvt->info.get_tolm = haswell_get_tolm;
|
|
|
|
pvt->info.get_tohm = haswell_get_tohm;
|
|
|
|
pvt->info.dram_rule = ibridge_dram_rule;
|
|
|
|
pvt->info.get_memory_type = haswell_get_memory_type;
|
|
|
|
pvt->info.get_node_id = haswell_get_node_id;
|
|
|
|
pvt->info.rir_limit = haswell_rir_limit;
|
2015-12-03 10:48:52 +01:00
|
|
|
pvt->info.sad_limit = sad_limit;
|
|
|
|
pvt->info.interleave_mode = interleave_mode;
|
|
|
|
pvt->info.show_interleave_mode = show_interleave_mode;
|
|
|
|
pvt->info.dram_attr = dram_attr;
|
2014-12-02 09:27:30 -08:00
|
|
|
pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule);
|
|
|
|
pvt->info.interleave_list = ibridge_interleave_list;
|
|
|
|
pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
|
|
|
|
pvt->info.interleave_pkg = ibridge_interleave_pkg;
|
2015-06-12 15:08:17 -04:00
|
|
|
pvt->info.get_width = broadwell_get_width;
|
2014-12-02 09:27:30 -08:00
|
|
|
mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx);
|
|
|
|
|
|
|
|
/* Store pci devices at mci for faster access */
|
|
|
|
rc = broadwell_mci_bind_devs(mci, sbridge_dev);
|
|
|
|
if (unlikely(rc < 0))
|
|
|
|
goto fail0;
|
|
|
|
break;
|
2014-06-20 10:27:54 -03:00
|
|
|
}
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Get dimm basic config and the memory layout */
|
|
|
|
get_dimm_config(mci);
|
|
|
|
get_memory_layout(mci);
|
|
|
|
|
|
|
|
/* record ptr to the generic device */
|
2013-10-30 13:27:06 -03:00
|
|
|
mci->pdev = &pdev->dev;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* add this new MC control structure to EDAC's list of MCs */
|
|
|
|
if (unlikely(edac_mc_add_mc(mci))) {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
rc = -EINVAL;
|
|
|
|
goto fail0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fail0:
|
|
|
|
kfree(mci->ctl_name);
|
|
|
|
edac_mc_free(mci);
|
|
|
|
sbridge_dev->mci = NULL;
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_probe Probe for ONE instance of device to see if it is
|
|
|
|
* present.
|
|
|
|
* return:
|
|
|
|
* 0 for FOUND a device
|
|
|
|
* < 0 for error code
|
|
|
|
*/
|
|
|
|
|
2012-12-21 13:23:51 -08:00
|
|
|
static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
2014-06-20 10:27:54 -03:00
|
|
|
int rc = -ENODEV;
|
2011-10-20 19:18:01 -02:00
|
|
|
u8 mc, num_mc = 0;
|
|
|
|
struct sbridge_dev *sbridge_dev;
|
2014-06-20 10:27:54 -03:00
|
|
|
enum type type = SANDY_BRIDGE;
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* get the pci devices we want to reserve for our use */
|
|
|
|
mutex_lock(&sbridge_edac_lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* All memory controllers are allocated at the first pass.
|
|
|
|
*/
|
|
|
|
if (unlikely(probed >= 1)) {
|
|
|
|
mutex_unlock(&sbridge_edac_lock);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
probed++;
|
|
|
|
|
2014-06-20 10:27:54 -03:00
|
|
|
switch (pdev->device) {
|
|
|
|
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
|
2013-10-30 13:27:06 -03:00
|
|
|
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table);
|
|
|
|
type = IVY_BRIDGE;
|
2014-06-20 10:27:54 -03:00
|
|
|
break;
|
2015-02-05 12:39:36 +01:00
|
|
|
case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
|
2013-10-30 13:27:06 -03:00
|
|
|
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table);
|
|
|
|
type = SANDY_BRIDGE;
|
2014-06-20 10:27:54 -03:00
|
|
|
break;
|
|
|
|
case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
|
|
|
|
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_haswell_table);
|
|
|
|
type = HASWELL;
|
|
|
|
break;
|
2014-12-02 09:27:30 -08:00
|
|
|
case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
|
|
|
|
rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_broadwell_table);
|
|
|
|
type = BROADWELL;
|
|
|
|
break;
|
2013-10-30 13:27:06 -03:00
|
|
|
}
|
2015-02-05 12:39:36 +01:00
|
|
|
if (unlikely(rc < 0)) {
|
|
|
|
edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device);
|
2011-10-20 19:18:01 -02:00
|
|
|
goto fail0;
|
2015-02-05 12:39:36 +01:00
|
|
|
}
|
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
mc = 0;
|
|
|
|
|
|
|
|
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "Registering MC#%d (%d of %d)\n",
|
|
|
|
mc, mc + 1, num_mc);
|
2014-06-20 10:27:54 -03:00
|
|
|
|
2011-10-20 19:18:01 -02:00
|
|
|
sbridge_dev->mc = mc++;
|
2013-10-30 13:27:06 -03:00
|
|
|
rc = sbridge_register_mci(sbridge_dev, type);
|
2011-10-20 19:18:01 -02:00
|
|
|
if (unlikely(rc < 0))
|
|
|
|
goto fail1;
|
|
|
|
}
|
|
|
|
|
2015-02-05 12:39:36 +01:00
|
|
|
sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION);
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
mutex_unlock(&sbridge_edac_lock);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fail1:
|
|
|
|
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
|
|
|
|
sbridge_unregister_mci(sbridge_dev);
|
|
|
|
|
|
|
|
sbridge_put_all_devices();
|
|
|
|
fail0:
|
|
|
|
mutex_unlock(&sbridge_edac_lock);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_remove destructor for one instance of device
|
|
|
|
*
|
|
|
|
*/
|
2012-12-21 13:23:51 -08:00
|
|
|
static void sbridge_remove(struct pci_dev *pdev)
|
2011-10-20 19:18:01 -02:00
|
|
|
{
|
|
|
|
struct sbridge_dev *sbridge_dev;
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(0, "\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* we have a trouble here: pdev value for removal will be wrong, since
|
|
|
|
* it will point to the X58 register used to detect that the machine
|
|
|
|
* is a Nehalem or upper design. However, due to the way several PCI
|
|
|
|
* devices are grouped together to provide MC functionality, we need
|
|
|
|
* to use a different method for releasing the devices
|
|
|
|
*/
|
|
|
|
|
|
|
|
mutex_lock(&sbridge_edac_lock);
|
|
|
|
|
|
|
|
if (unlikely(!probed)) {
|
|
|
|
mutex_unlock(&sbridge_edac_lock);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
|
|
|
|
sbridge_unregister_mci(sbridge_dev);
|
|
|
|
|
|
|
|
/* Release PCI resources */
|
|
|
|
sbridge_put_all_devices();
|
|
|
|
|
|
|
|
probed--;
|
|
|
|
|
|
|
|
mutex_unlock(&sbridge_edac_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_driver pci_driver structure for this module
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static struct pci_driver sbridge_driver = {
|
|
|
|
.name = "sbridge_edac",
|
|
|
|
.probe = sbridge_probe,
|
2012-12-21 13:23:51 -08:00
|
|
|
.remove = sbridge_remove,
|
2011-10-20 19:18:01 -02:00
|
|
|
.id_table = sbridge_pci_tbl,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_init Module entry function
|
|
|
|
* Try to initialize this module for its devices
|
|
|
|
*/
|
|
|
|
static int __init sbridge_init(void)
|
|
|
|
{
|
|
|
|
int pci_rc;
|
|
|
|
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(2, "\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
|
|
|
|
opstate_init();
|
|
|
|
|
|
|
|
pci_rc = pci_register_driver(&sbridge_driver);
|
2012-05-08 20:40:12 -03:00
|
|
|
if (pci_rc >= 0) {
|
|
|
|
mce_register_decode_chain(&sbridge_mce_dec);
|
2013-12-06 01:17:09 -05:00
|
|
|
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
|
|
|
|
sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
return 0;
|
2012-05-08 20:40:12 -03:00
|
|
|
}
|
2011-10-20 19:18:01 -02:00
|
|
|
|
|
|
|
sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
|
|
|
|
pci_rc);
|
|
|
|
|
|
|
|
return pci_rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sbridge_exit() Module exit function
|
|
|
|
* Unregister the driver
|
|
|
|
*/
|
|
|
|
static void __exit sbridge_exit(void)
|
|
|
|
{
|
2012-04-29 17:08:39 -03:00
|
|
|
edac_dbg(2, "\n");
|
2011-10-20 19:18:01 -02:00
|
|
|
pci_unregister_driver(&sbridge_driver);
|
2012-05-08 20:40:12 -03:00
|
|
|
mce_unregister_decode_chain(&sbridge_mce_dec);
|
2011-10-20 19:18:01 -02:00
|
|
|
}
|
|
|
|
|
|
|
|
module_init(sbridge_init);
|
|
|
|
module_exit(sbridge_exit);
|
|
|
|
|
|
|
|
module_param(edac_op_state, int, 0444);
|
|
|
|
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
|
|
|
|
|
|
|
|
MODULE_LICENSE("GPL");
|
2014-02-07 08:03:07 -02:00
|
|
|
MODULE_AUTHOR("Mauro Carvalho Chehab");
|
2011-10-20 19:18:01 -02:00
|
|
|
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
|
2013-10-30 13:27:06 -03:00
|
|
|
MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - "
|
2011-10-20 19:18:01 -02:00
|
|
|
SBRIDGE_REVISION);
|