Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, AMD: Fix ARAT feature setting again
  Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors"
  x86, apic: Fix spurious error interrupts triggering on all non-boot APs
  x86, mce, AMD: Fix leaving freed data in a list
  x86: Fix UV BAU for non-consecutive nasids
  x86, UV: Fix NMI handler for UV platforms
This commit is contained in:
Linus Torvalds 2011-05-18 03:14:34 -07:00
commit 39dcfa552c
9 changed files with 148 additions and 47 deletions

View File

@ -78,6 +78,7 @@
#define APIC_DEST_LOGICAL 0x00800
#define APIC_DEST_PHYSICAL 0x00000
#define APIC_DM_FIXED 0x00000
#define APIC_DM_FIXED_MASK 0x00700
#define APIC_DM_LOWEST 0x00100
#define APIC_DM_SMI 0x00200
#define APIC_DM_REMRD 0x00300

View File

@ -94,6 +94,8 @@
/* after this # consecutive successes, bump up the throttle if it was lowered */
#define COMPLETE_THRESHOLD 5
#define UV_LB_SUBNODEID 0x10
/*
* number of entries in the destination side payload queue
*/
@ -124,7 +126,7 @@
* The distribution specification (32 bytes) is interpreted as a 256-bit
* distribution vector. Adjacent bits correspond to consecutive even numbered
* nodeIDs. The result of adding the index of a given bit to the 15-bit
* 'base_dest_nodeid' field of the header corresponds to the
* 'base_dest_nasid' field of the header corresponds to the
* destination nodeID associated with that specified bit.
*/
struct bau_target_uvhubmask {
@ -176,7 +178,7 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
unsigned int base_dest_nodeid:15; /* nasid of the */
unsigned int base_dest_nasid:15; /* nasid of the */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
@ -378,6 +380,10 @@ struct ptc_stats {
unsigned long d_rcanceled; /* number of messages canceled by resets */
};
struct hub_and_pnode {
short uvhub;
short pnode;
};
/*
* one per-cpu; to locate the software tables
*/
@ -399,10 +405,12 @@ struct bau_control {
int baudisabled;
int set_bau_off;
short cpu;
short osnode;
short uvhub_cpu;
short uvhub;
short cpus_in_socket;
short cpus_in_uvhub;
short partition_base_pnode;
unsigned short message_number;
unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE];
@ -422,15 +430,16 @@ struct bau_control {
int congested_period;
cycles_t period_time;
long period_requests;
struct hub_and_pnode *target_hub_and_pnode;
};
static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
{
return constant_test_bit(uvhub, &dstp->bits[0]);
}
static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
{
__set_bit(uvhub, &dstp->bits[0]);
__set_bit(pnode, &dstp->bits[0]);
}
static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
int nbits)

View File

@ -398,6 +398,8 @@ struct uv_blade_info {
unsigned short nr_online_cpus;
unsigned short pnode;
short memory_nid;
spinlock_t nmi_lock;
unsigned long nmi_count;
};
extern struct uv_blade_info *uv_blade_info;
extern short *uv_node_to_blade;

View File

@ -5,7 +5,7 @@
*
* SGI UV MMR definitions
*
* Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
* Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_MMRS_H
@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
} s;
};
/* ========================================================================= */
/* UVH_SCRATCH5 */
/* ========================================================================= */
#define UVH_SCRATCH5 0x2d0200UL
#define UVH_SCRATCH5_32 0x00778
#define UVH_SCRATCH5_SCRATCH5_SHFT 0
#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
union uvh_scratch5_u {
unsigned long v;
struct uvh_scratch5_s {
unsigned long scratch5 : 64; /* RW, W1CS */
} s;
};
#endif /* __ASM_UV_MMRS_X86_H__ */

View File

@ -37,6 +37,13 @@
#include <asm/smp.h>
#include <asm/x86_init.h>
#include <asm/emergency-restart.h>
#include <asm/nmi.h>
/* BMC sets a bit this MMR non-zero before sending an NMI */
#define UVH_NMI_MMR UVH_SCRATCH5
#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
#define UV_NMI_PENDING_MASK (1UL << 63)
DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
DEFINE_PER_CPU(int, x2apic_extra_bits);
@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
*/
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
{
unsigned long real_uv_nmi;
int bid;
if (reason != DIE_NMIUNKNOWN)
return NOTIFY_OK;
if (in_crash_kexec)
/* do nothing if entering the crash kernel */
return NOTIFY_OK;
/*
* Use a lock so only one cpu prints at a time
* to prevent intermixed output.
* Each blade has an MMR that indicates when an NMI has been sent
* to cpus on the blade. If an NMI is detected, atomically
* clear the MMR and update a per-blade NMI count used to
* cause each cpu on the blade to notice a new NMI.
*/
bid = uv_numa_blade_id();
real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
if (unlikely(real_uv_nmi)) {
spin_lock(&uv_blade_info[bid].nmi_lock);
real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
if (real_uv_nmi) {
uv_blade_info[bid].nmi_count++;
uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
}
spin_unlock(&uv_blade_info[bid].nmi_lock);
}
if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
return NOTIFY_DONE;
__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
/*
* Use a lock so only one cpu prints at a time.
* This prevents intermixed output.
*/
spin_lock(&uv_nmi_lock);
pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
dump_stack();
spin_unlock(&uv_nmi_lock);
@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
}
static struct notifier_block uv_dump_stack_nmi_nb = {
.notifier_call = uv_handle_nmi
.notifier_call = uv_handle_nmi,
.priority = NMI_LOCAL_LOW_PRIOR - 1,
};
void uv_register_nmi_notifier(void)
@ -720,8 +756,9 @@ void __init uv_system_init(void)
printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kmalloc(bytes, GFP_KERNEL);
uv_blade_info = kzalloc(bytes, GFP_KERNEL);
BUG_ON(!uv_blade_info);
for (blade = 0; blade < uv_num_possible_blades(); blade++)
uv_blade_info[blade].memory_nid = -1;
@ -747,6 +784,7 @@ void __init uv_system_init(void)
uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
spin_lock_init(&uv_blade_info[blade].nmi_lock);
max_pnode = max(pnode, max_pnode);
blade++;
}

View File

@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#endif
/* As a rule processors have APIC timer running in deep C states */
if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
set_cpu_cap(c, X86_FEATURE_ARAT);
/*
@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
*/
const int amd_erratum_400[] =
AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
EXPORT_SYMBOL_GPL(amd_erratum_400);

View File

@ -509,6 +509,7 @@ recurse:
out_free:
if (b) {
kobject_put(&b->kobj);
list_del(&b->miscj);
kfree(b);
}
return err;

View File

@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = lvtthmr_init;
/*
* The initial value of thermal LVT entries on all APs always reads
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
* sequence to them and LVT registers are reset to 0s except for
* the mask bits which are set to 1s when APs receive INIT IPI.
* Always restore the value that BIOS has programmed on AP based on
* BSP's info we saved since BIOS is always setting the same value
* for all threads/cores
* If BIOS takes over the thermal interrupt and sets its interrupt
* delivery mode to SMI (not fixed), it restores the value that the
* BIOS has programmed on AP based on BSP's info we saved since BIOS
* is always setting the same value for all threads/cores.
*/
apic_write(APIC_LVTTHMR, lvtthmr_init);
if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
apic_write(APIC_LVTTHMR, lvtthmr_init);
h = lvtthmr_init;
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG

View File

@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{
int tcpu;
int uvhub;
int locals = 0;
int remotes = 0;
int hubs = 0;
int tcpu;
int tpnode;
struct bau_desc *bau_desc;
struct cpumask *flush_mask;
struct ptc_stats *stat;
struct bau_control *bcp;
struct bau_control *tbcp;
struct hub_and_pnode *hpp;
/* kernel was booted 'nobau' */
if (nobau)
@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
/* cpu statistics */
for_each_cpu(tcpu, flush_mask) {
uvhub = uv_cpu_to_blade_id(tcpu);
bau_uvhub_set(uvhub, &bau_desc->distribution);
if (uvhub == bcp->uvhub)
/*
* The distribution vector is a bit map of pnodes, relative
* to the partition base pnode (and the partition base nasid
* in the header).
* Translate cpu to pnode and hub using an array stored
* in local memory.
*/
hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
tpnode = hpp->pnode - bcp->partition_base_pnode;
bau_uvhub_set(tpnode, &bau_desc->distribution);
if (hpp->uvhub == bcp->uvhub)
locals++;
else
remotes++;
@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
* an interrupt, but causes an error message to be returned to
* the sender.
*/
static void uv_enable_timeouts(void)
static void __init uv_enable_timeouts(void)
{
int uvhub;
int nuvhubs;
@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
}
/*
* initialize the sending side's sending buffers
* Initialize the sending side's sending buffers.
*/
static void
uv_activation_descriptor_init(int node, int pnode)
uv_activation_descriptor_init(int node, int pnode, int base_pnode)
{
int i;
int cpu;
@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
n = pa >> uv_nshift;
m = pa & uv_mmask;
/* the 14-bit pnode */
uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
(n << UV_DESC_BASE_PNODE_SHIFT | m));
/*
* initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
* Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
* cpu even though we only use the first one; one descriptor can
* describe a broadcast to 256 uv hubs.
*/
@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1;
/*
* base_dest_nodeid is the nasid of the first uvhub
* in the partition. The bit map will indicate uvhub numbers,
* which are 0-N in a partition. Pnodes are unique system-wide.
* The base_dest_nasid set in the message header is the nasid
* of the first uvhub in the partition. The bit map will
* indicate destination pnode numbers relative to that base.
* They may not be consecutive if nasid striding is being used.
*/
bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
bd2->header.dest_subnodeid = 0x10; /* the LB */
bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1;
/*
@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
/*
* Initialization of each UV hub's structures
*/
static void __init uv_init_uvhub(int uvhub, int vector)
static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
{
int node;
int pnode;
@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
node = uvhub_to_first_node(uvhub);
pnode = uv_blade_to_pnode(uvhub);
uv_activation_descriptor_init(node, pnode);
uv_activation_descriptor_init(node, pnode, base_pnode);
uv_payload_queue_init(node, pnode);
/*
* the below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS
* The below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS.
*/
apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
/*
* initialize the bau_control structure for each cpu
*/
static int __init uv_init_per_cpu(int nuvhubs)
static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
{
int i;
int cpu;
int tcpu;
int pnode;
int uvhub;
int have_hmaster;
@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
bcp = &per_cpu(bau_control, cpu);
memset(bcp, 0, sizeof(struct bau_control));
pnode = uv_cpu_hub_info(cpu)->pnode;
if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
printk(KERN_EMERG
"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
cpu, pnode, base_part_pnode,
UV_DISTRIBUTION_SIZE);
return 1;
}
bcp->osnode = cpu_to_node(cpu);
bcp->partition_base_pnode = uv_partition_base_pnode;
uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
bdp = &uvhub_descs[uvhub];
@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
bdp->pnode = pnode;
/* kludge: 'assuming' one node per socket, and assuming that
disabling a socket just leaves a gap in node numbers */
socket = (cpu_to_node(cpu) & 1);
socket = bcp->osnode & 1;
bdp->socket_mask |= (1 << socket);
sdp = &bdp->socket[socket];
sdp->cpu_number[sdp->num_cpus] = cpu;
@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
nextsocket:
socket++;
socket_mask = (socket_mask >> 1);
/* each socket gets a local array of pnodes/hubs */
bcp = smaster;
bcp->target_hub_and_pnode = kmalloc_node(
sizeof(struct hub_and_pnode) *
num_possible_cpus(), GFP_KERNEL, bcp->osnode);
memset(bcp->target_hub_and_pnode, 0,
sizeof(struct hub_and_pnode) *
num_possible_cpus());
for_each_present_cpu(tcpu) {
bcp->target_hub_and_pnode[tcpu].pnode =
uv_cpu_hub_info(tcpu)->pnode;
bcp->target_hub_and_pnode[tcpu].uvhub =
uv_cpu_hub_info(tcpu)->numa_blade_id;
}
}
}
kfree(uvhub_descs);
@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
spin_lock_init(&disable_lock);
congested_cycles = microsec_2_cycles(congested_response_us);
if (uv_init_per_cpu(nuvhubs)) {
uv_partition_base_pnode = 0x7fffffff;
for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
if (uv_blade_nr_possible_cpus(uvhub) &&
(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
}
if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
nobau = 1;
return 0;
}
uv_partition_base_pnode = 0x7fffffff;
for (uvhub = 0; uvhub < nuvhubs; uvhub++)
if (uv_blade_nr_possible_cpus(uvhub) &&
(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
vector = UV_BAU_MESSAGE;
for_each_possible_blade(uvhub)
if (uv_blade_nr_possible_cpus(uvhub))
uv_init_uvhub(uvhub, vector);
uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
uv_enable_timeouts();
alloc_intr_gate(vector, uv_bau_message_intr1);