powerpc fixes for 5.10 #2

A fix for undetected data corruption on Power9 Nimbus <= DD2.1 in the emulation
 of VSX loads. The affected CPUs were not widely available.
 
 Two fixes for machine check handling in guests under PowerVM.
 
 A fix for our recent changes to SMP setup, when CONFIG_CPUMASK_OFFSTACK=y.
 
 Three fixes for races in the handling of some of our powernv sysfs attributes.
 
 One change to remove TM from the set of Power10 CPU features.
 
 A couple of other minor fixes.
 
 Thanks to:
   Aneesh Kumar K.V, Christophe Leroy, Ganesh Goudar, Jordan Niethe, Mahesh
   Salgaonkar, Michael Neuling, Oliver O'Halloran, Qian Cai, Srikar Dronamraju,
   Vasant Hegde.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl+UASATHG1wZUBlbGxl
 cm1hbi5pZC5hdQAKCRBR6+o8yOGlgAbpD/4nN+0cM7M2iCPL1cqd3nmzziJ/tXsq
 1ZxU+2B+cU+pUy4LHgtH1arJb85iVqFR3cC9j705uo6kO9vqsppTj2752srSEioM
 er1UxzRza/lNZaVGaywCD9oApayPkzg74IbenXDDduI+oWvQuvWZbSBskJfdARg2
 7kBFhV7w8sUGa8e/JS1FITndPPO9tMurk+s0FgP4cjsGM/iTW8eUfGcOFsOlc+uZ
 tybZUCY/G4E77etE1KHVjw8IcwSh0P/ibQ6nLnIFpOtPCRs5tTqbuARYN8U55M9H
 0ebt3sv5QTyNvZY0bm5p9ZsC1AKyciUO5SWPNEEwzOdyYVQjlofHj3UvcHKW2D1t
 ymbglsdQeXM5uuexa23ape1e3UuwW1JhsHTQLnCbI3C/snkMA3ZegVsS66GIMXn2
 C0gef0RzQ7HrvwUEl3V/b6W87LL6NpGU6RRWyva7/0pLMZkMtKpGgWg/hVzPRTcC
 6yoUVWNN5p7pZu6VDkoqdJuw7hQPyo7t5Kj71G+/SdH5engcFjnbBxDiEge/4a7+
 RluvswpCn9SyyEvS2BL262LSPq8iYH4+at6n+uLbonZSY0P9Z5zSpPpkNJkyTnwz
 GXj1DBSEOBDZQ7pFeoCFOeYoo1Yk5EQpmA7YuxnZkzOdxFpIUgFU1wdRemzVZw2o
 PTw5VHoRgCmIsQ==
 =LMZv
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-5.10-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - A fix for undetected data corruption on Power9 Nimbus <= DD2.1 in the
   emulation of VSX loads. The affected CPUs were not widely available.

 - Two fixes for machine check handling in guests under PowerVM.

 - A fix for our recent changes to SMP setup, when
   CONFIG_CPUMASK_OFFSTACK=y.

 - Three fixes for races in the handling of some of our powernv sysfs
   attributes.

 - One change to remove TM from the set of Power10 CPU features.

 - A couple of other minor fixes.

Thanks to: Aneesh Kumar K.V, Christophe Leroy, Ganesh Goudar, Jordan
Niethe, Mahesh Salgaonkar, Michael Neuling, Oliver O'Halloran, Qian Cai,
Srikar Dronamraju, Vasant Hegde.

* tag 'powerpc-5.10-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/pseries: Avoid using addr_to_pfn in real mode
  powerpc/uaccess: Don't use "m<>" constraint with GCC 4.9
  powerpc/eeh: Fix eeh_dev_check_failure() for PE#0
  powerpc/64s: Remove TM from Power10 features
  selftests/powerpc: Make alignment handler test P9N DD2.1 vector CI load workaround
  powerpc: Fix undetected data corruption with P9N DD2.1 VSX CI load emulation
  powerpc/powernv/dump: Handle multiple writes to ack attribute
  powerpc/powernv/dump: Fix race while processing OPAL dump
  powerpc/smp: Use GFP_ATOMIC while allocating tmp mask
  powerpc/smp: Remove unnecessary variable
  powerpc/mce: Avoid nmi_enter/exit in real mode on pseries hash
  powerpc/opal_elog: Handle multiple writes to ack attribute
This commit is contained in:
Linus Torvalds 2020-10-24 11:09:13 -07:00
commit b6f96e75ae
12 changed files with 186 additions and 121 deletions

View File

@ -11,4 +11,17 @@
# define __ASM_CONST(x) x##UL
# define ASM_CONST(x) __ASM_CONST(x)
#endif
/*
* Inline assembly memory constraint
*
* GCC 4.9 doesn't properly handle pre update memory constraint "m<>"
*
*/
#if defined(GCC_VERSION) && GCC_VERSION < 50000
#define UPD_CONSTR ""
#else
#define UPD_CONSTR "<>"
#endif
#endif /* _ASM_POWERPC_ASM_CONST_H */

View File

@ -477,7 +477,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
CPU_FTR_DAWR | CPU_FTR_DAWR1)
#define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \

View File

@ -182,7 +182,7 @@ do { \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
: "r" (x), "m<>" (*addr) \
: "r" (x), "m"UPD_CONSTR (*addr) \
: \
: label)
@ -253,7 +253,7 @@ extern long __get_user_bad(void);
".previous\n" \
EX_TABLE(1b, 3b) \
: "=r" (err), "=r" (x) \
: "m<>" (*addr), "i" (-EFAULT), "0" (err))
: "m"UPD_CONSTR (*addr), "i" (-EFAULT), "0" (err))
#ifdef __powerpc64__
#define __get_user_asm2(x, addr, err) \

View File

@ -121,9 +121,16 @@ extern void __restore_cpu_e6500(void);
PPC_FEATURE2_DARN | \
PPC_FEATURE2_SCV)
#define COMMON_USER_POWER10 COMMON_USER_POWER9
#define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \
PPC_FEATURE2_ARCH_3_1 | \
PPC_FEATURE2_MMA)
#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \
PPC_FEATURE2_MMA | \
PPC_FEATURE2_ARCH_3_00 | \
PPC_FEATURE2_HAS_IEEE128 | \
PPC_FEATURE2_DARN | \
PPC_FEATURE2_SCV | \
PPC_FEATURE2_ARCH_2_07 | \
PPC_FEATURE2_DSCR | \
PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
PPC_FEATURE2_VEC_CRYPTO)
#ifdef CONFIG_PPC_BOOK3E_64
#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)

View File

@ -466,11 +466,6 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
return 0;
}
if (!pe->addr) {
eeh_stats.no_cfg_addr++;
return 0;
}
/*
* On PowerNV platform, we might already have fenced PHB
* there and we need take care of that firstly.

View File

@ -591,12 +591,11 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
long notrace machine_check_early(struct pt_regs *regs)
{
long handled = 0;
bool nested = in_nmi();
u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
this_cpu_set_ftrace_enabled(0);
if (!nested)
/* Do not use nmi_enter/exit for pseries hpte guest */
if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
nmi_enter();
hv_nmi_check_nonrecoverable(regs);
@ -607,7 +606,7 @@ long notrace machine_check_early(struct pt_regs *regs)
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
if (!nested)
if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
nmi_exit();
this_cpu_set_ftrace_enabled(ftrace_enabled);

View File

@ -1240,43 +1240,33 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
static bool update_mask_by_l2(int cpu)
static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
{
struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
struct device_node *l2_cache, *np;
cpumask_var_t mask;
int i;
if (has_big_cores)
submask_fn = cpu_smallcore_mask;
l2_cache = cpu_to_l2cache(cpu);
if (!l2_cache) {
struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
/*
* If no l2cache for this CPU, assume all siblings to share
* cache with this CPU.
*/
if (has_big_cores)
sibling_mask = cpu_smallcore_mask;
for_each_cpu(i, sibling_mask(cpu))
if (!l2_cache || !*mask) {
/* Assume only core siblings share cache with this CPU */
for_each_cpu(i, submask_fn(cpu))
set_cpus_related(cpu, i, cpu_l2_cache_mask);
return false;
}
alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu));
cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu));
if (has_big_cores)
submask_fn = cpu_smallcore_mask;
cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
/* Update l2-cache mask with all the CPUs that are part of submask */
or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
/* Skip all CPUs already part of current CPU l2-cache mask */
cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu));
cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
for_each_cpu(i, mask) {
for_each_cpu(i, *mask) {
/*
* when updating the marks the current CPU has not been marked
* online, but we need to update the cache masks
@ -1286,15 +1276,14 @@ static bool update_mask_by_l2(int cpu)
/* Skip all CPUs already part of current CPU l2-cache */
if (np == l2_cache) {
or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
cpumask_andnot(mask, mask, submask_fn(i));
cpumask_andnot(*mask, *mask, submask_fn(i));
} else {
cpumask_andnot(mask, mask, cpu_l2_cache_mask(i));
cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
}
of_node_put(np);
}
of_node_put(l2_cache);
free_cpumask_var(mask);
return true;
}
@ -1337,40 +1326,46 @@ static inline void add_cpu_to_smallcore_masks(int cpu)
}
}
static void update_coregroup_mask(int cpu)
static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
{
struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
cpumask_var_t mask;
int coregroup_id = cpu_to_coregroup_id(cpu);
int i;
alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu));
cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu));
if (shared_caches)
submask_fn = cpu_l2_cache_mask;
if (!*mask) {
/* Assume only siblings are part of this CPU's coregroup */
for_each_cpu(i, submask_fn(cpu))
set_cpus_related(cpu, i, cpu_coregroup_mask);
return;
}
cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
/* Update coregroup mask with all the CPUs that are part of submask */
or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
/* Skip all CPUs already part of coregroup mask */
cpumask_andnot(mask, mask, cpu_coregroup_mask(cpu));
cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
for_each_cpu(i, mask) {
for_each_cpu(i, *mask) {
/* Skip all CPUs not part of this coregroup */
if (coregroup_id == cpu_to_coregroup_id(i)) {
or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
cpumask_andnot(mask, mask, submask_fn(i));
cpumask_andnot(*mask, *mask, submask_fn(i));
} else {
cpumask_andnot(mask, mask, cpu_coregroup_mask(i));
cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
}
}
free_cpumask_var(mask);
}
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
cpumask_var_t mask;
int i;
/*
@ -1384,10 +1379,15 @@ static void add_cpu_to_masks(int cpu)
set_cpus_related(i, cpu, cpu_sibling_mask);
add_cpu_to_smallcore_masks(cpu);
update_mask_by_l2(cpu);
/* In CPU-hotplug path, hence use GFP_ATOMIC */
alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
update_mask_by_l2(cpu, &mask);
if (has_coregroup_support())
update_coregroup_mask(cpu);
update_coregroup_mask(cpu, &mask);
free_cpumask_var(mask);
}
/* Activate a secondary processor. */

View File

@ -885,7 +885,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
{
unsigned int ra, rb, t, i, sel, instr, rc;
const void __user *addr;
u8 vbuf[16], *vdst;
u8 vbuf[16] __aligned(16), *vdst;
unsigned long ea, msr, msr_mask;
bool swap;

View File

@ -88,9 +88,14 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj,
const char *buf,
size_t count)
{
dump_send_ack(dump_obj->id);
sysfs_remove_file_self(&dump_obj->kobj, &attr->attr);
kobject_put(&dump_obj->kobj);
/*
* Try to self remove this attribute. If we are successful,
* delete the kobject itself.
*/
if (sysfs_remove_file_self(&dump_obj->kobj, &attr->attr)) {
dump_send_ack(dump_obj->id);
kobject_put(&dump_obj->kobj);
}
return count;
}
@ -318,15 +323,14 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj,
return count;
}
static struct dump_obj *create_dump_obj(uint32_t id, size_t size,
uint32_t type)
static void create_dump_obj(uint32_t id, size_t size, uint32_t type)
{
struct dump_obj *dump;
int rc;
dump = kzalloc(sizeof(*dump), GFP_KERNEL);
if (!dump)
return NULL;
return;
dump->kobj.kset = dump_kset;
@ -346,21 +350,39 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size,
rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id);
if (rc) {
kobject_put(&dump->kobj);
return NULL;
return;
}
/*
* As soon as the sysfs file for this dump is created/activated there is
* a chance the opal_errd daemon (or any userspace) might read and
* acknowledge the dump before kobject_uevent() is called. If that
* happens then there is a potential race between
* dump_ack_store->kobject_put() and kobject_uevent() which leads to a
* use-after-free of a kernfs object resulting in a kernel crash.
*
* To avoid that, we need to take a reference on behalf of the bin file,
* so that our reference remains valid while we call kobject_uevent().
* We then drop our reference before exiting the function, leaving the
* bin file to drop the last reference (if it hasn't already).
*/
/* Take a reference for the bin file */
kobject_get(&dump->kobj);
rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr);
if (rc) {
if (rc == 0) {
kobject_uevent(&dump->kobj, KOBJ_ADD);
pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
__func__, dump->id, dump->size);
} else {
/* Drop reference count taken for bin file */
kobject_put(&dump->kobj);
return NULL;
}
pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
__func__, dump->id, dump->size);
kobject_uevent(&dump->kobj, KOBJ_ADD);
return dump;
/* Drop our reference */
kobject_put(&dump->kobj);
return;
}
static irqreturn_t process_dump(int irq, void *data)

View File

@ -72,9 +72,14 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj,
const char *buf,
size_t count)
{
opal_send_ack_elog(elog_obj->id);
sysfs_remove_file_self(&elog_obj->kobj, &attr->attr);
kobject_put(&elog_obj->kobj);
/*
* Try to self remove this attribute. If we are successful,
* delete the kobject itself.
*/
if (sysfs_remove_file_self(&elog_obj->kobj, &attr->attr)) {
opal_send_ack_elog(elog_obj->id);
kobject_put(&elog_obj->kobj);
}
return count;
}

View File

@ -521,18 +521,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
return 0; /* need to perform reset */
}
static int mce_handle_err_realmode(int disposition, u8 error_type)
{
#ifdef CONFIG_PPC_BOOK3S_64
if (disposition == RTAS_DISP_NOT_RECOVERED) {
switch (error_type) {
case MC_ERROR_TYPE_SLB:
case MC_ERROR_TYPE_ERAT:
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
* There are chances that we may hit MCE again if there
* is a parity error on the SLB entry we trying to read
* for saving. Hence limit the slb saving to single
* level of recursion.
*/
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
break;
default:
break;
}
} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
/* Platform corrected itself but could be degraded */
pr_err("MCE: limited recovery, system may be degraded\n");
disposition = RTAS_DISP_FULLY_RECOVERED;
}
#endif
return disposition;
}
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
static int mce_handle_err_virtmode(struct pt_regs *regs,
struct rtas_error_log *errp,
struct pseries_mc_errorlog *mce_log,
int disposition)
{
struct mce_error_info mce_err = { 0 };
unsigned long eaddr = 0, paddr = 0;
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log;
int disposition = rtas_error_disposition(errp);
int initiator = rtas_error_initiator(errp);
int severity = rtas_error_severity(errp);
unsigned long eaddr = 0, paddr = 0;
u8 error_type, err_sub_type;
if (!mce_log)
goto out;
error_type = mce_log->error_type;
err_sub_type = rtas_mc_error_sub_type(mce_log);
if (initiator == RTAS_INITIATOR_UNKNOWN)
mce_err.initiator = MCE_INITIATOR_UNKNOWN;
else if (initiator == RTAS_INITIATOR_CPU)
@ -571,18 +608,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
mce_err.error_class = MCE_ECLASS_UNKNOWN;
if (!rtas_error_extended(errp))
goto out;
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
if (pseries_log == NULL)
goto out;
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
error_type = mce_log->error_type;
err_sub_type = rtas_mc_error_sub_type(mce_log);
switch (mce_log->error_type) {
switch (error_type) {
case MC_ERROR_TYPE_UE:
mce_err.error_type = MCE_ERROR_TYPE_UE;
mce_common_process_ue(regs, &mce_err);
@ -682,37 +708,31 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
if (disposition == RTAS_DISP_NOT_RECOVERED) {
switch (error_type) {
case MC_ERROR_TYPE_SLB:
case MC_ERROR_TYPE_ERAT:
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
* There are chances that we may hit MCE again if there
* is a parity error on the SLB entry we trying to read
* for saving. Hence limit the slb saving to single
* level of recursion.
*/
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
break;
default:
break;
}
} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
/* Platform corrected itself but could be degraded */
printk(KERN_ERR "MCE: limited recovery, system may "
"be degraded\n");
disposition = RTAS_DISP_FULLY_RECOVERED;
}
#endif
out:
save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
&mce_err, regs->nip, eaddr, paddr);
return disposition;
}
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
{
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log = NULL;
int disposition = rtas_error_disposition(errp);
u8 error_type;
if (!rtas_error_extended(errp))
goto out;
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
if (!pseries_log)
goto out;
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
error_type = mce_log->error_type;
disposition = mce_handle_err_realmode(disposition, error_type);
/*
* Enable translation as we will be accessing per-cpu variables
* in save_mce_event() which may fall outside RMO region, also
@ -723,10 +743,10 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
* Note: All the realmode handling like flushing SLB entries for
* SLB multihit is done by now.
*/
out:
mtmsr(mfmsr() | MSR_IR | MSR_DR);
save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
&mce_err, regs->nip, eaddr, paddr);
disposition = mce_handle_err_virtmode(regs, errp, mce_log,
disposition);
return disposition;
}

View File

@ -266,8 +266,12 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
}
rc = 0;
/* offset = 0 no alignment fault, so skip */
for (offset = 1; offset < 16; offset++) {
/*
* offset = 0 is aligned but tests the workaround for the P9N
* DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s:
* Add workaround for P9 vector CI load issue")
*/
for (offset = 0; offset < 16; offset++) {
width = 16; /* vsx == 16 bytes */
r = 0;