linux-next/arch/powerpc/kvm/book3s_hv_tm_builtin.c
Gustavo Romero 1dff3064c7 KVM: PPC: Book3S HV: Treat TM-related invalid form instructions on P9 like the valid ones
On P9 DD2.2 due to a CPU defect some TM instructions need to be emulated by
KVM. This is handled at first by the hardware raising a softpatch interrupt
when certain TM instructions that need KVM assistance are executed in the
guest. Althought some TM instructions per Power ISA are invalid forms they
can raise a softpatch interrupt too. For instance, 'tresume.' instruction
as defined in the ISA must have bit 31 set (1), but an instruction that
matches 'tresume.' PO and XO opcode fields but has bit 31 not set (0), like
0x7cfe9ddc, also raises a softpatch interrupt. Similarly for 'treclaim.'
and 'trechkpt.' instructions with bit 31 = 0, i.e. 0x7c00075c and
0x7c0007dc, respectively. Hence, if a code like the following is executed
in the guest it will raise a softpatch interrupt just like a 'tresume.'
when the TM facility is enabled ('tabort. 0' in the example is used only
to enable the TM facility):

int main() { asm("tabort. 0; .long 0x7cfe9ddc;"); }

Currently in such a case KVM throws a complete trace like:

[345523.705984] WARNING: CPU: 24 PID: 64413 at arch/powerpc/kvm/book3s_hv_tm.c:211 kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv]
[345523.705985] Modules linked in: kvm_hv(E) xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle ip6table_nat
iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ebtable_filter ebtables ip6table_filter
ip6_tables iptable_filter bridge stp llc sch_fq_codel ipmi_powernv at24 vmx_crypto ipmi_devintf ipmi_msghandler
ibmpowernv uio_pdrv_genirq kvm opal_prd uio leds_powernv ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp
libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456
async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear tg3
crct10dif_vpmsum crc32c_vpmsum ipr [last unloaded: kvm_hv]
[345523.706030] CPU: 24 PID: 64413 Comm: CPU 0/KVM Tainted: G        W   E     5.5.0+ #1
[345523.706031] NIP:  c0080000072cb9c0 LR: c0080000072b5e80 CTR: c0080000085c7850
[345523.706034] REGS: c000000399467680 TRAP: 0700   Tainted: G        W   E      (5.5.0+)
[345523.706034] MSR:  900000010282b033 <SF,HV,VEC,VSX,EE,FP,ME,IR,DR,RI,LE,TM[E]>  CR: 24022428  XER: 00000000
[345523.706042] CFAR: c0080000072b5e7c IRQMASK: 0
                GPR00: c0080000072b5e80 c000000399467910 c0080000072db500 c000000375ccc720
                GPR04: c000000375ccc720 00000003fbec0000 0000a10395dda5a6 0000000000000000
                GPR08: 000000007cfe9ddc 7cfe9ddc000005dc 7cfe9ddc7c0005dc c0080000072cd530
                GPR12: c0080000085c7850 c0000003fffeb800 0000000000000001 00007dfb737f0000
                GPR16: c0002001edcca558 0000000000000000 0000000000000000 0000000000000001
                GPR20: c000000001b21258 c0002001edcca558 0000000000000018 0000000000000000
                GPR24: 0000000001000000 ffffffffffffffff 0000000000000001 0000000000001500
                GPR28: c0002001edcc4278 c00000037dd80000 800000050280f033 c000000375ccc720
[345523.706062] NIP [c0080000072cb9c0] kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv]
[345523.706065] LR [c0080000072b5e80] kvmppc_handle_exit_hv.isra.53+0x3e8/0x798 [kvm_hv]
[345523.706066] Call Trace:
[345523.706069] [c000000399467910] [c000000399467940] 0xc000000399467940 (unreliable)
[345523.706071] [c000000399467950] [c000000399467980] 0xc000000399467980
[345523.706075] [c0000003994679f0] [c0080000072bd1c4] kvmhv_run_single_vcpu+0xa1c/0xb80 [kvm_hv]
[345523.706079] [c000000399467ac0] [c0080000072bd8e0] kvmppc_vcpu_run_hv+0x5b8/0xb00 [kvm_hv]
[345523.706087] [c000000399467b90] [c0080000085c93cc] kvmppc_vcpu_run+0x34/0x48 [kvm]
[345523.706095] [c000000399467bb0] [c0080000085c582c] kvm_arch_vcpu_ioctl_run+0x244/0x420 [kvm]
[345523.706101] [c000000399467c40] [c0080000085b7498] kvm_vcpu_ioctl+0x3d0/0x7b0 [kvm]
[345523.706105] [c000000399467db0] [c0000000004adf9c] ksys_ioctl+0x13c/0x170
[345523.706107] [c000000399467e00] [c0000000004adff8] sys_ioctl+0x28/0x80
[345523.706111] [c000000399467e20] [c00000000000b278] system_call+0x5c/0x68
[345523.706112] Instruction dump:
[345523.706114] 419e0390 7f8a4840 409d0048 6d497c00 2f89075d 419e021c 6d497c00 2f8907dd
[345523.706119] 419e01c0 6d497c00 2f8905dd 419e00a4 <0fe00000> 38210040 38600000 ebc1fff0

and then treats the executed instruction as a 'nop'.

However the POWER9 User's Manual, in section "4.6.10 Book II Invalid
Forms", informs that for TM instructions bit 31 is in fact ignored, thus
for the TM-related invalid forms ignoring bit 31 and handling them like the
valid forms is an acceptable way to handle them. POWER8 behaves the same
way too.

This commit changes the handling of the cases here described by treating
the TM-related invalid forms that can generate a softpatch interrupt
just like their valid forms (w/ bit 31 = 1) instead of as a 'nop' and by
gently reporting any other unrecognized case to the host and treating it as
illegal instruction instead of throwing a trace and treating it as a 'nop'.

Signed-off-by: Gustavo Romero <gromero@linux.ibm.com>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Acked-By: Michael Neuling <mikey@neuling.org>
Reviewed-by: Leonardo Bras <leonardo@linux.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2020-03-19 16:39:52 +11:00

120 lines
3.9 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_book3s_64.h>
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
/*
* This handles the cases where the guest is in real suspend mode
* and we want to get back to the guest without dooming the transaction.
* The caller has checked that the guest is in real-suspend mode
* (MSR[TS] = S and the fake-suspend flag is not set).
*/
int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
{
u32 instr = vcpu->arch.emul_inst;
u64 newmsr, msr, bescr;
int rs;
/*
* rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
* in these instructions, so masking bit 31 out doesn't change these
* instructions. For the tsr. instruction if bit 31 = 0 then it is per
* ISA an invalid form, however P9 UM, in section 4.6.10 Book II Invalid
* Forms, informs specifically that ignoring bit 31 is an acceptable way
* to handle TM-related invalid forms that have bit 31 = 0. Moreover,
* for emulation purposes both forms (w/ and wo/ bit 31 set) can
* generate a softpatch interrupt. Hence both forms are handled below
* for tsr. to make them behave the same way.
*/
switch (instr & PO_XOP_OPCODE_MASK) {
case PPC_INST_RFID:
/* XXX do we need to check for PR=0 here? */
newmsr = vcpu->arch.shregs.srr1;
/* should only get here for Sx -> T1 transition */
if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
return 0;
newmsr = sanitize_msr(newmsr);
vcpu->arch.shregs.msr = newmsr;
vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
return 1;
case PPC_INST_RFEBB:
/* check for PR=1 and arch 2.06 bit set in PCR */
msr = vcpu->arch.shregs.msr;
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
return 0;
/* check EBB facility is available */
if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
return 0;
bescr = mfspr(SPRN_BESCR);
/* expect to see a S->T transition requested */
if (((bescr >> 30) & 3) != 2)
return 0;
bescr &= ~BESCR_GE;
if (instr & (1 << 11))
bescr |= BESCR_GE;
mtspr(SPRN_BESCR, bescr);
msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
vcpu->arch.shregs.msr = msr;
vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
vcpu->arch.regs.nip = mfspr(SPRN_EBBRR);
return 1;
case PPC_INST_MTMSRD:
/* XXX do we need to check for PR=0 here? */
rs = (instr >> 21) & 0x1f;
newmsr = kvmppc_get_gpr(vcpu, rs);
msr = vcpu->arch.shregs.msr;
/* check this is a Sx -> T1 transition */
if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
return 0;
/* mtmsrd doesn't change LE */
newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
newmsr = sanitize_msr(newmsr);
vcpu->arch.shregs.msr = newmsr;
return 1;
/* ignore bit 31, see comment above */
case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
/* we know the MSR has the TS field = S (0b01) here */
msr = vcpu->arch.shregs.msr;
/* check for PR=1 and arch 2.06 bit set in PCR */
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
return 0;
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
return 0;
/* L=1 => tresume => set TS to T (0b10) */
if (instr & (1 << 21))
vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
/* Set CR0 to 0b0010 */
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
0x20000000;
return 1;
}
return 0;
}
/*
* This is called when we are returning to a guest in TM transactional
* state. We roll the guest state back to the checkpointed state.
*/
void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
vcpu->arch.regs.nip = vcpu->arch.tfhar;
copy_from_checkpoint(vcpu);
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
}