2019-05-29 07:12:40 -07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2008-04-16 23:28:09 -05:00
|
|
|
/*
|
|
|
|
*
|
|
|
|
* Copyright IBM Corp. 2008
|
|
|
|
*
|
|
|
|
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __POWERPC_KVM_ASM_H__
|
|
|
|
#define __POWERPC_KVM_ASM_H__
|
|
|
|
|
2012-04-25 13:48:54 +02:00
|
|
|
#ifdef __ASSEMBLY__
|
2012-04-25 01:26:43 +00:00
|
|
|
#ifdef CONFIG_64BIT
|
|
|
|
#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg)
|
|
|
|
#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg)
|
|
|
|
#else
|
|
|
|
/*
 * Variant used when CONFIG_64BIT is not set: a word store (stw) at
 * (offset) + 4 from areg.  The offset argument is parenthesized so that an
 * expression argument (e.g. "a|b") keeps its own precedence.
 * NOTE(review): the +4 presumably selects the low word of a 64-bit
 * big-endian slot -- confirm against the users of this macro.
 */
#define PPC_STD(sreg, offset, areg) stw sreg, ((offset)+4)(areg)
|
|
|
|
/*
 * Variant used when CONFIG_64BIT is not set: a word load (lwz) from
 * (offset) + 4 relative to areg.  The offset argument is parenthesized so
 * that an expression argument (e.g. "a|b") keeps its own precedence.
 * NOTE(review): the +4 presumably selects the low word of a 64-bit
 * big-endian slot -- confirm against the users of this macro.
 */
#define PPC_LD(treg, offset, areg) lwz treg, ((offset)+4)(areg)
|
|
|
|
#endif
|
2012-04-25 13:48:54 +02:00
|
|
|
#endif
|
2012-04-25 01:26:43 +00:00
|
|
|
|
2008-04-16 23:28:09 -05:00
|
|
|
/* IVPR must be 64KiB-aligned. */
|
|
|
|
#define VCPU_SIZE_ORDER 4
|
|
|
|
#define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12)
|
|
|
|
#define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG)
|
|
|
|
|
|
|
|
#define BOOKE_INTERRUPT_CRITICAL 0
|
|
|
|
#define BOOKE_INTERRUPT_MACHINE_CHECK 1
|
|
|
|
#define BOOKE_INTERRUPT_DATA_STORAGE 2
|
|
|
|
#define BOOKE_INTERRUPT_INST_STORAGE 3
|
|
|
|
#define BOOKE_INTERRUPT_EXTERNAL 4
|
|
|
|
#define BOOKE_INTERRUPT_ALIGNMENT 5
|
|
|
|
#define BOOKE_INTERRUPT_PROGRAM 6
|
|
|
|
#define BOOKE_INTERRUPT_FP_UNAVAIL 7
|
|
|
|
#define BOOKE_INTERRUPT_SYSCALL 8
|
|
|
|
#define BOOKE_INTERRUPT_AP_UNAVAIL 9
|
|
|
|
#define BOOKE_INTERRUPT_DECREMENTER 10
|
|
|
|
#define BOOKE_INTERRUPT_FIT 11
|
|
|
|
#define BOOKE_INTERRUPT_WATCHDOG 12
|
|
|
|
#define BOOKE_INTERRUPT_DTLB_MISS 13
|
|
|
|
#define BOOKE_INTERRUPT_ITLB_MISS 14
|
|
|
|
#define BOOKE_INTERRUPT_DEBUG 15
|
2009-01-03 16:23:13 -06:00
|
|
|
|
|
|
|
/* E500 */
|
2014-09-01 13:17:43 +03:00
|
|
|
#ifdef CONFIG_SPE_POSSIBLE
|
|
|
|
#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
|
|
|
|
#define BOOKE_INTERRUPT_SPE_FP_DATA 33
|
2009-01-03 16:23:13 -06:00
|
|
|
#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
|
2014-09-01 13:17:43 +03:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_E500MC
|
|
|
|
#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32
|
|
|
|
#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33
|
|
|
|
#endif
|
|
|
|
|
2009-01-03 16:23:13 -06:00
|
|
|
#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
|
2011-12-20 15:34:43 +00:00
|
|
|
#define BOOKE_INTERRUPT_DOORBELL 36
|
|
|
|
#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
|
|
|
|
|
|
|
|
/* booke_hv */
|
|
|
|
#define BOOKE_INTERRUPT_GUEST_DBELL 38
|
|
|
|
#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39
|
|
|
|
#define BOOKE_INTERRUPT_HV_SYSCALL 40
|
|
|
|
#define BOOKE_INTERRUPT_HV_PRIV 41
|
2013-08-08 15:56:09 +03:00
|
|
|
#define BOOKE_INTERRUPT_LRAT_ERROR 42
|
2008-04-16 23:28:09 -05:00
|
|
|
|
2009-10-30 05:47:03 +00:00
|
|
|
/* book3s */
|
|
|
|
|
|
|
|
#define BOOK3S_INTERRUPT_SYSTEM_RESET 0x100
|
|
|
|
#define BOOK3S_INTERRUPT_MACHINE_CHECK 0x200
|
|
|
|
#define BOOK3S_INTERRUPT_DATA_STORAGE 0x300
|
|
|
|
#define BOOK3S_INTERRUPT_DATA_SEGMENT 0x380
|
|
|
|
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
|
|
|
|
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
|
|
|
|
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
|
2011-04-05 14:20:31 +10:00
|
|
|
#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
|
2009-10-30 05:47:03 +00:00
|
|
|
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
|
|
|
|
#define BOOK3S_INTERRUPT_PROGRAM 0x700
|
|
|
|
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
|
|
|
|
#define BOOK3S_INTERRUPT_DECREMENTER 0x900
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 00:21:34 +00:00
|
|
|
#define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980
|
KVM: PPC: Book3S PR: Cope with doorbell interrupts
When the PR host is running on a POWER8 machine in POWER8 mode, it
will use doorbell interrupts for IPIs. If one of them arrives while
we are in the guest, we pop out of the guest with trap number 0xA00,
which isn't handled by kvmppc_handle_exit_pr, leading to the following
BUG_ON:
[ 331.436215] exit_nr=0xa00 | pc=0x1d2c | msr=0x800000000000d032
[ 331.437522] ------------[ cut here ]------------
[ 331.438296] kernel BUG at arch/powerpc/kvm/book3s_pr.c:982!
[ 331.439063] Oops: Exception in kernel mode, sig: 5 [#2]
[ 331.439819] SMP NR_CPUS=1024 NUMA pSeries
[ 331.440552] Modules linked in: tun nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6t_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw virtio_net kvm binfmt_misc ibmvscsi scsi_transport_srp scsi_tgt virtio_blk
[ 331.447614] CPU: 11 PID: 1296 Comm: qemu-system-ppc Tainted: G D 3.11.7-200.2.fc19.ppc64p7 #1
[ 331.448920] task: c0000003bdc8c000 ti: c0000003bd32c000 task.ti: c0000003bd32c000
[ 331.450088] NIP: d0000000025d6b9c LR: d0000000025d6b98 CTR: c0000000004cfdd0
[ 331.451042] REGS: c0000003bd32f420 TRAP: 0700 Tainted: G D (3.11.7-200.2.fc19.ppc64p7)
[ 331.452331] MSR: 800000000282b032 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI> CR: 28004824 XER: 20000000
[ 331.454616] SOFTE: 1
[ 331.455106] CFAR: c000000000848bb8
[ 331.455726]
GPR00: d0000000025d6b98 c0000003bd32f6a0 d0000000026017b8 0000000000000032
GPR04: c0000000018627f8 c000000001873208 320d0a3030303030 3030303030643033
GPR08: c000000000c490a8 0000000000000000 0000000000000000 0000000000000002
GPR12: 0000000028004822 c00000000fdc6300 0000000000000000 00000100076ec310
GPR16: 000000002ae343b8 00003ffffd397398 0000000000000000 0000000000000000
GPR20: 00000100076f16f4 00000100076ebe60 0000000000000008 ffffffffffffffff
GPR24: 0000000000000000 0000008001041e60 0000000000000000 0000008001040ce8
GPR28: c0000003a2d80000 0000000000000a00 0000000000000001 c0000003a2681810
[ 331.466504] NIP [d0000000025d6b9c] .kvmppc_handle_exit_pr+0x75c/0xa80 [kvm]
[ 331.466999] LR [d0000000025d6b98] .kvmppc_handle_exit_pr+0x758/0xa80 [kvm]
[ 331.467517] Call Trace:
[ 331.467909] [c0000003bd32f6a0] [d0000000025d6b98] .kvmppc_handle_exit_pr+0x758/0xa80 [kvm] (unreliable)
[ 331.468553] [c0000003bd32f750] [d0000000025d98f0] kvm_start_lightweight+0xb4/0xc4 [kvm]
[ 331.469189] [c0000003bd32f920] [d0000000025d7648] .kvmppc_vcpu_run_pr+0xd8/0x270 [kvm]
[ 331.469838] [c0000003bd32f9c0] [d0000000025cf748] .kvmppc_vcpu_run+0xc8/0xf0 [kvm]
[ 331.470790] [c0000003bd32fa50] [d0000000025cc19c] .kvm_arch_vcpu_ioctl_run+0x5c/0x1b0 [kvm]
[ 331.471401] [c0000003bd32fae0] [d0000000025c4888] .kvm_vcpu_ioctl+0x478/0x730 [kvm]
[ 331.472026] [c0000003bd32fc90] [c00000000026192c] .do_vfs_ioctl+0x4dc/0x7a0
[ 331.472561] [c0000003bd32fd80] [c000000000261cc4] .SyS_ioctl+0xd4/0xf0
[ 331.473095] [c0000003bd32fe30] [c000000000009ed8] syscall_exit+0x0/0x98
[ 331.473633] Instruction dump:
[ 331.473766] 4bfff9b4 2b9d0800 419efc18 60000000 60420000 3d220000 e8bf11a0 e8df12a8
[ 331.474733] 7fa4eb78 e8698660 48015165 e8410028 <0fe00000> 813f00e4 3ba00000 39290001
[ 331.475386] ---[ end trace 49fc47d994c1f8f2 ]---
[ 331.479817]
This fixes the problem by making kvmppc_handle_exit_pr() recognize the
interrupt. We also need to jump to the doorbell interrupt handler in
book3s_segment.S to handle the interrupt on the way out of the guest.
Having done that, there's nothing further to be done in
kvmppc_handle_exit_pr().
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2014-01-08 21:25:36 +11:00
|
|
|
#define BOOK3S_INTERRUPT_DOORBELL 0xa00
|
2009-10-30 05:47:03 +00:00
|
|
|
#define BOOK3S_INTERRUPT_SYSCALL 0xc00
|
|
|
|
#define BOOK3S_INTERRUPT_TRACE 0xd00
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 00:21:34 +00:00
|
|
|
#define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00
|
|
|
|
#define BOOK3S_INTERRUPT_H_INST_STORAGE 0xe20
|
|
|
|
#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40
|
2014-07-29 18:40:01 +05:30
|
|
|
#define BOOK3S_INTERRUPT_HMI 0xe60
|
2014-01-08 21:25:28 +11:00
|
|
|
#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80
|
2016-11-22 14:30:14 +11:00
|
|
|
#define BOOK3S_INTERRUPT_H_VIRT 0xea0
|
2009-10-30 05:47:03 +00:00
|
|
|
#define BOOK3S_INTERRUPT_PERFMON 0xf00
|
|
|
|
#define BOOK3S_INTERRUPT_ALTIVEC 0xf20
|
|
|
|
#define BOOK3S_INTERRUPT_VSX 0xf40
|
2014-04-29 16:48:44 +02:00
|
|
|
#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60
|
2014-01-08 21:25:23 +11:00
|
|
|
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
|
2009-10-30 05:47:03 +00:00
|
|
|
|
2016-08-19 15:35:52 +10:00
|
|
|
/* book3s_hv */
|
|
|
|
|
KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
POWER9 has hardware bugs relating to transactional memory and thread
reconfiguration (changes to hardware SMT mode). Specifically, the core
does not have enough storage to store a complete checkpoint of all the
architected state for all four threads. The DD2.2 version of POWER9
includes hardware modifications designed to allow hypervisor software
to implement workarounds for these problems. This patch implements
those workarounds in KVM code so that KVM guests see a full, working
transactional memory implementation.
The problems center around the use of TM suspended state, where the
CPU has a checkpointed state but execution is not transactional. The
workaround is to implement a "fake suspend" state, which looks to the
guest like suspended state but the CPU does not store a checkpoint.
In this state, any instruction that would cause a transition to
transactional state (rfid, rfebb, mtmsrd, tresume) or would use the
checkpointed state (treclaim) causes a "soft patch" interrupt (vector
0x1500) to the hypervisor so that it can be emulated. The trechkpt
instruction also causes a soft patch interrupt.
On POWER9 DD2.2, we avoid returning to the guest in any state which
would require a checkpoint to be present. The trechkpt in the guest
entry path which would normally create that checkpoint is replaced by
either a transition to fake suspend state, if the guest is in suspend
state, or a rollback to the pre-transactional state if the guest is in
transactional state. Fake suspend state is indicated by a flag in the
PACA plus a new bit in the PSSCR. The new PSSCR bit is write-only and
reads back as 0.
On exit from the guest, if the guest is in fake suspend state, we still
do the treclaim instruction as we would in real suspend state, in order
to get into non-transactional state, but we do not save the resulting
register state since there was no checkpoint.
Emulation of the instructions that cause a softpatch interrupt is
handled in two paths. If the guest is in real suspend mode, we call
kvmhv_p9_tm_emulation_early() to handle the cases where the guest is
transitioning to transactional state. This is called before we do the
treclaim in the guest exit path; because we haven't done treclaim, we
can get back to the guest with the transaction still active. If the
instruction is a case that kvmhv_p9_tm_emulation_early() doesn't
handle, or if the guest is in fake suspend state, then we proceed to
do the complete guest exit path and subsequently call
kvmhv_p9_tm_emulation() in host context with the MMU on. This handles
all the cases including the cases that generate program interrupts
(illegal instruction or TM Bad Thing) and facility unavailable
interrupts.
The emulation is reasonably straightforward and is mostly concerned
with checking for exception conditions and updating the state of
registers such as MSR and CR0. The treclaim emulation takes care to
ensure that the TEXASR register gets updated as if it were the guest
treclaim instruction that had done failure recording, not the treclaim
done in hypervisor state in the guest exit path.
With this, the KVM_CAP_PPC_HTM capability returns true (1) even if
transactional memory is not available to host userspace.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-03-21 21:32:01 +11:00
|
|
|
#define BOOK3S_INTERRUPT_HV_SOFTPATCH 0x1500
|
|
|
|
|
2016-08-19 15:35:52 +10:00
|
|
|
/*
|
|
|
|
* Special trap used to indicate to host that this is a
|
|
|
|
* passthrough interrupt that could not be handled
|
|
|
|
* completely in the guest.
|
|
|
|
*/
|
|
|
|
#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555
|
|
|
|
|
2009-10-30 05:47:03 +00:00
|
|
|
#define BOOK3S_IRQPRIO_SYSTEM_RESET 0
|
|
|
|
#define BOOK3S_IRQPRIO_DATA_SEGMENT 1
|
|
|
|
#define BOOK3S_IRQPRIO_INST_SEGMENT 2
|
|
|
|
#define BOOK3S_IRQPRIO_DATA_STORAGE 3
|
|
|
|
#define BOOK3S_IRQPRIO_INST_STORAGE 4
|
|
|
|
#define BOOK3S_IRQPRIO_ALIGNMENT 5
|
|
|
|
#define BOOK3S_IRQPRIO_PROGRAM 6
|
|
|
|
#define BOOK3S_IRQPRIO_FP_UNAVAIL 7
|
|
|
|
#define BOOK3S_IRQPRIO_ALTIVEC 8
|
|
|
|
#define BOOK3S_IRQPRIO_VSX 9
|
2014-04-29 16:48:44 +02:00
|
|
|
#define BOOK3S_IRQPRIO_FAC_UNAVAIL 10
|
|
|
|
#define BOOK3S_IRQPRIO_SYSCALL 11
|
|
|
|
#define BOOK3S_IRQPRIO_MACHINE_CHECK 12
|
|
|
|
#define BOOK3S_IRQPRIO_DEBUG 13
|
|
|
|
#define BOOK3S_IRQPRIO_EXTERNAL 14
|
|
|
|
#define BOOK3S_IRQPRIO_DECREMENTER 15
|
|
|
|
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
|
2018-10-08 16:30:48 +11:00
|
|
|
#define BOOK3S_IRQPRIO_MAX 17
|
2009-10-30 05:47:03 +00:00
|
|
|
|
|
|
|
#define BOOK3S_HFLAG_DCBZ32 0x1
|
2009-11-30 03:02:02 +00:00
|
|
|
#define BOOK3S_HFLAG_SLB 0x2
|
2010-02-19 11:00:32 +01:00
|
|
|
#define BOOK3S_HFLAG_PAIRED_SINGLE 0x4
|
2010-04-20 02:49:54 +02:00
|
|
|
#define BOOK3S_HFLAG_NATIVE_PS 0x8
|
2013-09-20 14:52:44 +10:00
|
|
|
#define BOOK3S_HFLAG_MULTI_PGSIZE 0x10
|
|
|
|
#define BOOK3S_HFLAG_NEW_TLBIE 0x20
|
2014-07-11 02:58:58 +02:00
|
|
|
#define BOOK3S_HFLAG_SPLIT_HACK 0x40
|
2009-10-30 05:47:03 +00:00
|
|
|
|
2008-04-16 23:28:09 -05:00
|
|
|
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
|
|
|
|
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
|
2012-10-15 01:16:48 +00:00
|
|
|
#define RESUME_FLAG_ARCH1 (1<<2)
|
2016-08-19 15:35:52 +10:00
|
|
|
#define RESUME_FLAG_ARCH2 (1<<3)
|
2008-04-16 23:28:09 -05:00
|
|
|
|
|
|
|
#define RESUME_GUEST 0
|
|
|
|
#define RESUME_GUEST_NV RESUME_FLAG_NV
|
|
|
|
#define RESUME_HOST RESUME_FLAG_HOST
|
|
|
|
#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV)
|
|
|
|
|
2010-01-08 02:58:04 +01:00
|
|
|
#define KVM_GUEST_MODE_NONE 0
|
|
|
|
#define KVM_GUEST_MODE_GUEST 1
|
|
|
|
#define KVM_GUEST_MODE_SKIP 2
|
KVM: PPC: Book3S HV: Better handling of exceptions that happen in real mode
When an interrupt or exception happens in the guest that comes to the
host, the CPU goes to hypervisor real mode (MMU off) to handle the
exception but doesn't change the MMU context. After saving a few
registers, we then clear the "in guest" flag. If, for any reason,
we get an exception in the real-mode code, that then gets handled
by the normal kernel exception handlers, which turn the MMU on. This
is disastrous if the MMU is still set to the guest context, since we
end up executing instructions from random places in the guest kernel
with hypervisor privilege.
In order to catch this situation, we define a new value for the "in guest"
flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
mode with guest MMU context. If the "in guest" flag is set to this value,
we branch off to an emergency handler. For the moment, this just does
a branch to self to stop the CPU from doing anything further.
While we're here, we define another new flag value to indicate that we
are in a HV guest, as distinct from a PR guest. This will be useful
when we have a kernel that can support both PR and HV guests concurrently.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-10-04 21:45:04 +10:00
|
|
|
#define KVM_GUEST_MODE_GUEST_HV 3
|
|
|
|
#define KVM_GUEST_MODE_HOST_HV 4
|
2010-01-08 02:58:04 +01:00
|
|
|
|
|
|
|
/* Parenthesized so the negative literal always expands as a single operand. */
#define KVM_INST_FETCH_FAILED (-1)
|
|
|
|
|
KVM: PPC: Book3S HV: Treat TM-related invalid form instructions on P9 like the valid ones
On P9 DD2.2 due to a CPU defect some TM instructions need to be emulated by
KVM. This is handled at first by the hardware raising a softpatch interrupt
when certain TM instructions that need KVM assistance are executed in the
guest. Although some TM instructions per Power ISA are invalid forms they
can raise a softpatch interrupt too. For instance, 'tresume.' instruction
as defined in the ISA must have bit 31 set (1), but an instruction that
matches 'tresume.' PO and XO opcode fields but has bit 31 not set (0), like
0x7cfe9ddc, also raises a softpatch interrupt. Similarly for 'treclaim.'
and 'trechkpt.' instructions with bit 31 = 0, i.e. 0x7c00075c and
0x7c0007dc, respectively. Hence, if a code like the following is executed
in the guest it will raise a softpatch interrupt just like a 'tresume.'
when the TM facility is enabled ('tabort. 0' in the example is used only
to enable the TM facility):
int main() { asm("tabort. 0; .long 0x7cfe9ddc;"); }
Currently in such a case KVM throws a complete trace like:
[345523.705984] WARNING: CPU: 24 PID: 64413 at arch/powerpc/kvm/book3s_hv_tm.c:211 kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv]
[345523.705985] Modules linked in: kvm_hv(E) xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle ip6table_nat
iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ebtable_filter ebtables ip6table_filter
ip6_tables iptable_filter bridge stp llc sch_fq_codel ipmi_powernv at24 vmx_crypto ipmi_devintf ipmi_msghandler
ibmpowernv uio_pdrv_genirq kvm opal_prd uio leds_powernv ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp
libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456
async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear tg3
crct10dif_vpmsum crc32c_vpmsum ipr [last unloaded: kvm_hv]
[345523.706030] CPU: 24 PID: 64413 Comm: CPU 0/KVM Tainted: G W E 5.5.0+ #1
[345523.706031] NIP: c0080000072cb9c0 LR: c0080000072b5e80 CTR: c0080000085c7850
[345523.706034] REGS: c000000399467680 TRAP: 0700 Tainted: G W E (5.5.0+)
[345523.706034] MSR: 900000010282b033 <SF,HV,VEC,VSX,EE,FP,ME,IR,DR,RI,LE,TM[E]> CR: 24022428 XER: 00000000
[345523.706042] CFAR: c0080000072b5e7c IRQMASK: 0
GPR00: c0080000072b5e80 c000000399467910 c0080000072db500 c000000375ccc720
GPR04: c000000375ccc720 00000003fbec0000 0000a10395dda5a6 0000000000000000
GPR08: 000000007cfe9ddc 7cfe9ddc000005dc 7cfe9ddc7c0005dc c0080000072cd530
GPR12: c0080000085c7850 c0000003fffeb800 0000000000000001 00007dfb737f0000
GPR16: c0002001edcca558 0000000000000000 0000000000000000 0000000000000001
GPR20: c000000001b21258 c0002001edcca558 0000000000000018 0000000000000000
GPR24: 0000000001000000 ffffffffffffffff 0000000000000001 0000000000001500
GPR28: c0002001edcc4278 c00000037dd80000 800000050280f033 c000000375ccc720
[345523.706062] NIP [c0080000072cb9c0] kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv]
[345523.706065] LR [c0080000072b5e80] kvmppc_handle_exit_hv.isra.53+0x3e8/0x798 [kvm_hv]
[345523.706066] Call Trace:
[345523.706069] [c000000399467910] [c000000399467940] 0xc000000399467940 (unreliable)
[345523.706071] [c000000399467950] [c000000399467980] 0xc000000399467980
[345523.706075] [c0000003994679f0] [c0080000072bd1c4] kvmhv_run_single_vcpu+0xa1c/0xb80 [kvm_hv]
[345523.706079] [c000000399467ac0] [c0080000072bd8e0] kvmppc_vcpu_run_hv+0x5b8/0xb00 [kvm_hv]
[345523.706087] [c000000399467b90] [c0080000085c93cc] kvmppc_vcpu_run+0x34/0x48 [kvm]
[345523.706095] [c000000399467bb0] [c0080000085c582c] kvm_arch_vcpu_ioctl_run+0x244/0x420 [kvm]
[345523.706101] [c000000399467c40] [c0080000085b7498] kvm_vcpu_ioctl+0x3d0/0x7b0 [kvm]
[345523.706105] [c000000399467db0] [c0000000004adf9c] ksys_ioctl+0x13c/0x170
[345523.706107] [c000000399467e00] [c0000000004adff8] sys_ioctl+0x28/0x80
[345523.706111] [c000000399467e20] [c00000000000b278] system_call+0x5c/0x68
[345523.706112] Instruction dump:
[345523.706114] 419e0390 7f8a4840 409d0048 6d497c00 2f89075d 419e021c 6d497c00 2f8907dd
[345523.706119] 419e01c0 6d497c00 2f8905dd 419e00a4 <0fe00000> 38210040 38600000 ebc1fff0
and then treats the executed instruction as a 'nop'.
However the POWER9 User's Manual, in section "4.6.10 Book II Invalid
Forms", informs that for TM instructions bit 31 is in fact ignored, thus
for the TM-related invalid forms ignoring bit 31 and handling them like the
valid forms is an acceptable way to handle them. POWER8 behaves the same
way too.
This commit changes the handling of the cases here described by treating
the TM-related invalid forms that can generate a softpatch interrupt
just like their valid forms (w/ bit 31 = 1) instead of as a 'nop' and by
gently reporting any other unrecognized case to the host and treating it as
illegal instruction instead of throwing a trace and treating it as a 'nop'.
Signed-off-by: Gustavo Romero <gromero@linux.ibm.com>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Acked-By: Michael Neuling <mikey@neuling.org>
Reviewed-by: Leonardo Bras <leonardo@linux.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2020-02-21 11:29:50 -05:00
|
|
|
/* Extract PO and XOP opcode fields */
|
|
|
|
#define PO_XOP_OPCODE_MASK 0xfc0007fe
|
|
|
|
|
2008-04-16 23:28:09 -05:00
|
|
|
#endif /* __POWERPC_KVM_ASM_H__ */
|