mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
KVM fixes for 3.11
On the x86 side, there are some optimizations and documentation updates. The big ARM/KVM change for 3.11, support for AArch64, will come through Catalin Marinas's tree. s390 and PPC have misc cleanups and bugfixes. There is a conflict due to "s390/pgtable: fix ipte notify bit" having entered 3.10 through Martin Schwidefsky's s390 tree. This pull request has additional changes on top, so this tree's version is the correct one. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.13 (GNU/Linux) iQIcBAABAgAGBQJR0oU6AAoJEBvWZb6bTYbynnsP/RSUrrHrA8Wu1tqVfAKu+1y5 6OIihqZ9x11/YMaNofAfv86jqxFu0/j7CzMGphNdjzujqKI+Q1tGe7oiVCmKzoG+ UvSctWsz0lpllgBtnnrm5tcfmG6rrddhLtpA7m320+xCVx8KV5P4VfyHZEU+Ho8h ziPmb2mAQ65gBNX6nLHEJ3ITTgad6gt4NNbrKIYpyXuWZQJypzaRqT/vpc4md+Ed dCebMXsL1xgyb98EcnOdrWH1wV30MfucR7IpObOhXnnMKeeltqAQPvaOlKzZh4dK +QfxJfdRZVS0cepcxzx1Q2X3dgjoKQsHq1nlIyz3qu1vhtfaqBlixLZk0SguZ/R9 1S1YqucZiLRO57RD4q0Ak5oxwobu18ZoqJZ6nledNdWwDe8bz/W2wGAeVty19ky0 qstBdM9jnwXrc0qrVgZp3+s5dsx3NAm/KKZBoq4sXiDLd/yBzdEdWIVkIrU3X9wU 3X26wOmBxtsB7so/JR7ciTsQHelmLicnVeXohAEP9CjIJffB81xVXnXs0P0SYuiQ RzbSCwjPzET4JBOaHWT0Dhv0DTS/EaI97KzlN32US3Bn3WiLlS1oDCoPFoaLqd2K LxQMsXS8anAWxFvexfSuUpbJGPnKSidSQoQmJeMGBa9QhmZCht3IL16/Fb641ToN xBohzi49L9FDbpOnTYfz =1zpG -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM fixes from Paolo Bonzini: "On the x86 side, there are some optimizations and documentation updates. The big ARM/KVM change for 3.11, support for AArch64, will come through Catalin Marinas's tree. s390 and PPC have misc cleanups and bugfixes" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (87 commits) KVM: PPC: Ignore PIR writes KVM: PPC: Book3S PR: Invalidate SLB entries properly KVM: PPC: Book3S PR: Allow guest to use 1TB segments KVM: PPC: Book3S PR: Don't keep scanning HPTEG after we find a match KVM: PPC: Book3S PR: Fix invalidation of SLB entry 0 on guest entry KVM: PPC: Book3S PR: Fix proto-VSID calculations KVM: PPC: Guard doorbell exception with CONFIG_PPC_DOORBELL KVM: Fix RTC interrupt coalescing tracking kvm: Add a tracepoint write_tsc_offset KVM: MMU: Inform users of mmio generation wraparound KVM: MMU: document fast invalidate all mmio sptes KVM: MMU: document fast invalidate all pages KVM: MMU: document fast page fault KVM: MMU: document mmio page fault KVM: MMU: document write_flooding_count KVM: MMU: document clear_spte_count KVM: MMU: drop kvm_mmu_zap_mmio_sptes KVM: MMU: init kvm generation close to mmio wrap-around value KVM: MMU: add tracepoint for check_mmio_spte KVM: MMU: fast invalidate all mmio sptes ...
This commit is contained in:
commit
fe489bf450
@ -2278,7 +2278,7 @@ return indicates the attribute is implemented. It does not necessarily
|
||||
indicate that the attribute can be read or written in the device's
|
||||
current state. "addr" is ignored.
|
||||
|
||||
4.77 KVM_ARM_VCPU_INIT
|
||||
4.82 KVM_ARM_VCPU_INIT
|
||||
|
||||
Capability: basic
|
||||
Architectures: arm, arm64
|
||||
@ -2304,7 +2304,7 @@ Possible features:
|
||||
Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
|
||||
|
||||
|
||||
4.78 KVM_GET_REG_LIST
|
||||
4.83 KVM_GET_REG_LIST
|
||||
|
||||
Capability: basic
|
||||
Architectures: arm, arm64
|
||||
@ -2324,7 +2324,7 @@ This ioctl returns the guest registers that are supported for the
|
||||
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
|
||||
|
||||
|
||||
4.80 KVM_ARM_SET_DEVICE_ADDR
|
||||
4.84 KVM_ARM_SET_DEVICE_ADDR
|
||||
|
||||
Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
|
||||
Architectures: arm, arm64
|
||||
@ -2362,7 +2362,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
|
||||
KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
|
||||
base addresses will return -EEXIST.
|
||||
|
||||
4.82 KVM_PPC_RTAS_DEFINE_TOKEN
|
||||
4.85 KVM_PPC_RTAS_DEFINE_TOKEN
|
||||
|
||||
Capability: KVM_CAP_PPC_RTAS
|
||||
Architectures: ppc
|
||||
|
@ -191,12 +191,12 @@ Shadow pages contain the following information:
|
||||
A counter keeping track of how many hardware registers (guest cr3 or
|
||||
pdptrs) are now pointing at the page. While this counter is nonzero, the
|
||||
page cannot be destroyed. See role.invalid.
|
||||
multimapped:
|
||||
Whether there exist multiple sptes pointing at this page.
|
||||
parent_pte/parent_ptes:
|
||||
If multimapped is zero, parent_pte points at the single spte that points at
|
||||
this page's spt. Otherwise, parent_ptes points at a data structure
|
||||
with a list of parent_ptes.
|
||||
parent_ptes:
|
||||
The reverse mapping for the pte/ptes pointing at this page's spt. If
|
||||
parent_ptes bit 0 is zero, only one spte points at this pages and
|
||||
parent_ptes points at this single spte, otherwise, there exists multiple
|
||||
sptes pointing at this page and (parent_ptes & ~0x1) points at a data
|
||||
structure with a list of parent_ptes.
|
||||
unsync:
|
||||
If true, then the translations in this page may not match the guest's
|
||||
translation. This is equivalent to the state of the tlb when a pte is
|
||||
@ -210,6 +210,24 @@ Shadow pages contain the following information:
|
||||
A bitmap indicating which sptes in spt point (directly or indirectly) at
|
||||
pages that may be unsynchronized. Used to quickly locate all unsychronized
|
||||
pages reachable from a given page.
|
||||
mmu_valid_gen:
|
||||
Generation number of the page. It is compared with kvm->arch.mmu_valid_gen
|
||||
during hash table lookup, and used to skip invalidated shadow pages (see
|
||||
"Zapping all pages" below.)
|
||||
clear_spte_count:
|
||||
Only present on 32-bit hosts, where a 64-bit spte cannot be written
|
||||
atomically. The reader uses this while running out of the MMU lock
|
||||
to detect in-progress updates and retry them until the writer has
|
||||
finished the write.
|
||||
write_flooding_count:
|
||||
A guest may write to a page table many times, causing a lot of
|
||||
emulations if the page needs to be write-protected (see "Synchronized
|
||||
and unsynchronized pages" below). Leaf pages can be unsynchronized
|
||||
so that they do not trigger frequent emulation, but this is not
|
||||
possible for non-leafs. This field counts the number of emulations
|
||||
since the last time the page table was actually used; if emulation
|
||||
is triggered too frequently on this page, KVM will unmap the page
|
||||
to avoid emulation in the future.
|
||||
|
||||
Reverse map
|
||||
===========
|
||||
@ -258,14 +276,26 @@ This is the most complicated event. The cause of a page fault can be:
|
||||
|
||||
Handling a page fault is performed as follows:
|
||||
|
||||
- if the RSV bit of the error code is set, the page fault is caused by guest
|
||||
accessing MMIO and cached MMIO information is available.
|
||||
- walk shadow page table
|
||||
- check for valid generation number in the spte (see "Fast invalidation of
|
||||
MMIO sptes" below)
|
||||
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.access and
|
||||
vcpu->arch.mmio_gfn, and call the emulator
|
||||
- If both P bit and R/W bit of error code are set, this could possibly
|
||||
be handled as a "fast page fault" (fixed without taking the MMU lock). See
|
||||
the description in Documentation/virtual/kvm/locking.txt.
|
||||
- if needed, walk the guest page tables to determine the guest translation
|
||||
(gva->gpa or ngpa->gpa)
|
||||
- if permissions are insufficient, reflect the fault back to the guest
|
||||
- determine the host page
|
||||
- if this is an mmio request, there is no host page; call the emulator
|
||||
to emulate the instruction instead
|
||||
- if this is an mmio request, there is no host page; cache the info to
|
||||
vcpu->arch.mmio_gva, vcpu->arch.access and vcpu->arch.mmio_gfn
|
||||
- walk the shadow page table to find the spte for the translation,
|
||||
instantiating missing intermediate page tables as necessary
|
||||
- If this is an mmio request, cache the mmio info to the spte and set some
|
||||
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
|
||||
- try to unsynchronize the page
|
||||
- if successful, we can let the guest continue and modify the gpte
|
||||
- emulate the instruction
|
||||
@ -351,6 +381,51 @@ causes its write_count to be incremented, thus preventing instantiation of
|
||||
a large spte. The frames at the end of an unaligned memory slot have
|
||||
artificially inflated ->write_counts so they can never be instantiated.
|
||||
|
||||
Zapping all pages (page generation count)
|
||||
=========================================
|
||||
|
||||
For the large memory guests, walking and zapping all pages is really slow
|
||||
(because there are a lot of pages), and also blocks memory accesses of
|
||||
all VCPUs because it needs to hold the MMU lock.
|
||||
|
||||
To make it be more scalable, kvm maintains a global generation number
|
||||
which is stored in kvm->arch.mmu_valid_gen. Every shadow page stores
|
||||
the current global generation-number into sp->mmu_valid_gen when it
|
||||
is created. Pages with a mismatching generation number are "obsolete".
|
||||
|
||||
When KVM need zap all shadow pages sptes, it just simply increases the global
|
||||
generation-number then reload root shadow pages on all vcpus. As the VCPUs
|
||||
create new shadow page tables, the old pages are not used because of the
|
||||
mismatching generation number.
|
||||
|
||||
KVM then walks through all pages and zaps obsolete pages. While the zap
|
||||
operation needs to take the MMU lock, the lock can be released periodically
|
||||
so that the VCPUs can make progress.
|
||||
|
||||
Fast invalidation of MMIO sptes
|
||||
===============================
|
||||
|
||||
As mentioned in "Reaction to events" above, kvm will cache MMIO
|
||||
information in leaf sptes. When a new memslot is added or an existing
|
||||
memslot is changed, this information may become stale and needs to be
|
||||
invalidated. This also needs to hold the MMU lock while walking all
|
||||
shadow pages, and is made more scalable with a similar technique.
|
||||
|
||||
MMIO sptes have a few spare bits, which are used to store a
|
||||
generation number. The global generation number is stored in
|
||||
kvm_memslots(kvm)->generation, and increased whenever guest memory info
|
||||
changes. This generation number is distinct from the one described in
|
||||
the previous section.
|
||||
|
||||
When KVM finds an MMIO spte, it checks the generation number of the spte.
|
||||
If the generation number of the spte does not equal the global generation
|
||||
number, it will ignore the cached MMIO information and handle the page
|
||||
fault through the slow path.
|
||||
|
||||
Since only 19 bits are used to store generation-number on mmio spte, all
|
||||
pages are zapped when there is an overflow.
|
||||
|
||||
|
||||
Further reading
|
||||
===============
|
||||
|
||||
|
@ -4733,10 +4733,10 @@ F: arch/s390/kvm/
|
||||
F: drivers/s390/kvm/
|
||||
|
||||
KERNEL VIRTUAL MACHINE (KVM) FOR ARM
|
||||
M: Christoffer Dall <cdall@cs.columbia.edu>
|
||||
M: Christoffer Dall <christoffer.dall@linaro.org>
|
||||
L: kvmarm@lists.cs.columbia.edu
|
||||
W: http://systems.cs.columbia.edu/projects/kvm-arm
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: arch/arm/include/uapi/asm/kvm*
|
||||
F: arch/arm/include/asm/kvm*
|
||||
F: arch/arm/kvm/
|
||||
|
@ -135,7 +135,6 @@
|
||||
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL)
|
||||
#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30))
|
||||
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
|
||||
#define S2_PGD_SIZE (1 << S2_PGD_ORDER)
|
||||
|
||||
/* Virtualization Translation Control Register (VTCR) bits */
|
||||
#define VTCR_SH0 (3 << 12)
|
||||
|
@ -37,16 +37,18 @@
|
||||
#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */
|
||||
#define c6_DFAR 16 /* Data Fault Address Register */
|
||||
#define c6_IFAR 17 /* Instruction Fault Address Register */
|
||||
#define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */
|
||||
#define c10_PRRR 19 /* Primary Region Remap Register */
|
||||
#define c10_NMRR 20 /* Normal Memory Remap Register */
|
||||
#define c12_VBAR 21 /* Vector Base Address Register */
|
||||
#define c13_CID 22 /* Context ID Register */
|
||||
#define c13_TID_URW 23 /* Thread ID, User R/W */
|
||||
#define c13_TID_URO 24 /* Thread ID, User R/O */
|
||||
#define c13_TID_PRIV 25 /* Thread ID, Privileged */
|
||||
#define c14_CNTKCTL 26 /* Timer Control Register (PL1) */
|
||||
#define NR_CP15_REGS 27 /* Number of regs (incl. invalid) */
|
||||
#define c7_PAR 18 /* Physical Address Register */
|
||||
#define c7_PAR_high 19 /* PAR top 32 bits */
|
||||
#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */
|
||||
#define c10_PRRR 21 /* Primary Region Remap Register */
|
||||
#define c10_NMRR 22 /* Normal Memory Remap Register */
|
||||
#define c12_VBAR 23 /* Vector Base Address Register */
|
||||
#define c13_CID 24 /* Context ID Register */
|
||||
#define c13_TID_URW 25 /* Thread ID, User R/W */
|
||||
#define c13_TID_URO 26 /* Thread ID, User R/O */
|
||||
#define c13_TID_PRIV 27 /* Thread ID, Privileged */
|
||||
#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */
|
||||
#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */
|
||||
|
||||
#define ARM_EXCEPTION_RESET 0
|
||||
#define ARM_EXCEPTION_UNDEFINED 1
|
||||
@ -72,8 +74,6 @@ extern char __kvm_hyp_vector[];
|
||||
extern char __kvm_hyp_code_start[];
|
||||
extern char __kvm_hyp_code_end[];
|
||||
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
|
||||
|
||||
extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
|
||||
|
||||
|
@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
|
||||
return cpsr_mode > USR_MODE;;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
|
||||
{
|
||||
return reg == 15;
|
||||
}
|
||||
|
||||
static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.fault.hsr;
|
||||
|
@ -23,9 +23,14 @@
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmio.h>
|
||||
#include <asm/fpstate.h>
|
||||
#include <asm/kvm_arch_timer.h>
|
||||
#include <kvm/arm_arch_timer.h>
|
||||
|
||||
#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
|
||||
#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
|
||||
#else
|
||||
#define KVM_MAX_VCPUS 0
|
||||
#endif
|
||||
|
||||
#define KVM_USER_MEM_SLOTS 32
|
||||
#define KVM_PRIVATE_MEM_SLOTS 4
|
||||
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
|
||||
@ -38,7 +43,7 @@
|
||||
#define KVM_NR_PAGE_SIZES 1
|
||||
#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
|
||||
|
||||
#include <asm/kvm_vgic.h>
|
||||
#include <kvm/arm_vgic.h>
|
||||
|
||||
struct kvm_vcpu;
|
||||
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
|
||||
@ -190,8 +195,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
|
||||
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
int exception_index);
|
||||
|
||||
static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
|
||||
unsigned long long pgd_ptr,
|
||||
static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
|
||||
phys_addr_t pgd_ptr,
|
||||
unsigned long hyp_stack_ptr,
|
||||
unsigned long vector_ptr)
|
||||
{
|
||||
|
@ -41,9 +41,9 @@ config KVM_ARM_HOST
|
||||
Provides host support for ARM processors.
|
||||
|
||||
config KVM_ARM_MAX_VCPUS
|
||||
int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
|
||||
default 4 if KVM_ARM_HOST
|
||||
default 0
|
||||
int "Number maximum supported virtual CPUs per VM"
|
||||
depends on KVM_ARM_HOST
|
||||
default 4
|
||||
help
|
||||
Static number of max supported virtual CPUs per VM.
|
||||
|
||||
@ -67,6 +67,4 @@ config KVM_ARM_TIMER
|
||||
---help---
|
||||
Adds support for the Architected Timers in virtual machines
|
||||
|
||||
source drivers/virtio/Kconfig
|
||||
|
||||
endif # VIRTUALIZATION
|
||||
|
@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I.
|
||||
AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
|
||||
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
|
||||
|
||||
kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
|
||||
KVM := ../../../virt/kvm
|
||||
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
|
||||
|
||||
obj-y += kvm-arm.o init.o interrupts.o
|
||||
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
|
||||
obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
|
||||
obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
|
||||
obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
|
||||
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
|
||||
obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
|
||||
|
@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
|
||||
static void cpu_init_hyp_mode(void *dummy)
|
||||
{
|
||||
unsigned long long boot_pgd_ptr;
|
||||
unsigned long long pgd_ptr;
|
||||
phys_addr_t boot_pgd_ptr;
|
||||
phys_addr_t pgd_ptr;
|
||||
unsigned long hyp_stack_ptr;
|
||||
unsigned long stack_page;
|
||||
unsigned long vector_ptr;
|
||||
@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy)
|
||||
/* Switch from the HYP stub to our own HYP init vector */
|
||||
__hyp_set_vectors(kvm_get_idmap_vector());
|
||||
|
||||
boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
|
||||
pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
|
||||
boot_pgd_ptr = kvm_mmu_get_boot_httbr();
|
||||
pgd_ptr = kvm_mmu_get_httbr();
|
||||
stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
|
||||
hyp_stack_ptr = stack_page + PAGE_SIZE;
|
||||
vector_ptr = (unsigned long)__kvm_hyp_vector;
|
||||
|
@ -180,6 +180,10 @@ static const struct coproc_reg cp15_regs[] = {
|
||||
NULL, reset_unknown, c6_DFAR },
|
||||
{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
|
||||
NULL, reset_unknown, c6_IFAR },
|
||||
|
||||
/* PAR swapped by interrupt.S */
|
||||
{ CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
|
||||
|
||||
/*
|
||||
* DC{C,I,CI}SW operations:
|
||||
*/
|
||||
|
@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
|
||||
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
if (kvm_psci_call(vcpu))
|
||||
return 1;
|
||||
|
||||
kvm_inject_undefined(vcpu);
|
||||
return 1;
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ __kvm_hyp_code_start:
|
||||
ENTRY(__kvm_tlb_flush_vmid_ipa)
|
||||
push {r2, r3}
|
||||
|
||||
dsb ishst
|
||||
add r0, r0, #KVM_VTTBR
|
||||
ldrd r2, r3, [r0]
|
||||
mcrr p15, 6, r2, r3, c2 @ Write VTTBR
|
||||
@ -291,6 +292,7 @@ THUMB( orr r2, r2, #PSR_T_BIT )
|
||||
ldr r2, =BSYM(panic)
|
||||
msr ELR_hyp, r2
|
||||
ldr r0, =\panic_str
|
||||
clrex @ Clear exclusive monitor
|
||||
eret
|
||||
.endm
|
||||
|
||||
@ -414,6 +416,10 @@ guest_trap:
|
||||
mrcne p15, 4, r2, c6, c0, 4 @ HPFAR
|
||||
bne 3f
|
||||
|
||||
/* Preserve PAR */
|
||||
mrrc p15, 0, r0, r1, c7 @ PAR
|
||||
push {r0, r1}
|
||||
|
||||
/* Resolve IPA using the xFAR */
|
||||
mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR
|
||||
isb
|
||||
@ -424,13 +430,20 @@ guest_trap:
|
||||
lsl r2, r2, #4
|
||||
orr r2, r2, r1, lsl #24
|
||||
|
||||
/* Restore PAR */
|
||||
pop {r0, r1}
|
||||
mcrr p15, 0, r0, r1, c7 @ PAR
|
||||
|
||||
3: load_vcpu @ Load VCPU pointer to r0
|
||||
str r2, [r0, #VCPU_HPFAR]
|
||||
|
||||
1: mov r1, #ARM_EXCEPTION_HVC
|
||||
b __kvm_vcpu_return
|
||||
|
||||
4: pop {r0, r1, r2} @ Failed translation, return to guest
|
||||
4: pop {r0, r1} @ Failed translation, return to guest
|
||||
mcrr p15, 0, r0, r1, c7 @ PAR
|
||||
clrex
|
||||
pop {r0, r1, r2}
|
||||
eret
|
||||
|
||||
/*
|
||||
@ -456,6 +469,7 @@ switch_to_guest_vfp:
|
||||
|
||||
pop {r3-r7}
|
||||
pop {r0-r2}
|
||||
clrex
|
||||
eret
|
||||
#endif
|
||||
|
||||
|
@ -302,11 +302,14 @@ vcpu .req r0 @ vcpu pointer always in r0
|
||||
.endif
|
||||
|
||||
mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
|
||||
mrrc p15, 0, r4, r5, c7 @ PAR
|
||||
|
||||
.if \store_to_vcpu == 0
|
||||
push {r2}
|
||||
push {r2,r4-r5}
|
||||
.else
|
||||
str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
|
||||
add r12, vcpu, #CP15_OFFSET(c7_PAR)
|
||||
strd r4, r5, [r12]
|
||||
.endif
|
||||
.endm
|
||||
|
||||
@ -319,12 +322,15 @@ vcpu .req r0 @ vcpu pointer always in r0
|
||||
*/
|
||||
.macro write_cp15_state read_from_vcpu
|
||||
.if \read_from_vcpu == 0
|
||||
pop {r2}
|
||||
pop {r2,r4-r5}
|
||||
.else
|
||||
ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
|
||||
add r12, vcpu, #CP15_OFFSET(c7_PAR)
|
||||
ldrd r4, r5, [r12]
|
||||
.endif
|
||||
|
||||
mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
|
||||
mcrr p15, 0, r4, r5, c7 @ PAR
|
||||
|
||||
.if \read_from_vcpu == 0
|
||||
pop {r2-r12}
|
||||
|
@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
sign_extend = kvm_vcpu_dabt_issext(vcpu);
|
||||
rt = kvm_vcpu_dabt_get_rd(vcpu);
|
||||
|
||||
if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
|
||||
/* IO memory trying to read/write pc */
|
||||
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
|
||||
return 1;
|
||||
}
|
||||
|
||||
mmio->is_write = is_write;
|
||||
mmio->phys_addr = fault_ipa;
|
||||
mmio->len = len;
|
||||
|
@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
|
||||
if (!pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
/* stage-2 pgd must be aligned to its size */
|
||||
VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
|
||||
|
||||
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
|
||||
kvm_clean_pgd(pgd);
|
||||
kvm->arch.pgd = pgd;
|
||||
|
@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
|
||||
* kvm_psci_call - handle PSCI call if r0 value is in range
|
||||
* @vcpu: Pointer to the VCPU struct
|
||||
*
|
||||
* Handle PSCI calls from guests through traps from HVC or SMC instructions.
|
||||
* Handle PSCI calls from guests through traps from HVC instructions.
|
||||
* The calling convention is similar to SMC calls to the secure world where
|
||||
* the function number is placed in r0 and this function returns true if the
|
||||
* function number specified in r0 is withing the PSCI range, and false
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_coproc.h>
|
||||
|
||||
#include <kvm/arm_arch_timer.h>
|
||||
|
||||
/******************************************************************************
|
||||
* Cortex-A15 Reset Values
|
||||
*/
|
||||
@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = {
|
||||
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
|
||||
};
|
||||
|
||||
static const struct kvm_irq_level a15_vtimer_irq = {
|
||||
.irq = 27,
|
||||
.level = 1,
|
||||
};
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
* Exported reset function
|
||||
@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = {
|
||||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_regs *cpu_reset;
|
||||
const struct kvm_irq_level *cpu_vtimer_irq;
|
||||
|
||||
switch (vcpu->arch.target) {
|
||||
case KVM_ARM_TARGET_CORTEX_A15:
|
||||
@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
return -EINVAL;
|
||||
cpu_reset = &a15_regs_reset;
|
||||
vcpu->arch.midr = read_cpuid_id();
|
||||
cpu_vtimer_irq = &a15_vtimer_irq;
|
||||
break;
|
||||
default:
|
||||
return -ENODEV;
|
||||
@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
/* Reset CP15 registers */
|
||||
kvm_reset_coprocs(vcpu);
|
||||
|
||||
/* Reset arch_timer context */
|
||||
kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file)
|
||||
|
||||
ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
|
||||
asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
|
||||
KVM := ../../../virt/kvm
|
||||
|
||||
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
|
||||
coalesced_mmio.o irq_comm.o)
|
||||
common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
|
||||
$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
|
||||
|
||||
ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
|
||||
common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
|
||||
common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
|
||||
endif
|
||||
|
||||
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
|
||||
|
@ -107,8 +107,9 @@ struct kvmppc_vcpu_book3s {
|
||||
#define CONTEXT_GUEST 1
|
||||
#define CONTEXT_GUEST_END 2
|
||||
|
||||
#define VSID_REAL 0x1fffffffffc00000ULL
|
||||
#define VSID_BAT 0x1fffffffffb00000ULL
|
||||
#define VSID_REAL 0x0fffffffffc00000ULL
|
||||
#define VSID_BAT 0x0fffffffffb00000ULL
|
||||
#define VSID_1T 0x1000000000000000ULL
|
||||
#define VSID_REAL_DR 0x2000000000000000ULL
|
||||
#define VSID_REAL_IR 0x4000000000000000ULL
|
||||
#define VSID_PR 0x8000000000000000ULL
|
||||
@ -123,6 +124,7 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
|
||||
extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
|
||||
extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
|
||||
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
|
||||
extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
|
||||
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
|
||||
extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu, unsigned long addr,
|
||||
|
@ -5,9 +5,10 @@
|
||||
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
|
||||
|
||||
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
|
||||
KVM := ../../../virt/kvm
|
||||
|
||||
common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
|
||||
eventfd.o)
|
||||
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
|
||||
$(KVM)/eventfd.o
|
||||
|
||||
CFLAGS_44x_tlb.o := -I.
|
||||
CFLAGS_e500_mmu.o := -I.
|
||||
@ -53,7 +54,7 @@ kvm-e500mc-objs := \
|
||||
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
|
||||
|
||||
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
|
||||
../../../virt/kvm/coalesced_mmio.o \
|
||||
$(KVM)/coalesced_mmio.o \
|
||||
fpu.o \
|
||||
book3s_paired_singles.o \
|
||||
book3s_pr.o \
|
||||
@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
|
||||
book3s_xics.o
|
||||
|
||||
kvm-book3s_64-module-objs := \
|
||||
../../../virt/kvm/kvm_main.o \
|
||||
../../../virt/kvm/eventfd.o \
|
||||
$(KVM)/kvm_main.o \
|
||||
$(KVM)/eventfd.o \
|
||||
powerpc.o \
|
||||
emulate.o \
|
||||
book3s.o \
|
||||
@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
|
||||
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
|
||||
|
||||
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
|
||||
kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
|
||||
kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
|
||||
|
||||
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
#include <asm/mmu-hash64.h>
|
||||
|
||||
/* #define DEBUG_MMU */
|
||||
|
||||
@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
|
||||
{
|
||||
return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
|
||||
}
|
||||
|
||||
static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
|
||||
{
|
||||
return (1ul << kvmppc_slb_sid_shift(slbe)) - 1;
|
||||
}
|
||||
|
||||
static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
|
||||
{
|
||||
eaddr &= kvmppc_slb_offset_mask(slb);
|
||||
|
||||
return (eaddr >> VPN_SHIFT) |
|
||||
((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT));
|
||||
}
|
||||
|
||||
static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
bool data)
|
||||
{
|
||||
@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
if (!slb)
|
||||
return 0;
|
||||
|
||||
if (slb->tb)
|
||||
return (((u64)eaddr >> 12) & 0xfffffff) |
|
||||
(((u64)slb->vsid) << 28);
|
||||
|
||||
return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16);
|
||||
return kvmppc_slb_calc_vpn(slb, eaddr);
|
||||
}
|
||||
|
||||
static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
|
||||
@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
|
||||
static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
|
||||
{
|
||||
int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
|
||||
return ((eaddr & 0xfffffff) >> p);
|
||||
|
||||
return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
|
||||
}
|
||||
|
||||
static hva_t kvmppc_mmu_book3s_64_get_pteg(
|
||||
@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
|
||||
bool second)
|
||||
{
|
||||
u64 hash, pteg, htabsize;
|
||||
u32 page;
|
||||
u32 ssize;
|
||||
hva_t r;
|
||||
u64 vpn;
|
||||
|
||||
page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
|
||||
htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
|
||||
|
||||
hash = slbe->vsid ^ page;
|
||||
vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
|
||||
ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
|
||||
hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
|
||||
if (second)
|
||||
hash = ~hash;
|
||||
hash &= ((1ULL << 39ULL) - 1ULL);
|
||||
@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
|
||||
u64 avpn;
|
||||
|
||||
avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
|
||||
avpn |= slbe->vsid << (28 - p);
|
||||
avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
|
||||
|
||||
if (p < 24)
|
||||
avpn >>= ((80 - p) - 56) - 8;
|
||||
@ -167,7 +185,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
int i;
|
||||
u8 key = 0;
|
||||
bool found = false;
|
||||
bool perm_err = false;
|
||||
int second = 0;
|
||||
ulong mp_ea = vcpu->arch.magic_page_ea;
|
||||
|
||||
@ -190,13 +207,15 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
if (!slbe)
|
||||
goto no_seg_found;
|
||||
|
||||
avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
|
||||
if (slbe->tb)
|
||||
avpn |= SLB_VSID_B_1T;
|
||||
|
||||
do_second:
|
||||
ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
|
||||
if (kvm_is_error_hva(ptegp))
|
||||
goto no_page_found;
|
||||
|
||||
avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
|
||||
|
||||
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
|
||||
printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
|
||||
goto no_page_found;
|
||||
@ -219,7 +238,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
continue;
|
||||
|
||||
/* AVPN compare */
|
||||
if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) {
|
||||
if (HPTE_V_COMPARE(avpn, v)) {
|
||||
u8 pp = (r & HPTE_R_PP) | key;
|
||||
int eaddr_mask = 0xFFF;
|
||||
|
||||
@ -248,11 +267,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
break;
|
||||
}
|
||||
|
||||
if (!gpte->may_read) {
|
||||
perm_err = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
|
||||
"-> 0x%lx\n",
|
||||
eaddr, avpn, gpte->vpage, gpte->raddr);
|
||||
@ -281,6 +295,8 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
if (pteg[i+1] != oldr)
|
||||
copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
|
||||
|
||||
if (!gpte->may_read)
|
||||
return -EPERM;
|
||||
return 0;
|
||||
} else {
|
||||
dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
|
||||
@ -296,13 +312,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
no_page_found:
|
||||
|
||||
|
||||
if (perm_err)
|
||||
return -EPERM;
|
||||
|
||||
return -ENOENT;
|
||||
|
||||
no_seg_found:
|
||||
@ -334,7 +344,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
|
||||
slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
|
||||
slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
|
||||
slbe->esid = slbe->tb ? esid_1t : esid;
|
||||
slbe->vsid = rs >> 12;
|
||||
slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
|
||||
slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
|
||||
slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
|
||||
slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0;
|
||||
@ -375,6 +385,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
|
||||
static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
|
||||
{
|
||||
struct kvmppc_slb *slbe;
|
||||
u64 seg_size;
|
||||
|
||||
dprintk("KVM MMU: slbie(0x%llx)\n", ea);
|
||||
|
||||
@ -386,8 +397,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
|
||||
dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
|
||||
|
||||
slbe->valid = false;
|
||||
slbe->orige = 0;
|
||||
slbe->origv = 0;
|
||||
|
||||
kvmppc_mmu_map_segment(vcpu, ea);
|
||||
seg_size = 1ull << kvmppc_slb_sid_shift(slbe);
|
||||
kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size);
|
||||
}
|
||||
|
||||
static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
|
||||
@ -396,8 +410,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
|
||||
|
||||
dprintk("KVM MMU: slbia()\n");
|
||||
|
||||
for (i = 1; i < vcpu->arch.slb_nr; i++)
|
||||
for (i = 1; i < vcpu->arch.slb_nr; i++) {
|
||||
vcpu->arch.slb[i].valid = false;
|
||||
vcpu->arch.slb[i].orige = 0;
|
||||
vcpu->arch.slb[i].origv = 0;
|
||||
}
|
||||
|
||||
if (vcpu->arch.shared->msr & MSR_IR) {
|
||||
kvmppc_mmu_flush_segments(vcpu);
|
||||
@ -467,8 +484,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
|
||||
|
||||
if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
|
||||
if (slb)
|
||||
if (slb) {
|
||||
gvsid = slb->vsid;
|
||||
if (slb->tb) {
|
||||
gvsid <<= SID_SHIFT_1T - SID_SHIFT;
|
||||
gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
|
||||
gvsid |= VSID_1T;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
|
||||
|
@ -301,6 +301,23 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
|
||||
{
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
ulong seg_mask = -seg_size;
|
||||
int i;
|
||||
|
||||
for (i = 1; i < svcpu->slb_max; i++) {
|
||||
if ((svcpu->slb[i].esid & SLB_ESID_V) &&
|
||||
(svcpu->slb[i].esid & seg_mask) == ea) {
|
||||
/* Invalidate this entry */
|
||||
svcpu->slb[i].esid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
svcpu_put(svcpu);
|
||||
}
|
||||
|
||||
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
|
||||
@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
|
||||
return -1;
|
||||
vcpu3s->context_id[0] = err;
|
||||
|
||||
vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1)
|
||||
vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1)
|
||||
<< ESID_BITS) - 1;
|
||||
vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS;
|
||||
vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS;
|
||||
vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
|
||||
|
||||
kvmppc_mmu_hpte_init(vcpu);
|
||||
|
@ -66,10 +66,6 @@ slb_exit_skip_ ## num:
|
||||
|
||||
ld r12, PACA_SLBSHADOWPTR(r13)
|
||||
|
||||
/* Save off the first entry so we can slbie it later */
|
||||
ld r10, SHADOW_SLB_ESID(0)(r12)
|
||||
ld r11, SHADOW_SLB_VSID(0)(r12)
|
||||
|
||||
/* Remove bolted entries */
|
||||
UNBOLT_SLB_ENTRY(0)
|
||||
UNBOLT_SLB_ENTRY(1)
|
||||
@ -81,15 +77,10 @@ slb_exit_skip_ ## num:
|
||||
|
||||
/* Flush SLB */
|
||||
|
||||
li r10, 0
|
||||
slbmte r10, r10
|
||||
slbia
|
||||
|
||||
/* r0 = esid & ESID_MASK */
|
||||
rldicr r10, r10, 0, 35
|
||||
/* r0 |= CLASS_BIT(VSID) */
|
||||
rldic r12, r11, 56 - 36, 36
|
||||
or r10, r10, r12
|
||||
slbie r10
|
||||
|
||||
/* Fill SLB with our shadow */
|
||||
|
||||
lbz r12, SVCPU_SLB_MAX(r3)
|
||||
|
@ -1239,8 +1239,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
#ifdef CONFIG_PPC64
|
||||
int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
|
||||
{
|
||||
/* No flags */
|
||||
info->flags = 0;
|
||||
info->flags = KVM_PPC_1T_SEGMENTS;
|
||||
|
||||
/* SLB is always 64 entries */
|
||||
info->slb_size = 64;
|
||||
|
@ -796,7 +796,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
|
||||
kvmppc_fill_pt_regs(®s);
|
||||
timer_interrupt(®s);
|
||||
break;
|
||||
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
|
||||
#if defined(CONFIG_PPC_DOORBELL)
|
||||
case BOOKE_INTERRUPT_DOORBELL:
|
||||
kvmppc_fill_pt_regs(®s);
|
||||
doorbell_exception(®s);
|
||||
|
@ -169,6 +169,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
vcpu->arch.shared->sprg3 = spr_val;
|
||||
break;
|
||||
|
||||
/* PIR can legally be written, but we ignore it */
|
||||
case SPRN_PIR: break;
|
||||
|
||||
default:
|
||||
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
|
||||
spr_val);
|
||||
|
@ -62,13 +62,20 @@ struct sca_block {
|
||||
#define CPUSTAT_MCDS 0x00000100
|
||||
#define CPUSTAT_SM 0x00000080
|
||||
#define CPUSTAT_G 0x00000008
|
||||
#define CPUSTAT_GED 0x00000004
|
||||
#define CPUSTAT_J 0x00000002
|
||||
#define CPUSTAT_P 0x00000001
|
||||
|
||||
struct kvm_s390_sie_block {
|
||||
atomic_t cpuflags; /* 0x0000 */
|
||||
__u32 prefix; /* 0x0004 */
|
||||
__u8 reserved8[32]; /* 0x0008 */
|
||||
__u8 reserved08[4]; /* 0x0008 */
|
||||
#define PROG_IN_SIE (1<<0)
|
||||
__u32 prog0c; /* 0x000c */
|
||||
__u8 reserved10[16]; /* 0x0010 */
|
||||
#define PROG_BLOCK_SIE 0x00000001
|
||||
atomic_t prog20; /* 0x0020 */
|
||||
__u8 reserved24[4]; /* 0x0024 */
|
||||
__u64 cputm; /* 0x0028 */
|
||||
__u64 ckc; /* 0x0030 */
|
||||
__u64 epoch; /* 0x0038 */
|
||||
@ -90,7 +97,8 @@ struct kvm_s390_sie_block {
|
||||
__u32 scaoh; /* 0x005c */
|
||||
__u8 reserved60; /* 0x0060 */
|
||||
__u8 ecb; /* 0x0061 */
|
||||
__u8 reserved62[2]; /* 0x0062 */
|
||||
__u8 ecb2; /* 0x0062 */
|
||||
__u8 reserved63[1]; /* 0x0063 */
|
||||
__u32 scaol; /* 0x0064 */
|
||||
__u8 reserved68[4]; /* 0x0068 */
|
||||
__u32 todpr; /* 0x006c */
|
||||
@ -130,6 +138,7 @@ struct kvm_vcpu_stat {
|
||||
u32 deliver_program_int;
|
||||
u32 deliver_io_int;
|
||||
u32 exit_wait_state;
|
||||
u32 instruction_pfmf;
|
||||
u32 instruction_stidp;
|
||||
u32 instruction_spx;
|
||||
u32 instruction_stpx;
|
||||
@ -166,7 +175,7 @@ struct kvm_s390_ext_info {
|
||||
};
|
||||
|
||||
#define PGM_OPERATION 0x01
|
||||
#define PGM_PRIVILEGED_OPERATION 0x02
|
||||
#define PGM_PRIVILEGED_OP 0x02
|
||||
#define PGM_EXECUTE 0x03
|
||||
#define PGM_PROTECTION 0x04
|
||||
#define PGM_ADDRESSING 0x05
|
||||
@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt {
|
||||
atomic_t active;
|
||||
struct kvm_s390_float_interrupt *float_int;
|
||||
int timer_due; /* event indicator for waitqueue below */
|
||||
wait_queue_head_t wq;
|
||||
wait_queue_head_t *wq;
|
||||
atomic_t *cpuflags;
|
||||
unsigned int action_bits;
|
||||
};
|
||||
@ -266,4 +275,5 @@ struct kvm_arch{
|
||||
};
|
||||
|
||||
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
|
||||
extern char sie_exit;
|
||||
#endif
|
||||
|
@ -14,3 +14,13 @@
|
||||
/* Per-CPU flags for PMU states */
|
||||
#define PMU_F_RESERVED 0x1000
|
||||
#define PMU_F_ENABLED 0x2000
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
|
||||
/* Perf callbacks */
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_misc_flags(struct pt_regs *regs);
|
||||
#define perf_misc_flags(regs) perf_misc_flags(regs)
|
||||
|
||||
#endif /* CONFIG_64BIT */
|
||||
|
@ -296,18 +296,16 @@ extern unsigned long MODULES_END;
|
||||
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
|
||||
|
||||
/* Page status table bits for virtualization */
|
||||
#define RCP_ACC_BITS 0xf0000000UL
|
||||
#define RCP_FP_BIT 0x08000000UL
|
||||
#define RCP_PCL_BIT 0x00800000UL
|
||||
#define RCP_HR_BIT 0x00400000UL
|
||||
#define RCP_HC_BIT 0x00200000UL
|
||||
#define RCP_GR_BIT 0x00040000UL
|
||||
#define RCP_GC_BIT 0x00020000UL
|
||||
#define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */
|
||||
|
||||
/* User dirty / referenced bit for KVM's migration feature */
|
||||
#define KVM_UR_BIT 0x00008000UL
|
||||
#define KVM_UC_BIT 0x00004000UL
|
||||
#define PGSTE_ACC_BITS 0xf0000000UL
|
||||
#define PGSTE_FP_BIT 0x08000000UL
|
||||
#define PGSTE_PCL_BIT 0x00800000UL
|
||||
#define PGSTE_HR_BIT 0x00400000UL
|
||||
#define PGSTE_HC_BIT 0x00200000UL
|
||||
#define PGSTE_GR_BIT 0x00040000UL
|
||||
#define PGSTE_GC_BIT 0x00020000UL
|
||||
#define PGSTE_UR_BIT 0x00008000UL
|
||||
#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */
|
||||
#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */
|
||||
|
||||
#else /* CONFIG_64BIT */
|
||||
|
||||
@ -364,18 +362,16 @@ extern unsigned long MODULES_END;
|
||||
| _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
|
||||
|
||||
/* Page status table bits for virtualization */
|
||||
#define RCP_ACC_BITS 0xf000000000000000UL
|
||||
#define RCP_FP_BIT 0x0800000000000000UL
|
||||
#define RCP_PCL_BIT 0x0080000000000000UL
|
||||
#define RCP_HR_BIT 0x0040000000000000UL
|
||||
#define RCP_HC_BIT 0x0020000000000000UL
|
||||
#define RCP_GR_BIT 0x0004000000000000UL
|
||||
#define RCP_GC_BIT 0x0002000000000000UL
|
||||
#define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
|
||||
|
||||
/* User dirty / referenced bit for KVM's migration feature */
|
||||
#define KVM_UR_BIT 0x0000800000000000UL
|
||||
#define KVM_UC_BIT 0x0000400000000000UL
|
||||
#define PGSTE_ACC_BITS 0xf000000000000000UL
|
||||
#define PGSTE_FP_BIT 0x0800000000000000UL
|
||||
#define PGSTE_PCL_BIT 0x0080000000000000UL
|
||||
#define PGSTE_HR_BIT 0x0040000000000000UL
|
||||
#define PGSTE_HC_BIT 0x0020000000000000UL
|
||||
#define PGSTE_GR_BIT 0x0004000000000000UL
|
||||
#define PGSTE_GC_BIT 0x0002000000000000UL
|
||||
#define PGSTE_UR_BIT 0x0000800000000000UL
|
||||
#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */
|
||||
#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
|
||||
|
||||
#endif /* CONFIG_64BIT */
|
||||
|
||||
@ -615,8 +611,8 @@ static inline pgste_t pgste_get_lock(pte_t *ptep)
|
||||
asm(
|
||||
" lg %0,%2\n"
|
||||
"0: lgr %1,%0\n"
|
||||
" nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */
|
||||
" oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */
|
||||
" nihh %0,0xff7f\n" /* clear PCL bit in old */
|
||||
" oihh %1,0x0080\n" /* set PCL bit in new */
|
||||
" csg %0,%1,%2\n"
|
||||
" jl 0b\n"
|
||||
: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
|
||||
@ -629,7 +625,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
asm(
|
||||
" nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */
|
||||
" nihh %1,0xff7f\n" /* clear PCL bit */
|
||||
" stg %1,%0\n"
|
||||
: "=Q" (ptep[PTRS_PER_PTE])
|
||||
: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
|
||||
@ -662,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
|
||||
else if (bits)
|
||||
page_reset_referenced(address);
|
||||
/* Transfer page changed & referenced bit to guest bits in pgste */
|
||||
pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
|
||||
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
|
||||
/* Get host changed & referenced bits from pgste */
|
||||
bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
|
||||
bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
|
||||
/* Transfer page changed & referenced bit to kvm user bits */
|
||||
pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */
|
||||
pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
|
||||
/* Clear relevant host bits in pgste. */
|
||||
pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
|
||||
pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
|
||||
pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
|
||||
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
|
||||
/* Copy page access key and fetch protection bit to pgste */
|
||||
pgste_val(pgste) |=
|
||||
(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
|
||||
@ -690,15 +686,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
|
||||
/* Get referenced bit from storage key */
|
||||
young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
|
||||
if (young)
|
||||
pgste_val(pgste) |= RCP_GR_BIT;
|
||||
pgste_val(pgste) |= PGSTE_GR_BIT;
|
||||
/* Get host referenced bit from pgste */
|
||||
if (pgste_val(pgste) & RCP_HR_BIT) {
|
||||
pgste_val(pgste) &= ~RCP_HR_BIT;
|
||||
if (pgste_val(pgste) & PGSTE_HR_BIT) {
|
||||
pgste_val(pgste) &= ~PGSTE_HR_BIT;
|
||||
young = 1;
|
||||
}
|
||||
/* Transfer referenced bit to kvm user bits and pte */
|
||||
if (young) {
|
||||
pgste_val(pgste) |= KVM_UR_BIT;
|
||||
pgste_val(pgste) |= PGSTE_UR_BIT;
|
||||
pte_val(*ptep) |= _PAGE_SWR;
|
||||
}
|
||||
#endif
|
||||
@ -720,7 +716,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
|
||||
* The guest C/R information is still in the PGSTE, set real
|
||||
* key C/R to 0.
|
||||
*/
|
||||
nkey = (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
|
||||
nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
||||
page_set_storage_key(address, nkey, 0);
|
||||
#endif
|
||||
}
|
||||
@ -750,6 +746,7 @@ struct gmap {
|
||||
struct mm_struct *mm;
|
||||
unsigned long *table;
|
||||
unsigned long asce;
|
||||
void *private;
|
||||
struct list_head crst_list;
|
||||
};
|
||||
|
||||
@ -808,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
|
||||
pte_t *ptep, pgste_t pgste)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
if (pgste_val(pgste) & RCP_IN_BIT) {
|
||||
pgste_val(pgste) &= ~RCP_IN_BIT;
|
||||
if (pgste_val(pgste) & PGSTE_IN_BIT) {
|
||||
pgste_val(pgste) &= ~PGSTE_IN_BIT;
|
||||
gmap_do_ipte_notify(mm, addr, ptep);
|
||||
}
|
||||
#endif
|
||||
@ -977,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
|
||||
if (mm_has_pgste(mm)) {
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = pgste_update_all(ptep, pgste);
|
||||
dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
|
||||
pgste_val(pgste) &= ~KVM_UC_BIT;
|
||||
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
|
||||
pgste_val(pgste) &= ~PGSTE_UC_BIT;
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
return dirty;
|
||||
}
|
||||
@ -997,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
|
||||
if (mm_has_pgste(mm)) {
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = pgste_update_young(ptep, pgste);
|
||||
young = !!(pgste_val(pgste) & KVM_UR_BIT);
|
||||
pgste_val(pgste) &= ~KVM_UR_BIT;
|
||||
young = !!(pgste_val(pgste) & PGSTE_UR_BIT);
|
||||
pgste_val(pgste) &= ~PGSTE_UR_BIT;
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
}
|
||||
return young;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#define ASM_OFFSETS_C
|
||||
|
||||
#include <linux/kbuild.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/cputime.h>
|
||||
#include <asm/vdso.h>
|
||||
@ -162,6 +163,8 @@ int main(void)
|
||||
DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
|
||||
DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
|
||||
DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
|
||||
DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
|
||||
DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
|
||||
#endif /* CONFIG_32BIT */
|
||||
return 0;
|
||||
}
|
||||
|
@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
|
||||
_TIF_MCCK_PENDING)
|
||||
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
|
||||
_TIF_SYSCALL_TRACEPOINT)
|
||||
_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
|
||||
|
||||
#define BASED(name) name-system_call(%r13)
|
||||
|
||||
@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro HANDLE_SIE_INTERCEPT scratch,pgmcheck
|
||||
.macro HANDLE_SIE_INTERCEPT scratch,reason
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
|
||||
tmhh %r8,0x0001 # interrupting from user ?
|
||||
jnz .+42
|
||||
jnz .+62
|
||||
lgr \scratch,%r9
|
||||
slg \scratch,BASED(.Lsie_loop)
|
||||
clg \scratch,BASED(.Lsie_length)
|
||||
.if \pgmcheck
|
||||
slg \scratch,BASED(.Lsie_critical)
|
||||
clg \scratch,BASED(.Lsie_critical_length)
|
||||
.if \reason==1
|
||||
# Some program interrupts are suppressing (e.g. protection).
|
||||
# We must also check the instruction after SIE in that case.
|
||||
# do_protection_exception will rewind to rewind_pad
|
||||
jh .+22
|
||||
jh .+42
|
||||
.else
|
||||
jhe .+22
|
||||
jhe .+42
|
||||
.endif
|
||||
lg %r9,BASED(.Lsie_loop)
|
||||
LPP BASED(.Lhost_id) # set host id
|
||||
lg %r14,__SF_EMPTY(%r15) # get control block pointer
|
||||
LPP __SF_EMPTY+16(%r15) # set host id
|
||||
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
|
||||
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
|
||||
larl %r9,sie_exit # skip forward to sie_exit
|
||||
mvi __SF_EMPTY+31(%r15),\reason # set exit reason
|
||||
#endif
|
||||
.endm
|
||||
|
||||
@ -450,7 +453,7 @@ ENTRY(io_int_handler)
|
||||
lg %r12,__LC_THREAD_INFO
|
||||
larl %r13,system_call
|
||||
lmg %r8,%r9,__LC_IO_OLD_PSW
|
||||
HANDLE_SIE_INTERCEPT %r14,0
|
||||
HANDLE_SIE_INTERCEPT %r14,2
|
||||
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
|
||||
tmhh %r8,0x0001 # interrupting from user?
|
||||
jz io_skip
|
||||
@ -603,7 +606,7 @@ ENTRY(ext_int_handler)
|
||||
lg %r12,__LC_THREAD_INFO
|
||||
larl %r13,system_call
|
||||
lmg %r8,%r9,__LC_EXT_OLD_PSW
|
||||
HANDLE_SIE_INTERCEPT %r14,0
|
||||
HANDLE_SIE_INTERCEPT %r14,3
|
||||
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
|
||||
tmhh %r8,0x0001 # interrupting from user ?
|
||||
jz ext_skip
|
||||
@ -651,7 +654,7 @@ ENTRY(mcck_int_handler)
|
||||
lg %r12,__LC_THREAD_INFO
|
||||
larl %r13,system_call
|
||||
lmg %r8,%r9,__LC_MCK_OLD_PSW
|
||||
HANDLE_SIE_INTERCEPT %r14,0
|
||||
HANDLE_SIE_INTERCEPT %r14,4
|
||||
tm __LC_MCCK_CODE,0x80 # system damage?
|
||||
jo mcck_panic # yes -> rest of mcck code invalid
|
||||
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
|
||||
@ -945,56 +948,50 @@ ENTRY(sie64a)
|
||||
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
|
||||
stg %r2,__SF_EMPTY(%r15) # save control block pointer
|
||||
stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
|
||||
xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0
|
||||
xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
|
||||
lmg %r0,%r13,0(%r3) # load guest gprs 0-13
|
||||
# some program checks are suppressing. C code (e.g. do_protection_exception)
|
||||
# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
|
||||
# instructions in the sie_loop should not cause program interrupts. So
|
||||
# lets use a nop (47 00 00 00) as a landing pad.
|
||||
# See also HANDLE_SIE_INTERCEPT
|
||||
rewind_pad:
|
||||
nop 0
|
||||
sie_loop:
|
||||
lg %r14,__LC_THREAD_INFO # pointer thread_info struct
|
||||
tm __TI_flags+7(%r14),_TIF_EXIT_SIE
|
||||
jnz sie_exit
|
||||
lg %r14,__LC_GMAP # get gmap pointer
|
||||
ltgr %r14,%r14
|
||||
jz sie_gmap
|
||||
lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
|
||||
sie_gmap:
|
||||
lg %r14,__SF_EMPTY(%r15) # get control block pointer
|
||||
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
|
||||
tm __SIE_PROG20+3(%r14),1 # last exit...
|
||||
jnz sie_done
|
||||
LPP __SF_EMPTY(%r15) # set guest id
|
||||
sie 0(%r14)
|
||||
sie_done:
|
||||
LPP __SF_EMPTY+16(%r15) # set host id
|
||||
lg %r14,__LC_THREAD_INFO # pointer thread_info struct
|
||||
sie_exit:
|
||||
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
|
||||
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
|
||||
# some program checks are suppressing. C code (e.g. do_protection_exception)
|
||||
# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
|
||||
# instructions beween sie64a and sie_done should not cause program
|
||||
# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
|
||||
# See also HANDLE_SIE_INTERCEPT
|
||||
rewind_pad:
|
||||
nop 0
|
||||
.globl sie_exit
|
||||
sie_exit:
|
||||
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
|
||||
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
|
||||
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
|
||||
lghi %r2,0
|
||||
lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
|
||||
br %r14
|
||||
sie_fault:
|
||||
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
|
||||
lg %r14,__LC_THREAD_INFO # pointer thread_info struct
|
||||
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
|
||||
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
|
||||
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
|
||||
lghi %r2,-EFAULT
|
||||
br %r14
|
||||
lghi %r14,-EFAULT
|
||||
stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
|
||||
j sie_exit
|
||||
|
||||
.align 8
|
||||
.Lsie_loop:
|
||||
.quad sie_loop
|
||||
.Lsie_length:
|
||||
.quad sie_done - sie_loop
|
||||
.Lhost_id:
|
||||
.quad 0
|
||||
.Lsie_critical:
|
||||
.quad sie_gmap
|
||||
.Lsie_critical_length:
|
||||
.quad sie_done - sie_gmap
|
||||
|
||||
EX_TABLE(rewind_pad,sie_fault)
|
||||
EX_TABLE(sie_loop,sie_fault)
|
||||
EX_TABLE(sie_exit,sie_fault)
|
||||
#endif
|
||||
|
||||
.section .rodata, "a"
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/export.h>
|
||||
#include <asm/irq.h>
|
||||
@ -39,6 +40,57 @@ int perf_num_counters(void)
|
||||
}
|
||||
EXPORT_SYMBOL(perf_num_counters);
|
||||
|
||||
static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
|
||||
{
|
||||
struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
|
||||
|
||||
if (!stack)
|
||||
return NULL;
|
||||
|
||||
return (struct kvm_s390_sie_block *) stack->empty1[0];
|
||||
}
|
||||
|
||||
static bool is_in_guest(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ip = instruction_pointer(regs);
|
||||
|
||||
if (user_mode(regs))
|
||||
return false;
|
||||
|
||||
return ip == (unsigned long) &sie_exit;
|
||||
}
|
||||
|
||||
static unsigned long guest_is_user_mode(struct pt_regs *regs)
|
||||
{
|
||||
return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
|
||||
}
|
||||
|
||||
static unsigned long instruction_pointer_guest(struct pt_regs *regs)
|
||||
{
|
||||
return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
|
||||
}
|
||||
|
||||
unsigned long perf_instruction_pointer(struct pt_regs *regs)
|
||||
{
|
||||
return is_in_guest(regs) ? instruction_pointer_guest(regs)
|
||||
: instruction_pointer(regs);
|
||||
}
|
||||
|
||||
static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
|
||||
{
|
||||
return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
|
||||
: PERF_RECORD_MISC_GUEST_KERNEL;
|
||||
}
|
||||
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
if (is_in_guest(regs))
|
||||
return perf_misc_guest_flags(regs);
|
||||
|
||||
return user_mode(regs) ? PERF_RECORD_MISC_USER
|
||||
: PERF_RECORD_MISC_KERNEL;
|
||||
}
|
||||
|
||||
void perf_event_print_debug(void)
|
||||
{
|
||||
struct cpumf_ctr_info cf_info;
|
||||
|
@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount);
|
||||
#endif
|
||||
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
|
||||
EXPORT_SYMBOL(sie64a);
|
||||
EXPORT_SYMBOL(sie_exit);
|
||||
#endif
|
||||
EXPORT_SYMBOL(memcpy);
|
||||
EXPORT_SYMBOL(memset);
|
||||
|
@ -6,7 +6,8 @@
|
||||
# it under the terms of the GNU General Public License (version 2 only)
|
||||
# as published by the Free Software Foundation.
|
||||
|
||||
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
|
||||
KVM := ../../../virt/kvm
|
||||
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
|
||||
|
||||
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
|
||||
|
||||
|
@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
trace_kvm_s390_handle_diag(vcpu, code);
|
||||
switch (code) {
|
||||
case 0x10:
|
||||
|
@ -22,87 +22,6 @@
|
||||
#include "trace.h"
|
||||
#include "trace-s390.h"
|
||||
|
||||
static int handle_lctlg(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
|
||||
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
|
||||
u64 useraddr;
|
||||
int reg, rc;
|
||||
|
||||
vcpu->stat.instruction_lctlg++;
|
||||
|
||||
useraddr = kvm_s390_get_base_disp_rsy(vcpu);
|
||||
|
||||
if (useraddr & 7)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
reg = reg1;
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
|
||||
useraddr);
|
||||
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
|
||||
|
||||
do {
|
||||
rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
|
||||
(u64 __user *) useraddr);
|
||||
if (rc)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
useraddr += 8;
|
||||
if (reg == reg3)
|
||||
break;
|
||||
reg = (reg + 1) % 16;
|
||||
} while (1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_lctl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
|
||||
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
|
||||
u64 useraddr;
|
||||
u32 val = 0;
|
||||
int reg, rc;
|
||||
|
||||
vcpu->stat.instruction_lctl++;
|
||||
|
||||
useraddr = kvm_s390_get_base_disp_rs(vcpu);
|
||||
|
||||
if (useraddr & 3)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
|
||||
useraddr);
|
||||
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
|
||||
|
||||
reg = reg1;
|
||||
do {
|
||||
rc = get_guest(vcpu, val, (u32 __user *) useraddr);
|
||||
if (rc)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
|
||||
vcpu->arch.sie_block->gcr[reg] |= val;
|
||||
useraddr += 4;
|
||||
if (reg == reg3)
|
||||
break;
|
||||
reg = (reg + 1) % 16;
|
||||
} while (1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const intercept_handler_t eb_handlers[256] = {
|
||||
[0x2f] = handle_lctlg,
|
||||
[0x8a] = kvm_s390_handle_priv_eb,
|
||||
};
|
||||
|
||||
static int handle_eb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static const intercept_handler_t instruction_handlers[256] = {
|
||||
[0x01] = kvm_s390_handle_01,
|
||||
@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = {
|
||||
[0x83] = kvm_s390_handle_diag,
|
||||
[0xae] = kvm_s390_handle_sigp,
|
||||
[0xb2] = kvm_s390_handle_b2,
|
||||
[0xb7] = handle_lctl,
|
||||
[0xb7] = kvm_s390_handle_lctl,
|
||||
[0xb9] = kvm_s390_handle_b9,
|
||||
[0xe5] = kvm_s390_handle_e5,
|
||||
[0xeb] = handle_eb,
|
||||
[0xeb] = kvm_s390_handle_eb,
|
||||
};
|
||||
|
||||
static int handle_noop(struct kvm_vcpu *vcpu)
|
||||
@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int handle_validity(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long vmaddr;
|
||||
int viwhy = vcpu->arch.sie_block->ipb >> 16;
|
||||
int rc;
|
||||
|
||||
vcpu->stat.exit_validity++;
|
||||
trace_kvm_s390_intercept_validity(vcpu, viwhy);
|
||||
if (viwhy == 0x37) {
|
||||
vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
|
||||
vcpu->arch.gmap);
|
||||
if (IS_ERR_VALUE(vmaddr)) {
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
rc = fault_in_pages_writeable((char __user *) vmaddr,
|
||||
PAGE_SIZE);
|
||||
if (rc) {
|
||||
/* user will receive sigsegv, exit to user */
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE,
|
||||
vcpu->arch.gmap);
|
||||
if (IS_ERR_VALUE(vmaddr)) {
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
rc = fault_in_pages_writeable((char __user *) vmaddr,
|
||||
PAGE_SIZE);
|
||||
if (rc) {
|
||||
/* user will receive sigsegv, exit to user */
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
rc = -EOPNOTSUPP;
|
||||
|
||||
out:
|
||||
if (rc)
|
||||
VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
|
||||
viwhy);
|
||||
return rc;
|
||||
WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int handle_instruction(struct kvm_vcpu *vcpu)
|
||||
|
@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
|
||||
no_timer:
|
||||
spin_lock(&vcpu->arch.local_int.float_int->lock);
|
||||
spin_lock_bh(&vcpu->arch.local_int.lock);
|
||||
add_wait_queue(&vcpu->arch.local_int.wq, &wait);
|
||||
add_wait_queue(&vcpu->wq, &wait);
|
||||
while (list_empty(&vcpu->arch.local_int.list) &&
|
||||
list_empty(&vcpu->arch.local_int.float_int->list) &&
|
||||
(!vcpu->arch.local_int.timer_due) &&
|
||||
@ -452,7 +452,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
__unset_cpu_idle(vcpu);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
|
||||
remove_wait_queue(&vcpu->wq, &wait);
|
||||
spin_unlock_bh(&vcpu->arch.local_int.lock);
|
||||
spin_unlock(&vcpu->arch.local_int.float_int->lock);
|
||||
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
|
||||
@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm)
|
||||
|
||||
spin_lock(&vcpu->arch.local_int.lock);
|
||||
vcpu->arch.local_int.timer_due = 1;
|
||||
if (waitqueue_active(&vcpu->arch.local_int.wq))
|
||||
wake_up_interruptible(&vcpu->arch.local_int.wq);
|
||||
if (waitqueue_active(&vcpu->wq))
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
spin_unlock(&vcpu->arch.local_int.lock);
|
||||
}
|
||||
|
||||
@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
|
||||
spin_lock_bh(&li->lock);
|
||||
list_add(&inti->list, &li->list);
|
||||
atomic_set(&li->active, 1);
|
||||
BUG_ON(waitqueue_active(&li->wq));
|
||||
BUG_ON(waitqueue_active(li->wq));
|
||||
spin_unlock_bh(&li->lock);
|
||||
return 0;
|
||||
}
|
||||
@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
|
||||
li = fi->local_int[sigcpu];
|
||||
spin_lock_bh(&li->lock);
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&li->wq);
|
||||
if (waitqueue_active(li->wq))
|
||||
wake_up_interruptible(li->wq);
|
||||
spin_unlock_bh(&li->lock);
|
||||
spin_unlock(&fi->lock);
|
||||
mutex_unlock(&kvm->lock);
|
||||
@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
|
||||
if (inti->type == KVM_S390_SIGP_STOP)
|
||||
li->action_bits |= ACTION_STOP_ON_STOP;
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&vcpu->arch.local_int.wq);
|
||||
if (waitqueue_active(&vcpu->wq))
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
spin_unlock_bh(&li->lock);
|
||||
mutex_unlock(&vcpu->kvm->lock);
|
||||
return 0;
|
||||
|
@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
|
||||
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
|
||||
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
|
||||
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
|
||||
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
|
||||
{ "instruction_spx", VCPU_STAT(instruction_spx) },
|
||||
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
|
||||
@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
};
|
||||
|
||||
static unsigned long long *facilities;
|
||||
static struct gmap_notifier gmap_notifier;
|
||||
|
||||
/* Section: not file related */
|
||||
int kvm_arch_hardware_enable(void *garbage)
|
||||
@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage)
|
||||
{
|
||||
}
|
||||
|
||||
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
|
||||
|
||||
int kvm_arch_hardware_setup(void)
|
||||
{
|
||||
gmap_notifier.notifier_call = kvm_gmap_notifier;
|
||||
gmap_register_ipte_notifier(&gmap_notifier);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_unsetup(void)
|
||||
{
|
||||
gmap_unregister_ipte_notifier(&gmap_notifier);
|
||||
}
|
||||
|
||||
void kvm_arch_check_processor_compat(void *rtn)
|
||||
@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
kvm->arch.gmap = gmap_alloc(current->mm);
|
||||
if (!kvm->arch.gmap)
|
||||
goto out_nogmap;
|
||||
kvm->arch.gmap->private = kvm;
|
||||
}
|
||||
|
||||
kvm->arch.css_support = 0;
|
||||
@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
||||
free_page((unsigned long)(vcpu->arch.sie_block));
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kfree(vcpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu);
|
||||
}
|
||||
|
||||
static void kvm_free_vcpus(struct kvm *kvm)
|
||||
@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.gmap = gmap_alloc(current->mm);
|
||||
if (!vcpu->arch.gmap)
|
||||
return -ENOMEM;
|
||||
vcpu->arch.gmap->private = vcpu->kvm;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
|
||||
CPUSTAT_SM |
|
||||
CPUSTAT_STOPPED);
|
||||
CPUSTAT_STOPPED |
|
||||
CPUSTAT_GED);
|
||||
vcpu->arch.sie_block->ecb = 6;
|
||||
vcpu->arch.sie_block->ecb2 = 8;
|
||||
vcpu->arch.sie_block->eca = 0xC1002001U;
|
||||
vcpu->arch.sie_block->fac = (int) (long) facilities;
|
||||
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
|
||||
@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
|
||||
rc = -ENOMEM;
|
||||
|
||||
vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
|
||||
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
|
||||
if (!vcpu)
|
||||
goto out;
|
||||
|
||||
@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
|
||||
spin_lock(&kvm->arch.float_int.lock);
|
||||
kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
|
||||
init_waitqueue_head(&vcpu->arch.local_int.wq);
|
||||
vcpu->arch.local_int.wq = &vcpu->wq;
|
||||
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
|
||||
spin_unlock(&kvm->arch.float_int.lock);
|
||||
|
||||
@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
out_free_sie_block:
|
||||
free_page((unsigned long)(vcpu->arch.sie_block));
|
||||
out_free_cpu:
|
||||
kfree(vcpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, vcpu);
|
||||
out:
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void s390_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
||||
}
|
||||
|
||||
void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick a guest cpu out of SIE and wait until SIE is not running.
|
||||
* If the CPU is not running (e.g. waiting as idle) the function will
|
||||
* return immediately. */
|
||||
void exit_sie(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
|
||||
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/* Kick a guest cpu out of SIE and prevent SIE-reentry */
|
||||
void exit_sie_sync(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
s390_vcpu_block(vcpu);
|
||||
exit_sie(vcpu);
|
||||
}
|
||||
|
||||
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
|
||||
{
|
||||
int i;
|
||||
struct kvm *kvm = gmap->private;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
/* match against both prefix pages */
|
||||
if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
|
||||
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
|
||||
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
|
||||
exit_sie_sync(vcpu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* kvm common code refers to this, but never calls it */
|
||||
@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL; /* not implemented yet */
|
||||
}
|
||||
|
||||
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* We use MMU_RELOAD just to re-arm the ipte notifier for the
|
||||
* guest prefix page. gmap_ipte_notify will wait on the ptl lock.
|
||||
* This ensures that the ipte instruction for this request has
|
||||
* already finished. We might race against a second unmapper that
|
||||
* wants to set the blocking bit. Lets just retry the request loop.
|
||||
*/
|
||||
while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
|
||||
int rc;
|
||||
rc = gmap_ipte_notify(vcpu->arch.gmap,
|
||||
vcpu->arch.sie_block->prefix,
|
||||
PAGE_SIZE * 2);
|
||||
if (rc)
|
||||
return rc;
|
||||
s390_vcpu_unblock(vcpu);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int rc;
|
||||
@ -621,6 +697,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
if (!kvm_is_ucontrol(vcpu->kvm))
|
||||
kvm_s390_deliver_pending_interrupts(vcpu);
|
||||
|
||||
rc = kvm_s390_handle_requests(vcpu);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
vcpu->arch.sie_block->icptcode = 0;
|
||||
preempt_disable();
|
||||
kvm_guest_enter();
|
||||
@ -630,7 +710,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
trace_kvm_s390_sie_enter(vcpu,
|
||||
atomic_read(&vcpu->arch.sie_block->cpuflags));
|
||||
rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
|
||||
if (rc) {
|
||||
if (rc > 0)
|
||||
rc = 0;
|
||||
if (rc < 0) {
|
||||
if (kvm_is_ucontrol(vcpu->kvm)) {
|
||||
rc = SIE_INTERCEPT_UCONTROL;
|
||||
} else {
|
||||
@ -1046,7 +1128,7 @@ static int __init kvm_s390_init(void)
|
||||
return -ENOMEM;
|
||||
}
|
||||
memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
|
||||
facilities[0] &= 0xff00fff3f47c0000ULL;
|
||||
facilities[0] &= 0xff82fff3f47c0000ULL;
|
||||
facilities[1] &= 0x001c000000000000ULL;
|
||||
return 0;
|
||||
}
|
||||
@ -1059,3 +1141,12 @@ static void __exit kvm_s390_exit(void)
|
||||
|
||||
module_init(kvm_s390_init);
|
||||
module_exit(kvm_s390_exit);
|
||||
|
||||
/*
|
||||
* Enable autoloading of the kvm module.
|
||||
* Note that we add the module alias here instead of virt/kvm/kvm_main.c
|
||||
* since x86 takes a different approach.
|
||||
*/
|
||||
#include <linux/miscdevice.h>
|
||||
MODULE_ALIAS_MISCDEV(KVM_MINOR);
|
||||
MODULE_ALIAS("devname:kvm");
|
||||
|
@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
|
||||
{
|
||||
vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
|
||||
vcpu->arch.sie_block->ihcpu = 0xffff;
|
||||
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
|
||||
}
|
||||
|
||||
static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
|
||||
@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
|
||||
*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
|
||||
}
|
||||
|
||||
static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
|
||||
{
|
||||
*r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
|
||||
*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
|
||||
}
|
||||
|
||||
static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
|
||||
@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* implemented in sigp.c */
|
||||
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
|
||||
@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
|
||||
/* implemented in kvm-s390.c */
|
||||
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
|
||||
unsigned long addr);
|
||||
void s390_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
|
||||
void exit_sie(struct kvm_vcpu *vcpu);
|
||||
void exit_sie_sync(struct kvm_vcpu *vcpu);
|
||||
/* implemented in diag.c */
|
||||
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* handling privileged instructions
|
||||
*
|
||||
* Copyright IBM Corp. 2008
|
||||
* Copyright IBM Corp. 2008, 2013
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License (version 2 only)
|
||||
@ -20,6 +20,9 @@
|
||||
#include <asm/debug.h>
|
||||
#include <asm/ebcdic.h>
|
||||
#include <asm/sysinfo.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/compat.h>
|
||||
#include "gaccess.h"
|
||||
@ -34,6 +37,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.instruction_spx++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
operand2 = kvm_s390_get_base_disp_s(vcpu);
|
||||
|
||||
/* must be word boundary */
|
||||
@ -65,6 +71,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.instruction_stpx++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
operand2 = kvm_s390_get_base_disp_s(vcpu);
|
||||
|
||||
/* must be word boundary */
|
||||
@ -89,6 +98,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.instruction_stap++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
useraddr = kvm_s390_get_base_disp_s(vcpu);
|
||||
|
||||
if (useraddr & 1)
|
||||
@ -105,7 +117,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
|
||||
static int handle_skey(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->stat.instruction_storage_key++;
|
||||
vcpu->arch.sie_block->gpsw.addr -= 4;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
vcpu->arch.sie_block->gpsw.addr =
|
||||
__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
|
||||
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
|
||||
return 0;
|
||||
}
|
||||
@ -129,9 +146,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
|
||||
* Store the two-word I/O interruption code into the
|
||||
* provided area.
|
||||
*/
|
||||
put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
|
||||
put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
|
||||
put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
|
||||
if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
|
||||
|| put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
|
||||
|| put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
} else {
|
||||
/*
|
||||
* Store the three-word I/O interruption code into
|
||||
@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
if (vcpu->kvm->arch.css_support) {
|
||||
/*
|
||||
* Most I/O instructions will be handled by userspace.
|
||||
@ -210,8 +231,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
|
||||
int rc;
|
||||
|
||||
vcpu->stat.instruction_stfl++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
/* only pass the facility bits, which we can handle */
|
||||
facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3;
|
||||
facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
|
||||
|
||||
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
|
||||
&facility_list, sizeof(facility_list));
|
||||
@ -255,8 +280,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
|
||||
u64 addr;
|
||||
|
||||
if (gpsw->mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
PGM_PRIVILEGED_OPERATION);
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
addr = kvm_s390_get_base_disp_s(vcpu);
|
||||
if (addr & 7)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
@ -278,6 +303,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
|
||||
psw_t new_psw;
|
||||
u64 addr;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
addr = kvm_s390_get_base_disp_s(vcpu);
|
||||
if (addr & 7)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
@ -296,6 +324,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.instruction_stidp++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
operand2 = kvm_s390_get_base_disp_s(vcpu);
|
||||
|
||||
if (operand2 & 7)
|
||||
@ -351,16 +382,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
vcpu->stat.instruction_stsi++;
|
||||
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
if (fc > 3) {
|
||||
vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
|
||||
|| vcpu->run->s.regs.gprs[1] & 0xffff0000)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
if (fc == 0) {
|
||||
vcpu->run->s.regs.gprs[0] = 3 << 28;
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
operand2 = kvm_s390_get_base_disp_s(vcpu);
|
||||
|
||||
if (operand2 & 0xfff && fc > 0)
|
||||
if (operand2 & 0xfff)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
switch (fc) {
|
||||
case 0:
|
||||
vcpu->run->s.regs.gprs[0] = 3 << 28;
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
||||
return 0;
|
||||
case 1: /* same handling for 1 and 2 */
|
||||
case 2:
|
||||
mem = get_zeroed_page(GFP_KERNEL);
|
||||
@ -377,8 +422,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
goto out_no_data;
|
||||
handle_stsi_3_2_2(vcpu, (void *) mem);
|
||||
break;
|
||||
default:
|
||||
goto out_no_data;
|
||||
}
|
||||
|
||||
if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
|
||||
@ -432,20 +475,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
|
||||
intercept_handler_t handler;
|
||||
|
||||
/*
|
||||
* a lot of B2 instructions are priviledged. We first check for
|
||||
* the privileged ones, that we can handle in the kernel. If the
|
||||
* kernel can handle this instruction, we check for the problem
|
||||
* state bit and (a) handle the instruction or (b) send a code 2
|
||||
* program check.
|
||||
* Anything else goes to userspace.*/
|
||||
* A lot of B2 instructions are priviledged. Here we check for
|
||||
* the privileged ones, that we can handle in the kernel.
|
||||
* Anything else goes to userspace.
|
||||
*/
|
||||
handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
|
||||
if (handler) {
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
PGM_PRIVILEGED_OPERATION);
|
||||
else
|
||||
return handler(vcpu);
|
||||
}
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
@ -453,8 +490,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int reg1, reg2;
|
||||
|
||||
reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24;
|
||||
reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
|
||||
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
||||
|
||||
/* This basically extracts the mask half of the psw. */
|
||||
vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
|
||||
@ -467,9 +503,88 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PFMF_RESERVED 0xfffc0101UL
|
||||
#define PFMF_SK 0x00020000UL
|
||||
#define PFMF_CF 0x00010000UL
|
||||
#define PFMF_UI 0x00008000UL
|
||||
#define PFMF_FSC 0x00007000UL
|
||||
#define PFMF_NQ 0x00000800UL
|
||||
#define PFMF_MR 0x00000400UL
|
||||
#define PFMF_MC 0x00000200UL
|
||||
#define PFMF_KEY 0x000000feUL
|
||||
|
||||
static int handle_pfmf(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int reg1, reg2;
|
||||
unsigned long start, end;
|
||||
|
||||
vcpu->stat.instruction_pfmf++;
|
||||
|
||||
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
||||
|
||||
if (!MACHINE_HAS_PFMF)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
/* Only provide non-quiescing support if the host supports it */
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
|
||||
S390_lowcore.stfl_fac_list & 0x00020000)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
/* No support for conditional-SSKE */
|
||||
if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||
switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
|
||||
case 0x00000000:
|
||||
end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
|
||||
break;
|
||||
case 0x00001000:
|
||||
end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
|
||||
break;
|
||||
/* We dont support EDAT2
|
||||
case 0x00002000:
|
||||
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
|
||||
break;*/
|
||||
default:
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
}
|
||||
while (start < end) {
|
||||
unsigned long useraddr;
|
||||
|
||||
useraddr = gmap_translate(start, vcpu->arch.gmap);
|
||||
if (IS_ERR((void *)useraddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
|
||||
if (clear_user((void __user *)useraddr, PAGE_SIZE))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
}
|
||||
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
|
||||
if (set_guest_storage_key(current->mm, useraddr,
|
||||
vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
|
||||
vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
}
|
||||
|
||||
start += PAGE_SIZE;
|
||||
}
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
|
||||
vcpu->run->s.regs.gprs[reg2] = end;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const intercept_handler_t b9_handlers[256] = {
|
||||
[0x8d] = handle_epsw,
|
||||
[0x9c] = handle_io_inst,
|
||||
[0xaf] = handle_pfmf,
|
||||
};
|
||||
|
||||
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
|
||||
@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
|
||||
|
||||
/* This is handled just as for the B2 instructions. */
|
||||
handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
|
||||
if (handler) {
|
||||
if ((handler != handle_epsw) &&
|
||||
(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE))
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
PGM_PRIVILEGED_OPERATION);
|
||||
else
|
||||
return handler(vcpu);
|
||||
}
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
|
||||
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
|
||||
u64 useraddr;
|
||||
u32 val = 0;
|
||||
int reg, rc;
|
||||
|
||||
vcpu->stat.instruction_lctl++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
useraddr = kvm_s390_get_base_disp_rs(vcpu);
|
||||
|
||||
if (useraddr & 3)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
|
||||
useraddr);
|
||||
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
|
||||
|
||||
reg = reg1;
|
||||
do {
|
||||
rc = get_guest(vcpu, val, (u32 __user *) useraddr);
|
||||
if (rc)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
|
||||
vcpu->arch.sie_block->gcr[reg] |= val;
|
||||
useraddr += 4;
|
||||
if (reg == reg3)
|
||||
break;
|
||||
reg = (reg + 1) % 16;
|
||||
} while (1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_lctlg(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
|
||||
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
|
||||
u64 useraddr;
|
||||
int reg, rc;
|
||||
|
||||
vcpu->stat.instruction_lctlg++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
useraddr = kvm_s390_get_base_disp_rsy(vcpu);
|
||||
|
||||
if (useraddr & 7)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
reg = reg1;
|
||||
|
||||
VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
|
||||
useraddr);
|
||||
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
|
||||
|
||||
do {
|
||||
rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
|
||||
(u64 __user *) useraddr);
|
||||
if (rc)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
useraddr += 8;
|
||||
if (reg == reg3)
|
||||
break;
|
||||
reg = (reg + 1) % 16;
|
||||
} while (1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const intercept_handler_t eb_handlers[256] = {
|
||||
[0x2f] = handle_lctlg,
|
||||
[0x8a] = handle_io_inst,
|
||||
};
|
||||
|
||||
int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu)
|
||||
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
intercept_handler_t handler;
|
||||
|
||||
/* All eb instructions that end up here are privileged. */
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
PGM_PRIVILEGED_OPERATION);
|
||||
handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
|
||||
if (handler)
|
||||
return handler(vcpu);
|
||||
@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->stat.instruction_tprot++;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
|
||||
|
||||
/* we only handle the Linux memory detection case:
|
||||
@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
|
||||
u32 value;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
PGM_PRIVILEGED_OPERATION);
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
|
@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
|
||||
list_add_tail(&inti->list, &li->list);
|
||||
atomic_set(&li->active, 1);
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&li->wq);
|
||||
if (waitqueue_active(li->wq))
|
||||
wake_up_interruptible(li->wq);
|
||||
spin_unlock_bh(&li->lock);
|
||||
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
|
||||
@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
|
||||
list_add_tail(&inti->list, &li->list);
|
||||
atomic_set(&li->active, 1);
|
||||
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&li->wq);
|
||||
if (waitqueue_active(li->wq))
|
||||
wake_up_interruptible(li->wq);
|
||||
spin_unlock_bh(&li->lock);
|
||||
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
|
||||
@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
|
||||
atomic_set(&li->active, 1);
|
||||
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
|
||||
li->action_bits |= action;
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&li->wq);
|
||||
if (waitqueue_active(li->wq))
|
||||
wake_up_interruptible(li->wq);
|
||||
out:
|
||||
spin_unlock_bh(&li->lock);
|
||||
|
||||
@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
|
||||
|
||||
list_add_tail(&inti->list, &li->list);
|
||||
atomic_set(&li->active, 1);
|
||||
if (waitqueue_active(&li->wq))
|
||||
wake_up_interruptible(&li->wq);
|
||||
if (waitqueue_active(li->wq))
|
||||
wake_up_interruptible(li->wq);
|
||||
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
|
||||
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
|
||||
@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
|
||||
/* sigp in userspace can exit */
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu,
|
||||
PGM_PRIVILEGED_OPERATION);
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
order_code = kvm_s390_get_base_disp_rs(vcpu);
|
||||
|
||||
|
@ -689,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
|
||||
entry = *ptep;
|
||||
if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste_val(pgste) |= RCP_IN_BIT;
|
||||
pgste_val(pgste) |= PGSTE_IN_BIT;
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
start += PAGE_SIZE;
|
||||
len -= PAGE_SIZE;
|
||||
|
@ -222,14 +222,22 @@ struct kvm_mmu_page {
|
||||
int root_count; /* Currently serving as active root */
|
||||
unsigned int unsync_children;
|
||||
unsigned long parent_ptes; /* Reverse mapping for parent_pte */
|
||||
|
||||
/* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
|
||||
unsigned long mmu_valid_gen;
|
||||
|
||||
DECLARE_BITMAP(unsync_child_bitmap, 512);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Used out of the mmu-lock to avoid reading spte values while an
|
||||
* update is in progress; see the comments in __get_spte_lockless().
|
||||
*/
|
||||
int clear_spte_count;
|
||||
#endif
|
||||
|
||||
/* Number of writes since the last time traversal visited this page. */
|
||||
int write_flooding_count;
|
||||
bool mmio_cached;
|
||||
};
|
||||
|
||||
struct kvm_pio_request {
|
||||
@ -529,11 +537,14 @@ struct kvm_arch {
|
||||
unsigned int n_requested_mmu_pages;
|
||||
unsigned int n_max_mmu_pages;
|
||||
unsigned int indirect_shadow_pages;
|
||||
unsigned long mmu_valid_gen;
|
||||
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
|
||||
/*
|
||||
* Hash table of struct kvm_mmu_page.
|
||||
*/
|
||||
struct list_head active_mmu_pages;
|
||||
struct list_head zapped_obsolete_pages;
|
||||
|
||||
struct list_head assigned_dev_head;
|
||||
struct iommu_domain *iommu_domain;
|
||||
int iommu_flags;
|
||||
@ -769,7 +780,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask);
|
||||
void kvm_mmu_zap_all(struct kvm *kvm);
|
||||
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
|
||||
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
|
||||
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
|
||||
|
||||
|
@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
|
||||
CFLAGS_svm.o := -I.
|
||||
CFLAGS_vmx.o := -I.
|
||||
|
||||
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
|
||||
coalesced_mmio.o irq_comm.o eventfd.o \
|
||||
irqchip.o)
|
||||
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \
|
||||
assigned-dev.o iommu.o)
|
||||
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
|
||||
KVM := ../../../virt/kvm
|
||||
|
||||
kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
|
||||
$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
|
||||
$(KVM)/eventfd.o $(KVM)/irqchip.o
|
||||
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
|
||||
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
|
||||
|
||||
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
|
||||
i8254.o cpuid.o pmu.o
|
||||
|
@ -61,6 +61,8 @@
|
||||
#define OpMem8 26ull /* 8-bit zero extended memory operand */
|
||||
#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
|
||||
#define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
|
||||
#define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
|
||||
#define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
|
||||
|
||||
#define OpBits 5 /* Width of operand field */
|
||||
#define OpMask ((1ull << OpBits) - 1)
|
||||
@ -86,6 +88,7 @@
|
||||
#define DstMem64 (OpMem64 << DstShift)
|
||||
#define DstImmUByte (OpImmUByte << DstShift)
|
||||
#define DstDX (OpDX << DstShift)
|
||||
#define DstAccLo (OpAccLo << DstShift)
|
||||
#define DstMask (OpMask << DstShift)
|
||||
/* Source operand type. */
|
||||
#define SrcShift 6
|
||||
@ -108,6 +111,7 @@
|
||||
#define SrcImm64 (OpImm64 << SrcShift)
|
||||
#define SrcDX (OpDX << SrcShift)
|
||||
#define SrcMem8 (OpMem8 << SrcShift)
|
||||
#define SrcAccHi (OpAccHi << SrcShift)
|
||||
#define SrcMask (OpMask << SrcShift)
|
||||
#define BitOp (1<<11)
|
||||
#define MemAbs (1<<12) /* Memory operand is absolute displacement */
|
||||
@ -138,6 +142,7 @@
|
||||
/* Source 2 operand type */
|
||||
#define Src2Shift (31)
|
||||
#define Src2None (OpNone << Src2Shift)
|
||||
#define Src2Mem (OpMem << Src2Shift)
|
||||
#define Src2CL (OpCL << Src2Shift)
|
||||
#define Src2ImmByte (OpImmByte << Src2Shift)
|
||||
#define Src2One (OpOne << Src2Shift)
|
||||
@ -155,6 +160,9 @@
|
||||
#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
|
||||
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
|
||||
#define NoWrite ((u64)1 << 45) /* No writeback */
|
||||
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
|
||||
|
||||
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
|
||||
|
||||
#define X2(x...) x, x
|
||||
#define X3(x...) X2(x), x
|
||||
@ -171,10 +179,11 @@
|
||||
/*
|
||||
* fastop functions have a special calling convention:
|
||||
*
|
||||
* dst: [rdx]:rax (in/out)
|
||||
* src: rbx (in/out)
|
||||
* dst: rax (in/out)
|
||||
* src: rdx (in/out)
|
||||
* src2: rcx (in)
|
||||
* flags: rflags (in/out)
|
||||
* ex: rsi (in:fastop pointer, out:zero if exception)
|
||||
*
|
||||
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
|
||||
* different operand sizes can be reached by calculation, rather than a jump
|
||||
@ -275,175 +284,18 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
|
||||
ctxt->regs_valid = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Instruction emulation:
|
||||
* Most instructions are emulated directly via a fragment of inline assembly
|
||||
* code. This allows us to save/restore EFLAGS and thus very easily pick up
|
||||
* any modified flags.
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_X86_64)
|
||||
#define _LO32 "k" /* force 32-bit operand */
|
||||
#define _STK "%%rsp" /* stack pointer */
|
||||
#elif defined(__i386__)
|
||||
#define _LO32 "" /* force 32-bit operand */
|
||||
#define _STK "%%esp" /* stack pointer */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These EFLAGS bits are restored from saved value during emulation, and
|
||||
* any changes are written back to the saved value after emulation.
|
||||
*/
|
||||
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
|
||||
|
||||
/* Before executing instruction: restore necessary bits in EFLAGS. */
|
||||
#define _PRE_EFLAGS(_sav, _msk, _tmp) \
|
||||
/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
|
||||
"movl %"_sav",%"_LO32 _tmp"; " \
|
||||
"push %"_tmp"; " \
|
||||
"push %"_tmp"; " \
|
||||
"movl %"_msk",%"_LO32 _tmp"; " \
|
||||
"andl %"_LO32 _tmp",("_STK"); " \
|
||||
"pushf; " \
|
||||
"notl %"_LO32 _tmp"; " \
|
||||
"andl %"_LO32 _tmp",("_STK"); " \
|
||||
"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
|
||||
"pop %"_tmp"; " \
|
||||
"orl %"_LO32 _tmp",("_STK"); " \
|
||||
"popf; " \
|
||||
"pop %"_sav"; "
|
||||
|
||||
/* After executing instruction: write-back necessary bits in EFLAGS. */
|
||||
#define _POST_EFLAGS(_sav, _msk, _tmp) \
|
||||
/* _sav |= EFLAGS & _msk; */ \
|
||||
"pushf; " \
|
||||
"pop %"_tmp"; " \
|
||||
"andl %"_msk",%"_LO32 _tmp"; " \
|
||||
"orl %"_LO32 _tmp",%"_sav"; "
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define ON64(x) x
|
||||
#else
|
||||
#define ON64(x)
|
||||
#endif
|
||||
|
||||
#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \
|
||||
do { \
|
||||
__asm__ __volatile__ ( \
|
||||
_PRE_EFLAGS("0", "4", "2") \
|
||||
_op _suffix " %"_x"3,%1; " \
|
||||
_POST_EFLAGS("0", "4", "2") \
|
||||
: "=m" ((ctxt)->eflags), \
|
||||
"+q" (*(_dsttype*)&(ctxt)->dst.val), \
|
||||
"=&r" (_tmp) \
|
||||
: _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Raw emulation: instruction has two explicit operands. */
|
||||
#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \
|
||||
do { \
|
||||
unsigned long _tmp; \
|
||||
\
|
||||
switch ((ctxt)->dst.bytes) { \
|
||||
case 2: \
|
||||
____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \
|
||||
break; \
|
||||
case 4: \
|
||||
____emulate_2op(ctxt,_op,_lx,_ly,"l",u32); \
|
||||
break; \
|
||||
case 8: \
|
||||
ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
|
||||
do { \
|
||||
unsigned long _tmp; \
|
||||
switch ((ctxt)->dst.bytes) { \
|
||||
case 1: \
|
||||
____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \
|
||||
break; \
|
||||
default: \
|
||||
__emulate_2op_nobyte(ctxt, _op, \
|
||||
_wx, _wy, _lx, _ly, _qx, _qy); \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Source operand is byte-sized and may be restricted to just %cl. */
|
||||
#define emulate_2op_SrcB(ctxt, _op) \
|
||||
__emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c")
|
||||
|
||||
/* Source operand is byte, word, long or quad sized. */
|
||||
#define emulate_2op_SrcV(ctxt, _op) \
|
||||
__emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r")
|
||||
|
||||
/* Source operand is word, long or quad sized. */
|
||||
#define emulate_2op_SrcV_nobyte(ctxt, _op) \
|
||||
__emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r")
|
||||
|
||||
/* Instruction has three operands and one operand is stored in ECX register */
|
||||
#define __emulate_2op_cl(ctxt, _op, _suffix, _type) \
|
||||
do { \
|
||||
unsigned long _tmp; \
|
||||
_type _clv = (ctxt)->src2.val; \
|
||||
_type _srcv = (ctxt)->src.val; \
|
||||
_type _dstv = (ctxt)->dst.val; \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
_PRE_EFLAGS("0", "5", "2") \
|
||||
_op _suffix " %4,%1 \n" \
|
||||
_POST_EFLAGS("0", "5", "2") \
|
||||
: "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \
|
||||
: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
|
||||
); \
|
||||
\
|
||||
(ctxt)->src2.val = (unsigned long) _clv; \
|
||||
(ctxt)->src2.val = (unsigned long) _srcv; \
|
||||
(ctxt)->dst.val = (unsigned long) _dstv; \
|
||||
} while (0)
|
||||
|
||||
#define emulate_2op_cl(ctxt, _op) \
|
||||
do { \
|
||||
switch ((ctxt)->dst.bytes) { \
|
||||
case 2: \
|
||||
__emulate_2op_cl(ctxt, _op, "w", u16); \
|
||||
break; \
|
||||
case 4: \
|
||||
__emulate_2op_cl(ctxt, _op, "l", u32); \
|
||||
break; \
|
||||
case 8: \
|
||||
ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define __emulate_1op(ctxt, _op, _suffix) \
|
||||
do { \
|
||||
unsigned long _tmp; \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
_PRE_EFLAGS("0", "3", "2") \
|
||||
_op _suffix " %1; " \
|
||||
_POST_EFLAGS("0", "3", "2") \
|
||||
: "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \
|
||||
"=&r" (_tmp) \
|
||||
: "i" (EFLAGS_MASK)); \
|
||||
} while (0)
|
||||
|
||||
/* Instruction has only one explicit operand (no source operand). */
|
||||
#define emulate_1op(ctxt, _op) \
|
||||
do { \
|
||||
switch ((ctxt)->dst.bytes) { \
|
||||
case 1: __emulate_1op(ctxt, _op, "b"); break; \
|
||||
case 2: __emulate_1op(ctxt, _op, "w"); break; \
|
||||
case 4: __emulate_1op(ctxt, _op, "l"); break; \
|
||||
case 8: ON64(__emulate_1op(ctxt, _op, "q")); break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
|
||||
|
||||
#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
|
||||
@ -462,7 +314,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
|
||||
#define FOPNOP() FOP_ALIGN FOP_RET
|
||||
|
||||
#define FOP1E(op, dst) \
|
||||
FOP_ALIGN #op " %" #dst " \n\t" FOP_RET
|
||||
FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET
|
||||
|
||||
#define FOP1EEX(op, dst) \
|
||||
FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
|
||||
|
||||
#define FASTOP1(op) \
|
||||
FOP_START(op) \
|
||||
@ -472,24 +327,42 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
|
||||
ON64(FOP1E(op##q, rax)) \
|
||||
FOP_END
|
||||
|
||||
/* 1-operand, using src2 (for MUL/DIV r/m) */
|
||||
#define FASTOP1SRC2(op, name) \
|
||||
FOP_START(name) \
|
||||
FOP1E(op, cl) \
|
||||
FOP1E(op, cx) \
|
||||
FOP1E(op, ecx) \
|
||||
ON64(FOP1E(op, rcx)) \
|
||||
FOP_END
|
||||
|
||||
/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
|
||||
#define FASTOP1SRC2EX(op, name) \
|
||||
FOP_START(name) \
|
||||
FOP1EEX(op, cl) \
|
||||
FOP1EEX(op, cx) \
|
||||
FOP1EEX(op, ecx) \
|
||||
ON64(FOP1EEX(op, rcx)) \
|
||||
FOP_END
|
||||
|
||||
#define FOP2E(op, dst, src) \
|
||||
FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
|
||||
|
||||
#define FASTOP2(op) \
|
||||
FOP_START(op) \
|
||||
FOP2E(op##b, al, bl) \
|
||||
FOP2E(op##w, ax, bx) \
|
||||
FOP2E(op##l, eax, ebx) \
|
||||
ON64(FOP2E(op##q, rax, rbx)) \
|
||||
FOP2E(op##b, al, dl) \
|
||||
FOP2E(op##w, ax, dx) \
|
||||
FOP2E(op##l, eax, edx) \
|
||||
ON64(FOP2E(op##q, rax, rdx)) \
|
||||
FOP_END
|
||||
|
||||
/* 2 operand, word only */
|
||||
#define FASTOP2W(op) \
|
||||
FOP_START(op) \
|
||||
FOPNOP() \
|
||||
FOP2E(op##w, ax, bx) \
|
||||
FOP2E(op##l, eax, ebx) \
|
||||
ON64(FOP2E(op##q, rax, rbx)) \
|
||||
FOP2E(op##w, ax, dx) \
|
||||
FOP2E(op##l, eax, edx) \
|
||||
ON64(FOP2E(op##q, rax, rdx)) \
|
||||
FOP_END
|
||||
|
||||
/* 2 operand, src is CL */
|
||||
@ -508,14 +381,17 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
|
||||
#define FASTOP3WCL(op) \
|
||||
FOP_START(op) \
|
||||
FOPNOP() \
|
||||
FOP3E(op##w, ax, bx, cl) \
|
||||
FOP3E(op##l, eax, ebx, cl) \
|
||||
ON64(FOP3E(op##q, rax, rbx, cl)) \
|
||||
FOP3E(op##w, ax, dx, cl) \
|
||||
FOP3E(op##l, eax, edx, cl) \
|
||||
ON64(FOP3E(op##q, rax, rdx, cl)) \
|
||||
FOP_END
|
||||
|
||||
/* Special case for SETcc - 1 instruction per cc */
|
||||
#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
|
||||
|
||||
asm(".global kvm_fastop_exception \n"
|
||||
"kvm_fastop_exception: xor %esi, %esi; ret");
|
||||
|
||||
FOP_START(setcc)
|
||||
FOP_SETCC(seto)
|
||||
FOP_SETCC(setno)
|
||||
@ -538,47 +414,6 @@ FOP_END;
|
||||
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
|
||||
FOP_END;
|
||||
|
||||
#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
|
||||
do { \
|
||||
unsigned long _tmp; \
|
||||
ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \
|
||||
ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \
|
||||
\
|
||||
__asm__ __volatile__ ( \
|
||||
_PRE_EFLAGS("0", "5", "1") \
|
||||
"1: \n\t" \
|
||||
_op _suffix " %6; " \
|
||||
"2: \n\t" \
|
||||
_POST_EFLAGS("0", "5", "1") \
|
||||
".pushsection .fixup,\"ax\" \n\t" \
|
||||
"3: movb $1, %4 \n\t" \
|
||||
"jmp 2b \n\t" \
|
||||
".popsection \n\t" \
|
||||
_ASM_EXTABLE(1b, 3b) \
|
||||
: "=m" ((ctxt)->eflags), "=&r" (_tmp), \
|
||||
"+a" (*rax), "+d" (*rdx), "+qm"(_ex) \
|
||||
: "i" (EFLAGS_MASK), "m" ((ctxt)->src.val)); \
|
||||
} while (0)
|
||||
|
||||
/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
|
||||
#define emulate_1op_rax_rdx(ctxt, _op, _ex) \
|
||||
do { \
|
||||
switch((ctxt)->src.bytes) { \
|
||||
case 1: \
|
||||
__emulate_1op_rax_rdx(ctxt, _op, "b", _ex); \
|
||||
break; \
|
||||
case 2: \
|
||||
__emulate_1op_rax_rdx(ctxt, _op, "w", _ex); \
|
||||
break; \
|
||||
case 4: \
|
||||
__emulate_1op_rax_rdx(ctxt, _op, "l", _ex); \
|
||||
break; \
|
||||
case 8: ON64( \
|
||||
__emulate_1op_rax_rdx(ctxt, _op, "q", _ex)); \
|
||||
break; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
|
||||
enum x86_intercept intercept,
|
||||
enum x86_intercept_stage stage)
|
||||
@ -988,6 +823,11 @@ FASTOP2(xor);
|
||||
FASTOP2(cmp);
|
||||
FASTOP2(test);
|
||||
|
||||
FASTOP1SRC2(mul, mul_ex);
|
||||
FASTOP1SRC2(imul, imul_ex);
|
||||
FASTOP1SRC2EX(div, div_ex);
|
||||
FASTOP1SRC2EX(idiv, idiv_ex);
|
||||
|
||||
FASTOP3WCL(shld);
|
||||
FASTOP3WCL(shrd);
|
||||
|
||||
@ -1013,6 +853,8 @@ FASTOP2W(bts);
|
||||
FASTOP2W(btr);
|
||||
FASTOP2W(btc);
|
||||
|
||||
FASTOP2(xadd);
|
||||
|
||||
static u8 test_cc(unsigned int condition, unsigned long flags)
|
||||
{
|
||||
u8 rc;
|
||||
@ -1726,45 +1568,42 @@ static void write_register_operand(struct operand *op)
|
||||
}
|
||||
}
|
||||
|
||||
static int writeback(struct x86_emulate_ctxt *ctxt)
|
||||
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (ctxt->d & NoWrite)
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
switch (ctxt->dst.type) {
|
||||
switch (op->type) {
|
||||
case OP_REG:
|
||||
write_register_operand(&ctxt->dst);
|
||||
write_register_operand(op);
|
||||
break;
|
||||
case OP_MEM:
|
||||
if (ctxt->lock_prefix)
|
||||
rc = segmented_cmpxchg(ctxt,
|
||||
ctxt->dst.addr.mem,
|
||||
&ctxt->dst.orig_val,
|
||||
&ctxt->dst.val,
|
||||
ctxt->dst.bytes);
|
||||
op->addr.mem,
|
||||
&op->orig_val,
|
||||
&op->val,
|
||||
op->bytes);
|
||||
else
|
||||
rc = segmented_write(ctxt,
|
||||
ctxt->dst.addr.mem,
|
||||
&ctxt->dst.val,
|
||||
ctxt->dst.bytes);
|
||||
op->addr.mem,
|
||||
&op->val,
|
||||
op->bytes);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
break;
|
||||
case OP_MEM_STR:
|
||||
rc = segmented_write(ctxt,
|
||||
ctxt->dst.addr.mem,
|
||||
ctxt->dst.data,
|
||||
ctxt->dst.bytes * ctxt->dst.count);
|
||||
op->addr.mem,
|
||||
op->data,
|
||||
op->bytes * op->count);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
break;
|
||||
case OP_XMM:
|
||||
write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
|
||||
write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
|
||||
break;
|
||||
case OP_MM:
|
||||
write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm);
|
||||
write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
|
||||
break;
|
||||
case OP_NONE:
|
||||
/* no writeback */
|
||||
@ -2117,42 +1956,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u8 ex = 0;
|
||||
|
||||
emulate_1op_rax_rdx(ctxt, "mul", ex);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int em_imul_ex(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u8 ex = 0;
|
||||
|
||||
emulate_1op_rax_rdx(ctxt, "imul", ex);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int em_div_ex(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u8 de = 0;
|
||||
|
||||
emulate_1op_rax_rdx(ctxt, "div", de);
|
||||
if (de)
|
||||
return emulate_de(ctxt);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int em_idiv_ex(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u8 de = 0;
|
||||
|
||||
emulate_1op_rax_rdx(ctxt, "idiv", de);
|
||||
if (de)
|
||||
return emulate_de(ctxt);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
static int em_grp45(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
int rc = X86EMUL_CONTINUE;
|
||||
@ -3734,10 +3537,10 @@ static const struct opcode group3[] = {
|
||||
F(DstMem | SrcImm | NoWrite, em_test),
|
||||
F(DstMem | SrcNone | Lock, em_not),
|
||||
F(DstMem | SrcNone | Lock, em_neg),
|
||||
I(SrcMem, em_mul_ex),
|
||||
I(SrcMem, em_imul_ex),
|
||||
I(SrcMem, em_div_ex),
|
||||
I(SrcMem, em_idiv_ex),
|
||||
F(DstXacc | Src2Mem, em_mul_ex),
|
||||
F(DstXacc | Src2Mem, em_imul_ex),
|
||||
F(DstXacc | Src2Mem, em_div_ex),
|
||||
F(DstXacc | Src2Mem, em_idiv_ex),
|
||||
};
|
||||
|
||||
static const struct opcode group4[] = {
|
||||
@ -4064,7 +3867,7 @@ static const struct opcode twobyte_table[256] = {
|
||||
F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
|
||||
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
|
||||
/* 0xC0 - 0xC7 */
|
||||
D2bv(DstMem | SrcReg | ModRM | Lock),
|
||||
F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
|
||||
N, D(DstMem | SrcReg | ModRM | Mov),
|
||||
N, N, N, GD(0, &group9),
|
||||
/* 0xC8 - 0xCF */
|
||||
@ -4172,6 +3975,24 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
|
||||
fetch_register_operand(op);
|
||||
op->orig_val = op->val;
|
||||
break;
|
||||
case OpAccLo:
|
||||
op->type = OP_REG;
|
||||
op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
|
||||
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
|
||||
fetch_register_operand(op);
|
||||
op->orig_val = op->val;
|
||||
break;
|
||||
case OpAccHi:
|
||||
if (ctxt->d & ByteOp) {
|
||||
op->type = OP_NONE;
|
||||
break;
|
||||
}
|
||||
op->type = OP_REG;
|
||||
op->bytes = ctxt->op_bytes;
|
||||
op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
|
||||
fetch_register_operand(op);
|
||||
op->orig_val = op->val;
|
||||
break;
|
||||
case OpDI:
|
||||
op->type = OP_MEM;
|
||||
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
|
||||
@ -4553,11 +4374,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
|
||||
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
|
||||
{
|
||||
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
|
||||
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
|
||||
if (!(ctxt->d & ByteOp))
|
||||
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
|
||||
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
|
||||
: "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
|
||||
: "c"(ctxt->src2.val), [fastop]"S"(fop));
|
||||
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
|
||||
[fastop]"+S"(fop)
|
||||
: "c"(ctxt->src2.val));
|
||||
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
|
||||
if (!fop) /* exception is returned in fop variable */
|
||||
return emulate_de(ctxt);
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
@ -4773,9 +4598,17 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
||||
goto done;
|
||||
|
||||
writeback:
|
||||
rc = writeback(ctxt);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
goto done;
|
||||
if (!(ctxt->d & NoWrite)) {
|
||||
rc = writeback(ctxt, &ctxt->dst);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
goto done;
|
||||
}
|
||||
if (ctxt->d & SrcWrite) {
|
||||
BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
|
||||
rc = writeback(ctxt, &ctxt->src);
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* restore dst type in case the decoding will be reused
|
||||
@ -4872,12 +4705,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
|
||||
ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
|
||||
(s16) ctxt->src.val;
|
||||
break;
|
||||
case 0xc0 ... 0xc1: /* xadd */
|
||||
fastop(ctxt, em_add);
|
||||
/* Write back the register source. */
|
||||
ctxt->src.val = ctxt->dst.orig_val;
|
||||
write_register_operand(&ctxt->src);
|
||||
break;
|
||||
case 0xc3: /* movnti */
|
||||
ctxt->dst.bytes = ctxt->op_bytes;
|
||||
ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
|
||||
|
@ -1608,8 +1608,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
if (atomic_read(&apic->lapic_timer.pending) > 0) {
|
||||
if (kvm_apic_local_deliver(apic, APIC_LVTT))
|
||||
atomic_dec(&apic->lapic_timer.pending);
|
||||
kvm_apic_local_deliver(apic, APIC_LVTT);
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -197,15 +197,63 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
|
||||
|
||||
static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
|
||||
/*
|
||||
* spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
|
||||
* the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
|
||||
* number.
|
||||
*/
|
||||
#define MMIO_SPTE_GEN_LOW_SHIFT 3
|
||||
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
|
||||
|
||||
#define MMIO_GEN_SHIFT 19
|
||||
#define MMIO_GEN_LOW_SHIFT 9
|
||||
#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
|
||||
#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
|
||||
#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
|
||||
|
||||
static u64 generation_mmio_spte_mask(unsigned int gen)
|
||||
{
|
||||
struct kvm_mmu_page *sp = page_header(__pa(sptep));
|
||||
u64 mask;
|
||||
|
||||
WARN_ON(gen > MMIO_MAX_GEN);
|
||||
|
||||
mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
|
||||
mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
|
||||
return mask;
|
||||
}
|
||||
|
||||
static unsigned int get_mmio_spte_generation(u64 spte)
|
||||
{
|
||||
unsigned int gen;
|
||||
|
||||
spte &= ~shadow_mmio_mask;
|
||||
|
||||
gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
|
||||
gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
|
||||
return gen;
|
||||
}
|
||||
|
||||
static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* Init kvm generation close to MMIO_MAX_GEN to easily test the
|
||||
* code of handling generation number wrap-around.
|
||||
*/
|
||||
return (kvm_memslots(kvm)->generation +
|
||||
MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
|
||||
}
|
||||
|
||||
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
|
||||
unsigned access)
|
||||
{
|
||||
unsigned int gen = kvm_current_mmio_generation(kvm);
|
||||
u64 mask = generation_mmio_spte_mask(gen);
|
||||
|
||||
access &= ACC_WRITE_MASK | ACC_USER_MASK;
|
||||
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
|
||||
|
||||
sp->mmio_cached = true;
|
||||
trace_mark_mmio_spte(sptep, gfn, access);
|
||||
mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
|
||||
trace_mark_mmio_spte(sptep, gfn, access, gen);
|
||||
mmu_spte_set(sptep, mask);
|
||||
}
|
||||
|
||||
static bool is_mmio_spte(u64 spte)
|
||||
@ -215,24 +263,38 @@ static bool is_mmio_spte(u64 spte)
|
||||
|
||||
static gfn_t get_mmio_spte_gfn(u64 spte)
|
||||
{
|
||||
return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT;
|
||||
u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
|
||||
return (spte & ~mask) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static unsigned get_mmio_spte_access(u64 spte)
|
||||
{
|
||||
return (spte & ~shadow_mmio_mask) & ~PAGE_MASK;
|
||||
u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
|
||||
return (spte & ~mask) & ~PAGE_MASK;
|
||||
}
|
||||
|
||||
static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
|
||||
static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
|
||||
pfn_t pfn, unsigned access)
|
||||
{
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
mark_mmio_spte(sptep, gfn, access);
|
||||
mark_mmio_spte(kvm, sptep, gfn, access);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool check_mmio_spte(struct kvm *kvm, u64 spte)
|
||||
{
|
||||
unsigned int kvm_gen, spte_gen;
|
||||
|
||||
kvm_gen = kvm_current_mmio_generation(kvm);
|
||||
spte_gen = get_mmio_spte_generation(spte);
|
||||
|
||||
trace_check_mmio_spte(spte, kvm_gen, spte_gen);
|
||||
return likely(kvm_gen == spte_gen);
|
||||
}
|
||||
|
||||
static inline u64 rsvd_bits(int s, int e)
|
||||
{
|
||||
return ((1ULL << (e - s + 1)) - 1) << s;
|
||||
@ -404,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
|
||||
/*
|
||||
* The idea using the light way get the spte on x86_32 guest is from
|
||||
* gup_get_pte(arch/x86/mm/gup.c).
|
||||
* The difference is we can not catch the spte tlb flush if we leave
|
||||
* guest mode, so we emulate it by increase clear_spte_count when spte
|
||||
* is cleared.
|
||||
*
|
||||
* An spte tlb flush may be pending, because kvm_set_pte_rmapp
|
||||
* coalesces them and we are running out of the MMU lock. Therefore
|
||||
* we need to protect against in-progress updates of the spte.
|
||||
*
|
||||
* Reading the spte while an update is in progress may get the old value
|
||||
* for the high part of the spte. The race is fine for a present->non-present
|
||||
* change (because the high part of the spte is ignored for non-present spte),
|
||||
* but for a present->present change we must reread the spte.
|
||||
*
|
||||
* All such changes are done in two steps (present->non-present and
|
||||
* non-present->present), hence it is enough to count the number of
|
||||
* present->non-present updates: if it changed while reading the spte,
|
||||
* we might have hit the race. This is done using clear_spte_count.
|
||||
*/
|
||||
static u64 __get_spte_lockless(u64 *sptep)
|
||||
{
|
||||
@ -1511,6 +1584,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
|
||||
if (!direct)
|
||||
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
|
||||
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
|
||||
|
||||
/*
|
||||
* The active_mmu_pages list is the FIFO list, do not move the
|
||||
* page until it is zapped. kvm_zap_obsolete_pages depends on
|
||||
* this feature. See the comments in kvm_zap_obsolete_pages().
|
||||
*/
|
||||
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
|
||||
sp->parent_ptes = 0;
|
||||
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
|
||||
@ -1648,6 +1727,16 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
|
||||
struct list_head *invalid_list);
|
||||
|
||||
/*
|
||||
* NOTE: we should pay more attention on the zapped-obsolete page
|
||||
* (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk
|
||||
* since it has been deleted from active_mmu_pages but still can be found
|
||||
* at hast list.
|
||||
*
|
||||
* for_each_gfn_indirect_valid_sp has skipped that kind of page and
|
||||
* kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
|
||||
* all the obsolete pages.
|
||||
*/
|
||||
#define for_each_gfn_sp(_kvm, _sp, _gfn) \
|
||||
hlist_for_each_entry(_sp, \
|
||||
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
|
||||
@ -1838,6 +1927,11 @@ static void clear_sp_write_flooding_count(u64 *spte)
|
||||
__clear_sp_write_flooding_count(sp);
|
||||
}
|
||||
|
||||
static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
|
||||
}
|
||||
|
||||
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
|
||||
gfn_t gfn,
|
||||
gva_t gaddr,
|
||||
@ -1864,6 +1958,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
|
||||
role.quadrant = quadrant;
|
||||
}
|
||||
for_each_gfn_sp(vcpu->kvm, sp, gfn) {
|
||||
if (is_obsolete_sp(vcpu->kvm, sp))
|
||||
continue;
|
||||
|
||||
if (!need_sync && sp->unsync)
|
||||
need_sync = true;
|
||||
|
||||
@ -1900,6 +1997,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
|
||||
|
||||
account_shadowed(vcpu->kvm, gfn);
|
||||
}
|
||||
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
|
||||
init_shadow_page_table(sp);
|
||||
trace_kvm_mmu_get_page(sp, true);
|
||||
return sp;
|
||||
@ -2070,8 +2168,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
|
||||
kvm_mmu_page_unlink_children(kvm, sp);
|
||||
kvm_mmu_unlink_parents(kvm, sp);
|
||||
|
||||
if (!sp->role.invalid && !sp->role.direct)
|
||||
unaccount_shadowed(kvm, sp->gfn);
|
||||
|
||||
if (sp->unsync)
|
||||
kvm_unlink_unsync_page(kvm, sp);
|
||||
if (!sp->root_count) {
|
||||
@ -2081,7 +2181,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
kvm_mod_used_mmu_pages(kvm, -1);
|
||||
} else {
|
||||
list_move(&sp->link, &kvm->arch.active_mmu_pages);
|
||||
kvm_reload_remote_mmus(kvm);
|
||||
|
||||
/*
|
||||
* The obsolete pages can not be used on any vcpus.
|
||||
* See the comments in kvm_mmu_invalidate_zap_all_pages().
|
||||
*/
|
||||
if (!sp->role.invalid && !is_obsolete_sp(kvm, sp))
|
||||
kvm_reload_remote_mmus(kvm);
|
||||
}
|
||||
|
||||
sp->role.invalid = 1;
|
||||
@ -2331,7 +2437,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
||||
u64 spte;
|
||||
int ret = 0;
|
||||
|
||||
if (set_mmio_spte(sptep, gfn, pfn, pte_access))
|
||||
if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access))
|
||||
return 0;
|
||||
|
||||
spte = PT_PRESENT_MASK;
|
||||
@ -2869,22 +2975,25 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return;
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
|
||||
(vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
|
||||
vcpu->arch.mmu.direct_map)) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
sp = page_header(root);
|
||||
--sp->root_count;
|
||||
if (!sp->root_count && sp->role.invalid) {
|
||||
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
|
||||
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
|
||||
}
|
||||
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
for (i = 0; i < 4; ++i) {
|
||||
hpa_t root = vcpu->arch.mmu.pae_root[i];
|
||||
|
||||
@ -3148,17 +3257,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
|
||||
return spte;
|
||||
}
|
||||
|
||||
/*
|
||||
* If it is a real mmio page fault, return 1 and emulat the instruction
|
||||
* directly, return 0 to let CPU fault again on the address, -1 is
|
||||
* returned if bug is detected.
|
||||
*/
|
||||
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
{
|
||||
u64 spte;
|
||||
|
||||
if (quickly_check_mmio_pf(vcpu, addr, direct))
|
||||
return 1;
|
||||
return RET_MMIO_PF_EMULATE;
|
||||
|
||||
spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
|
||||
|
||||
@ -3166,12 +3270,15 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
gfn_t gfn = get_mmio_spte_gfn(spte);
|
||||
unsigned access = get_mmio_spte_access(spte);
|
||||
|
||||
if (!check_mmio_spte(vcpu->kvm, spte))
|
||||
return RET_MMIO_PF_INVALID;
|
||||
|
||||
if (direct)
|
||||
addr = 0;
|
||||
|
||||
trace_handle_mmio_page_fault(addr, gfn, access);
|
||||
vcpu_cache_mmio_info(vcpu, addr, gfn, access);
|
||||
return 1;
|
||||
return RET_MMIO_PF_EMULATE;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3179,13 +3286,13 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
* it's a BUG if the gfn is not a mmio page.
|
||||
*/
|
||||
if (direct && !check_direct_spte_mmio_pf(spte))
|
||||
return -1;
|
||||
return RET_MMIO_PF_BUG;
|
||||
|
||||
/*
|
||||
* If the page table is zapped by other cpus, let CPU fault again on
|
||||
* the address.
|
||||
*/
|
||||
return 0;
|
||||
return RET_MMIO_PF_RETRY;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
|
||||
|
||||
@ -3195,7 +3302,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
|
||||
int ret;
|
||||
|
||||
ret = handle_mmio_page_fault_common(vcpu, addr, direct);
|
||||
WARN_ON(ret < 0);
|
||||
WARN_ON(ret == RET_MMIO_PF_BUG);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3207,8 +3314,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
|
||||
|
||||
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
|
||||
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK))
|
||||
return handle_mmio_page_fault(vcpu, gva, error_code, true);
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||
r = handle_mmio_page_fault(vcpu, gva, error_code, true);
|
||||
|
||||
if (likely(r != RET_MMIO_PF_INVALID))
|
||||
return r;
|
||||
}
|
||||
|
||||
r = mmu_topup_memory_caches(vcpu);
|
||||
if (r)
|
||||
@ -3284,8 +3395,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
||||
ASSERT(vcpu);
|
||||
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK))
|
||||
return handle_mmio_page_fault(vcpu, gpa, error_code, true);
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||
r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
|
||||
|
||||
if (likely(r != RET_MMIO_PF_INVALID))
|
||||
return r;
|
||||
}
|
||||
|
||||
r = mmu_topup_memory_caches(vcpu);
|
||||
if (r)
|
||||
@ -3391,8 +3506,8 @@ static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
|
||||
*access &= mask;
|
||||
}
|
||||
|
||||
static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
|
||||
int *nr_present)
|
||||
static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
|
||||
unsigned access, int *nr_present)
|
||||
{
|
||||
if (unlikely(is_mmio_spte(*sptep))) {
|
||||
if (gfn != get_mmio_spte_gfn(*sptep)) {
|
||||
@ -3401,7 +3516,7 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
|
||||
}
|
||||
|
||||
(*nr_present)++;
|
||||
mark_mmio_spte(sptep, gfn, access);
|
||||
mark_mmio_spte(kvm, sptep, gfn, access);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3764,9 +3879,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
|
||||
if (r)
|
||||
goto out;
|
||||
r = mmu_alloc_roots(vcpu);
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
mmu_sync_roots(vcpu);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_mmu_sync_roots(vcpu);
|
||||
if (r)
|
||||
goto out;
|
||||
/* set_cr3() should ensure TLB has been flushed */
|
||||
@ -4179,39 +4292,107 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
#define BATCH_ZAP_PAGES 10
|
||||
static void kvm_zap_obsolete_pages(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_mmu_page *sp, *node;
|
||||
LIST_HEAD(invalid_list);
|
||||
int batch = 0;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
restart:
|
||||
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
|
||||
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
|
||||
goto restart;
|
||||
list_for_each_entry_safe_reverse(sp, node,
|
||||
&kvm->arch.active_mmu_pages, link) {
|
||||
int ret;
|
||||
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
/*
|
||||
* No obsolete page exists before new created page since
|
||||
* active_mmu_pages is the FIFO list.
|
||||
*/
|
||||
if (!is_obsolete_sp(kvm, sp))
|
||||
break;
|
||||
|
||||
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_mmu_page *sp, *node;
|
||||
LIST_HEAD(invalid_list);
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
restart:
|
||||
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
|
||||
if (!sp->mmio_cached)
|
||||
/*
|
||||
* Since we are reversely walking the list and the invalid
|
||||
* list will be moved to the head, skip the invalid page
|
||||
* can help us to avoid the infinity list walking.
|
||||
*/
|
||||
if (sp->role.invalid)
|
||||
continue;
|
||||
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
|
||||
|
||||
/*
|
||||
* Need not flush tlb since we only zap the sp with invalid
|
||||
* generation number.
|
||||
*/
|
||||
if (batch >= BATCH_ZAP_PAGES &&
|
||||
cond_resched_lock(&kvm->mmu_lock)) {
|
||||
batch = 0;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
ret = kvm_mmu_prepare_zap_page(kvm, sp,
|
||||
&kvm->arch.zapped_obsolete_pages);
|
||||
batch += ret;
|
||||
|
||||
if (ret)
|
||||
goto restart;
|
||||
}
|
||||
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
/*
|
||||
* Should flush tlb before free page tables since lockless-walking
|
||||
* may use the pages.
|
||||
*/
|
||||
kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fast invalidate all shadow pages and use lock-break technique
|
||||
* to zap obsolete pages.
|
||||
*
|
||||
* It's required when memslot is being deleted or VM is being
|
||||
* destroyed, in these cases, we should ensure that KVM MMU does
|
||||
* not use any resource of the being-deleted slot or all slots
|
||||
* after calling the function.
|
||||
*/
|
||||
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
|
||||
{
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
trace_kvm_mmu_invalidate_zap_all_pages(kvm);
|
||||
kvm->arch.mmu_valid_gen++;
|
||||
|
||||
/*
|
||||
* Notify all vcpus to reload its shadow page table
|
||||
* and flush TLB. Then all vcpus will switch to new
|
||||
* shadow page table with the new mmu_valid_gen.
|
||||
*
|
||||
* Note: we should do this under the protection of
|
||||
* mmu-lock, otherwise, vcpu would purge shadow page
|
||||
* but miss tlb flush.
|
||||
*/
|
||||
kvm_reload_remote_mmus(kvm);
|
||||
|
||||
kvm_zap_obsolete_pages(kvm);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
|
||||
{
|
||||
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* The very rare case: if the generation-number is round,
|
||||
* zap all shadow pages.
|
||||
*
|
||||
* The max value is MMIO_MAX_GEN - 1 since it is not called
|
||||
* when mark memslot invalid.
|
||||
*/
|
||||
if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) {
|
||||
printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
|
||||
kvm_mmu_invalidate_zap_all_pages(kvm);
|
||||
}
|
||||
}
|
||||
|
||||
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
struct kvm *kvm;
|
||||
@ -4240,15 +4421,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
* want to shrink a VM that only started to populate its MMU
|
||||
* anyway.
|
||||
*/
|
||||
if (!kvm->arch.n_used_mmu_pages)
|
||||
if (!kvm->arch.n_used_mmu_pages &&
|
||||
!kvm_has_zapped_obsolete_pages(kvm))
|
||||
continue;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
if (kvm_has_zapped_obsolete_pages(kvm)) {
|
||||
kvm_mmu_commit_zap_page(kvm,
|
||||
&kvm->arch.zapped_obsolete_pages);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
prepare_zap_oldest_mmu_page(kvm, &invalid_list);
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
|
||||
unlock:
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
|
@ -52,6 +52,23 @@
|
||||
|
||||
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
|
||||
|
||||
/*
|
||||
* Return values of handle_mmio_page_fault_common:
|
||||
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
|
||||
* directly.
|
||||
* RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
|
||||
* fault path update the mmio spte.
|
||||
* RET_MMIO_PF_RETRY: let CPU fault again on the address.
|
||||
* RET_MMIO_PF_BUG: bug is detected.
|
||||
*/
|
||||
enum {
|
||||
RET_MMIO_PF_EMULATE = 1,
|
||||
RET_MMIO_PF_INVALID = 2,
|
||||
RET_MMIO_PF_RETRY = 0,
|
||||
RET_MMIO_PF_BUG = -1
|
||||
};
|
||||
|
||||
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
|
||||
int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
|
||||
|
||||
@ -97,4 +114,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
|
||||
return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
|
||||
#endif
|
||||
|
@ -7,16 +7,18 @@
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvmmmu
|
||||
|
||||
#define KVM_MMU_PAGE_FIELDS \
|
||||
__field(__u64, gfn) \
|
||||
__field(__u32, role) \
|
||||
__field(__u32, root_count) \
|
||||
#define KVM_MMU_PAGE_FIELDS \
|
||||
__field(unsigned long, mmu_valid_gen) \
|
||||
__field(__u64, gfn) \
|
||||
__field(__u32, role) \
|
||||
__field(__u32, root_count) \
|
||||
__field(bool, unsync)
|
||||
|
||||
#define KVM_MMU_PAGE_ASSIGN(sp) \
|
||||
__entry->gfn = sp->gfn; \
|
||||
__entry->role = sp->role.word; \
|
||||
__entry->root_count = sp->root_count; \
|
||||
#define KVM_MMU_PAGE_ASSIGN(sp) \
|
||||
__entry->mmu_valid_gen = sp->mmu_valid_gen; \
|
||||
__entry->gfn = sp->gfn; \
|
||||
__entry->role = sp->role.word; \
|
||||
__entry->root_count = sp->root_count; \
|
||||
__entry->unsync = sp->unsync;
|
||||
|
||||
#define KVM_MMU_PAGE_PRINTK() ({ \
|
||||
@ -28,8 +30,8 @@
|
||||
\
|
||||
role.word = __entry->role; \
|
||||
\
|
||||
trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \
|
||||
" %snxe root %u %s%c", \
|
||||
trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \
|
||||
" %snxe root %u %s%c", __entry->mmu_valid_gen, \
|
||||
__entry->gfn, role.level, \
|
||||
role.cr4_pae ? " pae" : "", \
|
||||
role.quadrant, \
|
||||
@ -197,23 +199,25 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
|
||||
|
||||
TRACE_EVENT(
|
||||
mark_mmio_spte,
|
||||
TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
|
||||
TP_ARGS(sptep, gfn, access),
|
||||
TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen),
|
||||
TP_ARGS(sptep, gfn, access, gen),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(void *, sptep)
|
||||
__field(gfn_t, gfn)
|
||||
__field(unsigned, access)
|
||||
__field(unsigned int, gen)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->sptep = sptep;
|
||||
__entry->gfn = gfn;
|
||||
__entry->access = access;
|
||||
__entry->gen = gen;
|
||||
),
|
||||
|
||||
TP_printk("sptep:%p gfn %llx access %x", __entry->sptep, __entry->gfn,
|
||||
__entry->access)
|
||||
TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep,
|
||||
__entry->gfn, __entry->access, __entry->gen)
|
||||
);
|
||||
|
||||
TRACE_EVENT(
|
||||
@ -274,6 +278,50 @@ TRACE_EVENT(
|
||||
__spte_satisfied(old_spte), __spte_satisfied(new_spte)
|
||||
)
|
||||
);
|
||||
|
||||
TRACE_EVENT(
|
||||
kvm_mmu_invalidate_zap_all_pages,
|
||||
TP_PROTO(struct kvm *kvm),
|
||||
TP_ARGS(kvm),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, mmu_valid_gen)
|
||||
__field(unsigned int, mmu_used_pages)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
|
||||
__entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
|
||||
),
|
||||
|
||||
TP_printk("kvm-mmu-valid-gen %lx used_pages %x",
|
||||
__entry->mmu_valid_gen, __entry->mmu_used_pages
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
TRACE_EVENT(
|
||||
check_mmio_spte,
|
||||
TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
|
||||
TP_ARGS(spte, kvm_gen, spte_gen),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned int, kvm_gen)
|
||||
__field(unsigned int, spte_gen)
|
||||
__field(u64, spte)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->kvm_gen = kvm_gen;
|
||||
__entry->spte_gen = spte_gen;
|
||||
__entry->spte = spte;
|
||||
),
|
||||
|
||||
TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte,
|
||||
__entry->kvm_gen, __entry->spte_gen,
|
||||
__entry->kvm_gen == __entry->spte_gen
|
||||
)
|
||||
);
|
||||
#endif /* _TRACE_KVMMMU_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
||||
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
||||
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK))
|
||||
return handle_mmio_page_fault(vcpu, addr, error_code,
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||
r = handle_mmio_page_fault(vcpu, addr, error_code,
|
||||
mmu_is_nested(vcpu));
|
||||
if (likely(r != RET_MMIO_PF_INVALID))
|
||||
return r;
|
||||
};
|
||||
|
||||
r = mmu_topup_memory_caches(vcpu);
|
||||
if (r)
|
||||
@ -792,7 +795,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
pte_access &= gpte_access(vcpu, gpte);
|
||||
protect_clean_gpte(&pte_access, gpte);
|
||||
|
||||
if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
|
||||
if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access,
|
||||
&nr_present))
|
||||
continue;
|
||||
|
||||
if (gfn != sp->gfns[i]) {
|
||||
|
@ -1026,7 +1026,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
g_tsc_offset = svm->vmcb->control.tsc_offset -
|
||||
svm->nested.hsave->control.tsc_offset;
|
||||
svm->nested.hsave->control.tsc_offset = offset;
|
||||
}
|
||||
} else
|
||||
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
|
||||
svm->vmcb->control.tsc_offset,
|
||||
offset);
|
||||
|
||||
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
|
||||
|
||||
@ -1044,6 +1047,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
|
||||
svm->vmcb->control.tsc_offset += adjustment;
|
||||
if (is_guest_mode(vcpu))
|
||||
svm->nested.hsave->control.tsc_offset += adjustment;
|
||||
else
|
||||
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
|
||||
svm->vmcb->control.tsc_offset - adjustment,
|
||||
svm->vmcb->control.tsc_offset);
|
||||
|
||||
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
|
||||
}
|
||||
|
||||
|
@ -756,6 +756,27 @@ TRACE_EVENT(
|
||||
__entry->gpa_match ? "GPA" : "GVA")
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_write_tsc_offset,
|
||||
TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset,
|
||||
__u64 next_tsc_offset),
|
||||
TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
__field( __u64, previous_tsc_offset )
|
||||
__field( __u64, next_tsc_offset )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
__entry->previous_tsc_offset = previous_tsc_offset;
|
||||
__entry->next_tsc_offset = next_tsc_offset;
|
||||
),
|
||||
|
||||
TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id,
|
||||
__entry->previous_tsc_offset, __entry->next_tsc_offset)
|
||||
);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define host_clocks \
|
||||
|
@ -2096,6 +2096,8 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
|
||||
vmcs12->tsc_offset : 0));
|
||||
} else {
|
||||
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
|
||||
vmcs_read64(TSC_OFFSET), offset);
|
||||
vmcs_write64(TSC_OFFSET, offset);
|
||||
}
|
||||
}
|
||||
@ -2103,11 +2105,14 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
|
||||
{
|
||||
u64 offset = vmcs_read64(TSC_OFFSET);
|
||||
|
||||
vmcs_write64(TSC_OFFSET, offset + adjustment);
|
||||
if (is_guest_mode(vcpu)) {
|
||||
/* Even when running L2, the adjustment needs to apply to L1 */
|
||||
to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
|
||||
}
|
||||
} else
|
||||
trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
|
||||
offset + adjustment);
|
||||
}
|
||||
|
||||
static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
|
||||
@ -4176,10 +4181,10 @@ static void ept_set_mmio_spte_mask(void)
|
||||
/*
|
||||
* EPT Misconfigurations can be generated if the value of bits 2:0
|
||||
* of an EPT paging-structure entry is 110b (write/execute).
|
||||
* Also, magic bits (0xffull << 49) is set to quickly identify mmio
|
||||
* Also, magic bits (0x3ull << 62) is set to quickly identify mmio
|
||||
* spte.
|
||||
*/
|
||||
kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull);
|
||||
kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5366,10 +5371,14 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
|
||||
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
|
||||
|
||||
ret = handle_mmio_page_fault_common(vcpu, gpa, true);
|
||||
if (likely(ret == 1))
|
||||
if (likely(ret == RET_MMIO_PF_EMULATE))
|
||||
return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
|
||||
EMULATE_DONE;
|
||||
if (unlikely(!ret))
|
||||
|
||||
if (unlikely(ret == RET_MMIO_PF_INVALID))
|
||||
return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
|
||||
|
||||
if (unlikely(ret == RET_MMIO_PF_RETRY))
|
||||
return 1;
|
||||
|
||||
/* It is the real ept misconfig */
|
||||
|
@ -1193,20 +1193,37 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
elapsed = ns - kvm->arch.last_tsc_nsec;
|
||||
|
||||
if (vcpu->arch.virtual_tsc_khz) {
|
||||
int faulted = 0;
|
||||
|
||||
/* n.b - signed multiplication and division required */
|
||||
usdiff = data - kvm->arch.last_tsc_write;
|
||||
#ifdef CONFIG_X86_64
|
||||
usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
|
||||
#else
|
||||
/* do_div() only does unsigned */
|
||||
asm("idivl %2; xor %%edx, %%edx"
|
||||
: "=A"(usdiff)
|
||||
: "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
|
||||
asm("1: idivl %[divisor]\n"
|
||||
"2: xor %%edx, %%edx\n"
|
||||
" movl $0, %[faulted]\n"
|
||||
"3:\n"
|
||||
".section .fixup,\"ax\"\n"
|
||||
"4: movl $1, %[faulted]\n"
|
||||
" jmp 3b\n"
|
||||
".previous\n"
|
||||
|
||||
_ASM_EXTABLE(1b, 4b)
|
||||
|
||||
: "=A"(usdiff), [faulted] "=r" (faulted)
|
||||
: "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
|
||||
|
||||
#endif
|
||||
do_div(elapsed, 1000);
|
||||
usdiff -= elapsed;
|
||||
if (usdiff < 0)
|
||||
usdiff = -usdiff;
|
||||
|
||||
/* idivl overflow => difference is larger than USEC_PER_SEC */
|
||||
if (faulted)
|
||||
usdiff = USEC_PER_SEC;
|
||||
} else
|
||||
usdiff = USEC_PER_SEC; /* disable TSC match window below */
|
||||
|
||||
@ -1587,6 +1604,30 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* kvmclock updates which are isolated to a given vcpu, such as
|
||||
* vcpu->cpu migration, should not allow system_timestamp from
|
||||
* the rest of the vcpus to remain static. Otherwise ntp frequency
|
||||
* correction applies to one vcpu's system_timestamp but not
|
||||
* the others.
|
||||
*
|
||||
* So in those cases, request a kvmclock update for all vcpus.
|
||||
* The worst case for a remote vcpu to update its kvmclock
|
||||
* is then bounded by maximum nohz sleep latency.
|
||||
*/
|
||||
|
||||
static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
|
||||
{
|
||||
int i;
|
||||
struct kvm *kvm = v->kvm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static bool msr_mtrr_valid(unsigned msr)
|
||||
{
|
||||
switch (msr) {
|
||||
@ -1984,7 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
kvmclock_reset(vcpu);
|
||||
|
||||
vcpu->arch.time = data;
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
|
||||
|
||||
/* we verify if the enable bit is set... */
|
||||
if (!(data & 1))
|
||||
@ -2701,7 +2742,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
* kvmclock on vcpu->cpu migration
|
||||
*/
|
||||
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
|
||||
if (vcpu->cpu != cpu)
|
||||
kvm_migrate_timers(vcpu);
|
||||
vcpu->cpu = cpu;
|
||||
@ -5238,7 +5279,13 @@ static void kvm_set_mmio_spte_mask(void)
|
||||
* Set the reserved bits and the present bit of an paging-structure
|
||||
* entry to generate page fault with PFER.RSV = 1.
|
||||
*/
|
||||
mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;
|
||||
/* Mask the reserved physical address bits. */
|
||||
mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
|
||||
|
||||
/* Bit 62 is always reserved for 32bit host. */
|
||||
mask |= 0x3ull << 62;
|
||||
|
||||
/* Set the present bit. */
|
||||
mask |= 1ull;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -5498,13 +5545,6 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
|
||||
char instruction[3];
|
||||
unsigned long rip = kvm_rip_read(vcpu);
|
||||
|
||||
/*
|
||||
* Blow out the MMU to ensure that no other VCPU has an active mapping
|
||||
* to ensure that the updated hypercall appears atomically across all
|
||||
* VCPUs.
|
||||
*/
|
||||
kvm_mmu_zap_all(vcpu->kvm);
|
||||
|
||||
kvm_x86_ops->patch_hypercall(vcpu, instruction);
|
||||
|
||||
return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
|
||||
@ -5702,6 +5742,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
__kvm_migrate_timers(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
|
||||
kvm_gen_update_masterclock(vcpu->kvm);
|
||||
if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
|
||||
kvm_gen_kvmclock_update(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
|
||||
r = kvm_guest_time_update(vcpu);
|
||||
if (unlikely(r))
|
||||
@ -6812,6 +6854,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
return -EINVAL;
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
|
||||
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
|
||||
|
||||
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
|
||||
@ -7040,22 +7083,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
* If memory slot is created, or moved, we need to clear all
|
||||
* mmio sptes.
|
||||
*/
|
||||
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
|
||||
kvm_mmu_zap_mmio_sptes(kvm);
|
||||
kvm_reload_remote_mmus(kvm);
|
||||
}
|
||||
kvm_mmu_invalidate_mmio_sptes(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||
{
|
||||
kvm_mmu_zap_all(kvm);
|
||||
kvm_reload_remote_mmus(kvm);
|
||||
kvm_mmu_invalidate_zap_all_pages(kvm);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
kvm_arch_flush_shadow_all(kvm);
|
||||
kvm_mmu_invalidate_zap_all_pages(kvm);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
@ -7263,3 +7302,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
|
||||
|
@ -61,6 +61,8 @@ struct arch_timer_cpu {
|
||||
#ifdef CONFIG_KVM_ARM_TIMER
|
||||
int kvm_timer_hyp_init(void);
|
||||
int kvm_timer_init(struct kvm *kvm);
|
||||
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_irq_level *irq);
|
||||
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_irq_level *irq) {}
|
||||
static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
|
@ -125,6 +125,7 @@ static inline bool is_error_page(struct page *page)
|
||||
#define KVM_REQ_MCLOCK_INPROGRESS 19
|
||||
#define KVM_REQ_EPR_EXIT 20
|
||||
#define KVM_REQ_SCAN_IOAPIC 21
|
||||
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
|
||||
@ -145,7 +146,8 @@ struct kvm_io_range {
|
||||
#define NR_IOBUS_DEVS 1000
|
||||
|
||||
struct kvm_io_bus {
|
||||
int dev_count;
|
||||
int dev_count;
|
||||
int ioeventfd_count;
|
||||
struct kvm_io_range range[];
|
||||
};
|
||||
|
||||
|
@ -25,14 +25,12 @@
|
||||
#include <clocksource/arm_arch_timer.h>
|
||||
#include <asm/arch_timer.h>
|
||||
|
||||
#include <asm/kvm_vgic.h>
|
||||
#include <asm/kvm_arch_timer.h>
|
||||
#include <kvm/arm_vgic.h>
|
||||
#include <kvm/arm_arch_timer.h>
|
||||
|
||||
static struct timecounter *timecounter;
|
||||
static struct workqueue_struct *wqueue;
|
||||
static struct kvm_irq_level timer_irq = {
|
||||
.level = 1,
|
||||
};
|
||||
static unsigned int host_vtimer_irq;
|
||||
|
||||
static cycle_t kvm_phys_timer_read(void)
|
||||
{
|
||||
@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
|
||||
|
||||
timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
|
||||
kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
|
||||
vcpu->arch.timer_cpu.irq->irq,
|
||||
vcpu->arch.timer_cpu.irq->level);
|
||||
timer->irq->irq,
|
||||
timer->irq->level);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
|
||||
@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
timer_arm(timer, ns);
|
||||
}
|
||||
|
||||
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_irq_level *irq)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
|
||||
/*
|
||||
* The vcpu timer irq number cannot be determined in
|
||||
* kvm_timer_vcpu_init() because it is called much before
|
||||
* kvm_vcpu_set_target(). To handle this, we determine
|
||||
* vcpu timer irq number when the vcpu is reset.
|
||||
*/
|
||||
timer->irq = irq;
|
||||
}
|
||||
|
||||
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
|
||||
@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
|
||||
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
||||
timer->timer.function = kvm_timer_expire;
|
||||
timer->irq = &timer_irq;
|
||||
}
|
||||
|
||||
static void kvm_timer_init_interrupt(void *info)
|
||||
{
|
||||
enable_percpu_irq(timer_irq.irq, 0);
|
||||
enable_percpu_irq(host_vtimer_irq, 0);
|
||||
}
|
||||
|
||||
|
||||
@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self,
|
||||
break;
|
||||
case CPU_DYING:
|
||||
case CPU_DYING_FROZEN:
|
||||
disable_percpu_irq(timer_irq.irq);
|
||||
disable_percpu_irq(host_vtimer_irq);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -230,7 +241,7 @@ int kvm_timer_hyp_init(void)
|
||||
goto out;
|
||||
}
|
||||
|
||||
timer_irq.irq = ppi;
|
||||
host_vtimer_irq = ppi;
|
||||
|
||||
err = register_cpu_notifier(&kvm_timer_cpu_nb);
|
||||
if (err) {
|
@ -753,6 +753,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
if (ret < 0)
|
||||
goto unlock_fail;
|
||||
|
||||
kvm->buses[bus_idx]->ioeventfd_count++;
|
||||
list_add_tail(&p->list, &kvm->ioeventfds);
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
@ -798,6 +799,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
continue;
|
||||
|
||||
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
|
||||
kvm->buses[bus_idx]->ioeventfd_count--;
|
||||
ioeventfd_release(p);
|
||||
ret = 0;
|
||||
break;
|
||||
|
@ -2926,7 +2926,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
struct kvm_io_bus *new_bus, *bus;
|
||||
|
||||
bus = kvm->buses[bus_idx];
|
||||
if (bus->dev_count > NR_IOBUS_DEVS - 1)
|
||||
/* exclude ioeventfd which is limited by maximum fd */
|
||||
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
|
||||
return -ENOSPC;
|
||||
|
||||
new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
|
||||
@ -3181,6 +3182,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
|
||||
|
||||
out_undebugfs:
|
||||
unregister_syscore_ops(&kvm_syscore_ops);
|
||||
misc_deregister(&kvm_dev);
|
||||
out_unreg:
|
||||
kvm_async_pf_deinit();
|
||||
out_free:
|
||||
|
Loading…
Reference in New Issue
Block a user