mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-01-08 14:23:19 +00:00)
Merge tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Marcelo Tosatti:
 "KVM updates for the 3.9 merge window, including x86 real mode
  emulation fixes, stronger memory slot interface restrictions, mmu_lock
  spinlock hold time reduction, improved handling of large page faults
  on shadow, initial APICv HW acceleration support, s390 channel IO
  based virtio, amongst others"

* tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (143 commits)
  Revert "KVM: MMU: lazily drop large spte"
  x86: pvclock kvm: align allocation size to page size
  KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr
  x86 emulator: fix parity calculation for AAD instruction
  KVM: PPC: BookE: Handle alignment interrupts
  booke: Added DBCR4 SPR number
  KVM: PPC: booke: Allow multiple exception types
  KVM: PPC: booke: use vcpu reference from thread_struct
  KVM: Remove user_alloc from struct kvm_memory_slot
  KVM: VMX: disable apicv by default
  KVM: s390: Fix handling of iscs.
  KVM: MMU: cleanup __direct_map
  KVM: MMU: remove pt_access in mmu_set_spte
  KVM: MMU: cleanup mapping-level
  KVM: MMU: lazily drop large spte
  KVM: VMX: cleanup vmx_set_cr0().
  KVM: VMX: add missing exit names to VMX_EXIT_REASONS array
  KVM: VMX: disable SMEP feature when guest is in non-paging mode
  KVM: Remove duplicate text in api.txt
  Revert "KVM: MMU: split kvm_mmu_free_page"
  ...
This commit is contained in:
commit 89f883372f
@@ -219,19 +219,6 @@ allocation of vcpu ids.  For example, if userspace wants
single-threaded guest vcpus, it should make all vcpu ids be a multiple
of the number of vcpus per vcore.

On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
threads in one or more virtual CPU cores.  (This is because the
hardware requires all the hardware threads in a CPU core to be in the
same partition.)  The KVM_CAP_PPC_SMT capability indicates the number
of vcpus per virtual core (vcore).  The vcore id is obtained by
dividing the vcpu id by the number of vcpus per vcore.  The vcpus in a
given vcore will always be in the same physical core as each other
(though that might be a different physical core from time to time).
Userspace can control the threading (SMT) mode of the guest by its
allocation of vcpu ids.  For example, if userspace wants
single-threaded guest vcpus, it should make all vcpu ids be a multiple
of the number of vcpus per vcore.
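
As a rough illustration of that id rule (a sketch, not part of this
merge; kvm_fd, vm_fd and nr_guest_cpus are assumed to exist in the
caller), one vcpu per vcore gives single-threaded guest cores:

	/* ids 0, N, 2N, ... where N = threads per vcore */
	int threads_per_vcore = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
				      KVM_CAP_PPC_SMT);
	for (int i = 0; i < nr_guest_cpus; i++) {
		int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU,
				    i * threads_per_vcore);
		if (vcpu_fd < 0)
			/* handle error */;
	}
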
For virtual cpus that have been created with S390 user controlled virtual
machines, the resulting vcpu fd can be memory mapped at page offset
KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual

@@ -345,7 +332,7 @@ struct kvm_sregs {
	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};

/* ppc -- see arch/powerpc/include/asm/kvm.h */
/* ppc -- see arch/powerpc/include/uapi/asm/kvm.h */

interrupt_bitmap is a bitmap of pending external interrupts.  At most
one bit may be set.  This interrupt has been acknowledged by the APIC

@@ -892,12 +879,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
be identical.  This allows large pages in the guest to be backed by large
pages in the host.

The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs
kvm to keep track of writes to memory within the slot.  See KVM_GET_DIRTY_LOG
ioctl.  The KVM_CAP_READONLY_MEM capability indicates the availability of the
KVM_MEM_READONLY flag.  When this flag is set for a memory region, KVM only
allows read accesses.  Writes will be posted to userspace as KVM_EXIT_MMIO
exits.
The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
to make a new slot read-only.  In this case, writes to this memory will be
posted to userspace as KVM_EXIT_MMIO exits.
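
A minimal userspace sketch of the read-only flag described above (not
from the patch; vm_fd, backing and GUEST_ROM_BASE are assumed, and
KVM_CAP_READONLY_MEM is presumed present):

	struct kvm_userspace_memory_region region = {
		.slot            = 1,
		.flags           = KVM_MEM_READONLY,
		.guest_phys_addr = GUEST_ROM_BASE,
		.memory_size     = 0x200000,
		.userspace_addr  = (__u64)(unsigned long)backing,
	};
	/* guest writes to this range now cause KVM_EXIT_MMIO */
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
		perror("KVM_SET_USER_MEMORY_REGION");
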
When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
the memory region are automatically reflected into the guest.  For example, an

@@ -931,7 +918,7 @@ documentation when it pops into existence).

4.37 KVM_ENABLE_CAP

Capability: KVM_CAP_ENABLE_CAP
Architectures: ppc
Architectures: ppc, s390
Type: vcpu ioctl
Parameters: struct kvm_enable_cap (in)
Returns: 0 on success; -1 on error

@@ -1792,6 +1779,7 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_VPA_SLB | 128
  PPC   | KVM_REG_PPC_VPA_DTL | 128
  PPC   | KVM_REG_PPC_EPCR    | 32
  PPC   | KVM_REG_PPC_EPR     | 32
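
The new 32-bit EPR register is reached through the ONE_REG interface;
a sketch (vcpu_fd is assumed to be an open vcpu file descriptor):

	__u32 epr_val;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_EPR,
		.addr = (__u64)(unsigned long)&epr_val,
	};
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		perror("KVM_GET_ONE_REG");
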
ARM registers are mapped using the lower 32 bits.  The upper 16 of that
is the register group type, or coprocessor number:

@@ -2108,6 +2096,14 @@ KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm
KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an
    I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
    I/O interruption parameters in parm (subchannel) and parm64 (intparm,
    interruption subclass)
KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
    machine check interrupt code in parm64 (note that
    machine checks needing further payload are not
    supported by this ioctl)

Note that the vcpu ioctl is asynchronous to vcpu execution.
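
For instance, injecting the sclp service interrupt listed above could
look like this from userspace (a sketch; vm_fd and sclp_parm are
assumed):

	struct kvm_s390_interrupt irq = {
		.type = KVM_S390_INT_SERVICE,
		.parm = sclp_parm,	/* sclp parameter in parm */
	};
	if (ioctl(vm_fd, KVM_S390_INTERRUPT, &irq) < 0)
		perror("KVM_S390_INTERRUPT");
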
@@ -2359,8 +2355,8 @@ executed a memory-mapped I/O instruction which could not be satisfied
by kvm.  The 'data' member contains the written data if 'is_write' is
true, and should be filled by application code otherwise.

NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR
      and KVM_EXIT_PAPR the corresponding
NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR,
      KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding
operations are complete (and guest state is consistent) only after userspace
has re-entered the kernel with KVM_RUN.  The kernel side will first finish
incomplete operations and then check for pending signals.  Userspace

@@ -2463,6 +2459,41 @@ The possible hypercalls are defined in the Power Architecture Platform
Requirements (PAPR) document available from www.power.org (free
developer registration required to access it).

		/* KVM_EXIT_S390_TSCH */
		struct {
			__u16 subchannel_id;
			__u16 subchannel_nr;
			__u32 io_int_parm;
			__u32 io_int_word;
			__u32 ipb;
			__u8 dequeued;
		} s390_tsch;

s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled
and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O
interrupt for the target subchannel has been dequeued and subchannel_id,
subchannel_nr, io_int_parm and io_int_word contain the parameters for that
interrupt. ipb is needed for instruction parameter decoding.
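
In a userspace run loop this exit could be consumed roughly as follows
(a sketch; 'run' is the mmap'ed struct kvm_run, and css_handle_tsch()
is a hypothetical helper in the channel-subsystem emulation):

	case KVM_EXIT_S390_TSCH:
		/* forward the intercepted TEST SUBCHANNEL to the CSS model */
		css_handle_tsch(run->s390_tsch.subchannel_id,
				run->s390_tsch.subchannel_nr,
				run->s390_tsch.io_int_parm,
				run->s390_tsch.io_int_word,
				run->s390_tsch.ipb,
				run->s390_tsch.dequeued);
		break;
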
		/* KVM_EXIT_EPR */
		struct {
			__u32 epr;
		} epr;

On FSL BookE PowerPC chips, the interrupt controller has a fast patch
interrupt acknowledge path to the core. When the core successfully
delivers an interrupt, it automatically populates the EPR register with
the interrupt vector number and acknowledges the interrupt inside
the interrupt controller.

In case the interrupt controller lives in user space, we need to do
the interrupt acknowledge cycle through it to fetch the next to be
delivered interrupt vector using this exit.

It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
external interrupt has just been delivered into the guest. User space
should put the acknowledged interrupt vector into the 'epr' field.
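
Handling this exit in the run loop might look like the following
sketch (pic_ack() is a hypothetical helper that performs the
acknowledge cycle in the userspace interrupt-controller model):

	case KVM_EXIT_EPR:
		/* the ack completes when KVM_RUN is re-entered */
		run->epr.epr = pic_ack();
		break;
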
		/* Fix the size of the union. */
		char padding[256];
	};

@@ -2584,3 +2615,34 @@ For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
 - The tsize field of mas1 shall be set to 4K on TLB0, even though the
   hardware ignores this value for TLB0.

6.4 KVM_CAP_S390_CSS_SUPPORT

Architectures: s390
Parameters: none
Returns: 0 on success; -1 on error

This capability enables support for handling of channel I/O instructions.

TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are
handled in-kernel, while the other I/O instructions are passed to userspace.

When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST
SUBCHANNEL intercepts.

6.5 KVM_CAP_PPC_EPR

Architectures: ppc
Parameters: args[0] defines whether the proxy facility is active
Returns: 0 on success; -1 on error

This capability enables or disables the delivery of interrupts through the
external proxy facility.

When enabled (args[0] != 0), every time the guest gets an external interrupt
delivered, it automatically exits into user space with a KVM_EXIT_EPR exit
to receive the topmost interrupt vector.

When disabled (args[0] == 0), behavior is as if this facility is unsupported.

When this capability is enabled, KVM_EXIT_EPR can occur.
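
Enabling the facility from userspace is a plain KVM_ENABLE_CAP vcpu
ioctl; a sketch (vcpu_fd assumed):

	struct kvm_enable_cap cap = {
		.cap     = KVM_CAP_PPC_EPR,
		.args[0] = 1,	/* nonzero activates the proxy facility */
	};
	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
		perror("KVM_ENABLE_CAP");
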
@@ -187,13 +187,6 @@ Shadow pages contain the following information:
    perform a reverse map from a pte to a gfn. When role.direct is set, any
    element of this array can be calculated from the gfn field when used, in
    this case, the array of gfns is not allocated. See role.direct and gfn.
  slot_bitmap:
    A bitmap containing one bit per memory slot.  If the page contains a pte
    mapping a page from memory slot n, then bit n of slot_bitmap will be set
    (if a page is aliased among several slots, then it is not guaranteed that
    all slots will be marked).
    Used during dirty logging to avoid scanning a shadow page if none if its
    pages need tracking.
  root_count:
    A counter keeping track of how many hardware registers (guest cr3 or
    pdptrs) are now pointing at the page.  While this counter is nonzero, the
@@ -23,9 +23,7 @@
#ifndef __ASM_KVM_HOST_H
#define __ASM_KVM_HOST_H

#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_USER_MEM_SLOTS 32

#define KVM_COALESCED_MMIO_PAGE_OFFSET 1

@@ -955,7 +955,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
				kvm_mem.guest_phys_addr;
		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
		r = kvm_vm_ioctl_set_memory_region(kvm,
					&kvm_userspace_mem, 0);
					&kvm_userspace_mem, false);
		if (r)
			goto out;
		break;
@@ -1580,7 +1580,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
		struct kvm_memory_slot *memslot,
		struct kvm_memory_slot old,
		struct kvm_userspace_memory_region *mem,
		int user_alloc)
		bool user_alloc)
{
	unsigned long i;
	unsigned long pfn;
@@ -1611,7 +1611,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
void kvm_arch_commit_memory_region(struct kvm *kvm,
		struct kvm_userspace_memory_region *mem,
		struct kvm_memory_slot old,
		int user_alloc)
		bool user_alloc)
{
	return;
}
@@ -1834,7 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -27,4 +27,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
#define kvm_apic_present(x) (true)
#define kvm_lapic_enabled(x) (true)

static inline bool kvm_apic_vid_enabled(void)
{
	/* IA64 has no apicv supporting, do nothing here */
	return false;
}

#endif
@@ -37,10 +37,8 @@
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#define KVM_USER_MEM_SLOTS 32
#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS

#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -523,6 +521,8 @@ struct kvm_vcpu_arch {
	u8 sane;
	u8 cpu_type;
	u8 hcall_needed;
	u8 epr_enabled;
	u8 epr_needed;

	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -44,12 +44,11 @@ enum emulation_result {
	EMULATE_DO_DCR,   /* kvm_run filled with DCR request */
	EMULATE_FAIL,     /* can't emulate this instruction */
	EMULATE_AGAIN,    /* something went wrong. go again */
	EMULATE_DO_PAPR,  /* kvm_run filled with PAPR request */
};

extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern char kvmppc_handlers_start[];
extern unsigned long kvmppc_handler_len;
extern void kvmppc_handler_highmem(void);

extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
@@ -263,6 +262,15 @@ static inline void kvm_linear_init(void)
{}
#endif

static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
{
#ifdef CONFIG_KVM_BOOKE_HV
	mtspr(SPRN_GEPR, epr);
#elif defined(CONFIG_BOOKE)
	vcpu->arch.epr = epr;
#endif
}

int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
			      struct kvm_config_tlb *cfg);
int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
@@ -956,8 +956,6 @@
#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9
#define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9
#endif
#define SPRN_SPRG_RVCPU SPRN_SPRG1
#define SPRN_SPRG_WVCPU SPRN_SPRG1
#endif

#ifdef CONFIG_8xx
@@ -56,6 +56,7 @@
#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */
#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */
#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */
#define SPRN_DBCR4 0x233 /* Debug Control Register 4 */
#define SPRN_MSRP 0x137 /* MSR Protect Register */
#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */
#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */
@@ -114,7 +114,10 @@ struct kvm_regs {
/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_SPE (1 << 9)

/* External Proxy (EXP) -- EPR */
/*
 * DEPRECATED! USE ONE_REG FOR THIS ONE!
 * External Proxy (EXP) -- EPR
 */
#define KVM_SREGS_EXP (1 << 10)

/* External PID (E.PD) -- EPSC/EPLC */
@@ -412,5 +415,6 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)

#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
#define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)

#endif /* __LINUX_KVM_POWERPC_H */
@@ -118,7 +118,7 @@ int main(void)
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
#endif
#ifdef CONFIG_KVM_BOOKE_HV
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
	DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
#endif
@@ -10,7 +10,8 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
		eventfd.o)

CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_tlb.o := -I.
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
CFLAGS_emulate.o := -I.

common-objs-y += powerpc.o emulate.o
@@ -35,7 +36,8 @@ kvm-e500-objs := \
	booke_emulate.o \
	booke_interrupts.o \
	e500.o \
	e500_tlb.o \
	e500_mmu.o \
	e500_mmu_host.o \
	e500_emulate.o
kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)

@@ -45,7 +47,8 @@ kvm-e500mc-objs := \
	booke_emulate.o \
	bookehv_interrupts.o \
	e500mc.o \
	e500_tlb.o \
	e500_mmu.o \
	e500_mmu_host.o \
	e500_emulate.o
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
@@ -34,6 +34,8 @@
#define OP_31_XOP_MTSRIN	242
#define OP_31_XOP_TLBIEL	274
#define OP_31_XOP_TLBIE		306
/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
#define OP_31_XOP_FAKE_SC1	308
#define OP_31_XOP_SLBMTE	402
#define OP_31_XOP_SLBIE		434
#define OP_31_XOP_SLBIA		498
@@ -170,6 +172,32 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
		vcpu->arch.mmu.tlbie(vcpu, addr, large);
		break;
	}
#ifdef CONFIG_KVM_BOOK3S_64_PR
	case OP_31_XOP_FAKE_SC1:
	{
		/* SC 1 papr hypercalls */
		ulong cmd = kvmppc_get_gpr(vcpu, 3);
		int i;

		if ((vcpu->arch.shared->msr & MSR_PR) ||
		    !vcpu->arch.papr_enabled) {
			emulated = EMULATE_FAIL;
			break;
		}

		if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE)
			break;

		run->papr_hcall.nr = cmd;
		for (i = 0; i < 9; ++i) {
			ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
			run->papr_hcall.args[i] = gpr;
		}

		emulated = EMULATE_DO_PAPR;
		break;
	}
#endif
	case OP_31_XOP_EIOIO:
		break;
	case OP_31_XOP_SLBMTE:
@@ -427,6 +455,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
	case SPRN_PMC3_GEKKO:
	case SPRN_PMC4_GEKKO:
	case SPRN_WPAR_GEKKO:
	case SPRN_MSSSR0:
		break;
unprivileged:
	default:
@@ -523,6 +552,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
	case SPRN_PMC3_GEKKO:
	case SPRN_PMC4_GEKKO:
	case SPRN_WPAR_GEKKO:
	case SPRN_MSSSR0:
		*spr_val = 0;
		break;
	default:
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = -EINVAL;
|
||||
if (log->slot >= KVM_MEMORY_SLOTS)
|
||||
if (log->slot >= KVM_USER_MEM_SLOTS)
|
||||
goto out;
|
||||
|
||||
memslot = id_to_memslot(kvm->memslots, log->slot);
|
||||
|
@ -762,6 +762,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
r = RESUME_HOST_NV;
|
||||
break;
|
||||
case EMULATE_DO_PAPR:
|
||||
run->exit_reason = KVM_EXIT_PAPR_HCALL;
|
||||
vcpu->arch.hcall_needed = 1;
|
||||
r = RESUME_HOST_NV;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
@@ -182,6 +182,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
}

static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags,
					ulong esr_flags)
{
	vcpu->arch.queued_dear = dear_flags;
	vcpu->arch.queued_esr = esr_flags;
	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT);
}

void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
{
	vcpu->arch.queued_esr = esr_flags;
@@ -300,13 +308,22 @@ static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr)
#endif
}

static unsigned long get_guest_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
	return mfspr(SPRN_GEPR);
#else
	return vcpu->arch.epr;
#endif
}

/* Deliver the interrupt of the corresponding priority, if possible. */
static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
					unsigned int priority)
{
	int allowed = 0;
	ulong msr_mask = 0;
	bool update_esr = false, update_dear = false;
	bool update_esr = false, update_dear = false, update_epr = false;
	ulong crit_raw = vcpu->arch.shared->critical;
	ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
	bool crit;
@@ -330,9 +347,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
		keep_irq = true;
	}

	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
		update_epr = true;

	switch (priority) {
	case BOOKE_IRQPRIO_DTLB_MISS:
	case BOOKE_IRQPRIO_DATA_STORAGE:
	case BOOKE_IRQPRIO_ALIGNMENT:
		update_dear = true;
		/* fall through */
	case BOOKE_IRQPRIO_INST_STORAGE:
@@ -346,7 +367,6 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
	case BOOKE_IRQPRIO_SPE_FP_DATA:
	case BOOKE_IRQPRIO_SPE_FP_ROUND:
	case BOOKE_IRQPRIO_AP_UNAVAIL:
	case BOOKE_IRQPRIO_ALIGNMENT:
		allowed = 1;
		msr_mask = MSR_CE | MSR_ME | MSR_DE;
		int_class = INT_CLASS_NONCRIT;
@@ -408,6 +428,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
			set_guest_esr(vcpu, vcpu->arch.queued_esr);
		if (update_dear == true)
			set_guest_dear(vcpu, vcpu->arch.queued_dear);
		if (update_epr == true)
			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);

		new_msr &= msr_mask;
#if defined(CONFIG_64BIT)
@@ -581,6 +603,11 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)

	kvmppc_core_check_exceptions(vcpu);

	if (vcpu->requests) {
		/* Exception delivery raised request; start over */
		return 1;
	}

	if (vcpu->arch.shared->msr & MSR_WE) {
		local_irq_enable();
		kvm_vcpu_block(vcpu);
@@ -610,6 +637,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
		r = 0;
	}

	if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) {
		vcpu->run->epr.epr = 0;
		vcpu->arch.epr_needed = true;
		vcpu->run->exit_reason = KVM_EXIT_EPR;
		r = 0;
	}

	return r;
}

@@ -945,6 +979,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
		r = RESUME_GUEST;
		break;

	case BOOKE_INTERRUPT_ALIGNMENT:
		kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear,
					    vcpu->arch.fault_esr);
		r = RESUME_GUEST;
		break;

#ifdef CONFIG_KVM_BOOKE_HV
	case BOOKE_INTERRUPT_HV_SYSCALL:
		if (!(vcpu->arch.shared->msr & MSR_PR)) {
@@ -1388,6 +1428,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
				   &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
		break;
	}
	case KVM_REG_PPC_EPR: {
		u32 epr = get_guest_epr(vcpu);
		r = put_user(epr, (u32 __user *)(long)reg->addr);
		break;
	}
#if defined(CONFIG_64BIT)
	case KVM_REG_PPC_EPCR:
		r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
@@ -1420,6 +1465,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
				     (u64 __user *)(long)reg->addr, sizeof(u64));
		break;
	}
	case KVM_REG_PPC_EPR: {
		u32 new_epr;
		r = get_user(new_epr, (u32 __user *)(long)reg->addr);
		if (!r)
			kvmppc_set_epr(vcpu, new_epr);
		break;
	}
#if defined(CONFIG_64BIT)
	case KVM_REG_PPC_EPCR: {
		u32 new_epcr;
@@ -1556,7 +1608,9 @@ int __init kvmppc_booke_init(void)
{
#ifndef CONFIG_KVM_BOOKE_HV
	unsigned long ivor[16];
	unsigned long *handler = kvmppc_booke_handler_addr;
	unsigned long max_ivor = 0;
	unsigned long handler_len;
	int i;

	/* We install our own exception handlers by hijacking IVPR. IVPR must
@@ -1589,14 +1643,16 @@ int __init kvmppc_booke_init(void)

	for (i = 0; i < 16; i++) {
		if (ivor[i] > max_ivor)
			max_ivor = ivor[i];
			max_ivor = i;

		handler_len = handler[i + 1] - handler[i];
		memcpy((void *)kvmppc_booke_handlers + ivor[i],
		       kvmppc_handlers_start + i * kvmppc_handler_len,
		       kvmppc_handler_len);
		       (void *)handler[i], handler_len);
	}
	flush_icache_range(kvmppc_booke_handlers,
			   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);

	handler_len = handler[max_ivor + 1] - handler[max_ivor];
	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
			   ivor[max_ivor] + handler_len);
#endif /* !BOOKE_HV */
	return 0;
}
@@ -65,6 +65,7 @@
	(1 << BOOKE_IRQPRIO_CRITICAL))

extern unsigned long kvmppc_booke_handlers;
extern unsigned long kvmppc_booke_handler_addr[];

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
@@ -269,6 +269,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
	case SPRN_ESR:
		*spr_val = vcpu->arch.shared->esr;
		break;
	case SPRN_EPR:
		*spr_val = vcpu->arch.epr;
		break;
	case SPRN_CSRR0:
		*spr_val = vcpu->arch.csrr0;
		break;
@@ -45,18 +45,21 @@
	(1<<BOOKE_INTERRUPT_DEBUG))

#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
			(1<<BOOKE_INTERRUPT_DTLB_MISS))
			(1<<BOOKE_INTERRUPT_DTLB_MISS) | \
			(1<<BOOKE_INTERRUPT_ALIGNMENT))

#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
		       (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
		       (1<<BOOKE_INTERRUPT_PROGRAM) | \
		       (1<<BOOKE_INTERRUPT_DTLB_MISS))
		       (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
		       (1<<BOOKE_INTERRUPT_ALIGNMENT))

.macro KVM_HANDLER ivor_nr scratch srr0
_GLOBAL(kvmppc_handler_\ivor_nr)
	/* Get pointer to vcpu and record exit number. */
	mtspr	\scratch , r4
	mfspr	r4, SPRN_SPRG_RVCPU
	mfspr	r4, SPRN_SPRG_THREAD
	lwz	r4, THREAD_KVM_VCPU(r4)
	stw	r3, VCPU_GPR(R3)(r4)
	stw	r5, VCPU_GPR(R5)(r4)
	stw	r6, VCPU_GPR(R6)(r4)
@@ -73,6 +76,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
	bctr
.endm

.macro KVM_HANDLER_ADDR ivor_nr
	.long	kvmppc_handler_\ivor_nr
.endm

.macro KVM_HANDLER_END
	.long	kvmppc_handlers_end
.endm

_GLOBAL(kvmppc_handlers_start)
KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
@@ -93,9 +104,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0

_GLOBAL(kvmppc_handler_len)
	.long kvmppc_handler_1 - kvmppc_handler_0
_GLOBAL(kvmppc_handlers_end)

/* Registers:
 * SPRG_SCRATCH0: guest r4
@@ -402,9 +411,6 @@ lightweight_exit:
	lwz	r8, kvmppc_booke_handlers@l(r8)
	mtspr	SPRN_IVPR, r8

	/* Save vcpu pointer for the exception handlers. */
	mtspr	SPRN_SPRG_WVCPU, r4

	lwz	r5, VCPU_SHARED(r4)

	/* Can't switch the stack pointer until after IVPR is switched,
@@ -463,6 +469,31 @@ lightweight_exit:
	lwz	r4, VCPU_GPR(R4)(r4)
	rfi

	.data
	.align	4
	.globl	kvmppc_booke_handler_addr
kvmppc_booke_handler_addr:
KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL
KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK
KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE
KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE
KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL
KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT
KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM
KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL
KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL
KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL
KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER
KVM_HANDLER_ADDR BOOKE_INTERRUPT_FIT
KVM_HANDLER_ADDR BOOKE_INTERRUPT_WATCHDOG
KVM_HANDLER_ADDR BOOKE_INTERRUPT_DTLB_MISS
KVM_HANDLER_ADDR BOOKE_INTERRUPT_ITLB_MISS
KVM_HANDLER_ADDR BOOKE_INTERRUPT_DEBUG
KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_UNAVAIL
KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_DATA
KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_ROUND
KVM_HANDLER_END /* Always keep this in end */

#ifdef CONFIG_SPE
_GLOBAL(kvmppc_save_guest_spe)
	cmpi	0,r3,0
@@ -491,6 +491,9 @@ static int __init kvmppc_e500_init(void)
{
	int r, i;
	unsigned long ivor[3];
	/* Process remaining handlers above the generic first 16 */
	unsigned long *handler = &kvmppc_booke_handler_addr[16];
	unsigned long handler_len;
	unsigned long max_ivor = 0;

	r = kvmppc_core_check_processor_compat();
@@ -506,15 +509,16 @@ static int __init kvmppc_e500_init(void)
	ivor[1] = mfspr(SPRN_IVOR33);
	ivor[2] = mfspr(SPRN_IVOR34);
	for (i = 0; i < 3; i++) {
		if (ivor[i] > max_ivor)
			max_ivor = ivor[i];
		if (ivor[i] > ivor[max_ivor])
			max_ivor = i;

		handler_len = handler[i + 1] - handler[i];
		memcpy((void *)kvmppc_booke_handlers + ivor[i],
		       kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
		       kvmppc_handler_len);
		       (void *)handler[i], handler_len);
	}
	flush_icache_range(kvmppc_booke_handlers,
			   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
	handler_len = handler[max_ivor + 1] - handler[max_ivor];
	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
			   ivor[max_ivor] + handler_len);

	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
}
@@ -28,6 +28,7 @@
#define E500_TLB_VALID 1
#define E500_TLB_BITMAP 2
#define E500_TLB_TLB0 (1 << 2)

struct tlbe_ref {
	pfn_t pfn;
@@ -1,10 +1,11 @@
/*
 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
 * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
 *
 * Author: Yu Liu, yu.liu@freescale.com
 *         Scott Wood, scottwood@freescale.com
 *         Ashish Kalra, ashish.kalra@freescale.com
 *         Varun Sethi, varun.sethi@freescale.com
 *         Alexander Graf, agraf@suse.de
 *
 * Description:
 * This file is based on arch/powerpc/kvm/44x_tlb.c,
@@ -33,10 +34,7 @@
#include "e500.h"
#include "trace.h"
#include "timing.h"

#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)

static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
#include "e500_mmu_host.h"

static inline unsigned int gtlb0_get_next_victim(
		struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -50,174 +48,6 @@ static inline unsigned int gtlb0_get_next_victim(
	return victim;
}

static inline unsigned int tlb1_max_shadow_size(void)
{
	/* reserve one entry for magic page */
	return host_tlb_params[1].entries - tlbcam_index - 1;
}

static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
{
	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
}

static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
{
	/* Mask off reserved bits. */
	mas3 &= MAS3_ATTRIB_MASK;

#ifndef CONFIG_KVM_BOOKE_HV
	if (!usermode) {
		/* Guest is in supervisor mode,
		 * so we need to translate guest
		 * supervisor permissions into user permissions. */
		mas3 &= ~E500_TLB_USER_PERM_MASK;
		mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
	}
	mas3 |= E500_TLB_SUPER_PERM_MASK;
#endif
	return mas3;
}

static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
{
#ifdef CONFIG_SMP
	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
#else
	return mas2 & MAS2_ATTRIB_MASK;
#endif
}

/*
 * writing shadow tlb entry to host TLB
 */
static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
				     uint32_t mas0)
{
	unsigned long flags;

	local_irq_save(flags);
	mtspr(SPRN_MAS0, mas0);
	mtspr(SPRN_MAS1, stlbe->mas1);
	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
#ifdef CONFIG_KVM_BOOKE_HV
	mtspr(SPRN_MAS8, stlbe->mas8);
#endif
	asm volatile("isync; tlbwe" : : : "memory");

#ifdef CONFIG_KVM_BOOKE_HV
	/* Must clear mas8 for other host tlbwe's */
	mtspr(SPRN_MAS8, 0);
	isync();
#endif
	local_irq_restore(flags);

	trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
				      stlbe->mas2, stlbe->mas7_3);
}

/*
 * Acquire a mas0 with victim hint, as if we just took a TLB miss.
 *
 * We don't care about the address we're searching for, other than that it's
 * in the right set and is not present in the TLB.  Using a zero PID and a
 * userspace address means we don't have to set and then restore MAS5, or
 * calculate a proper MAS6 value.
 */
static u32 get_host_mas0(unsigned long eaddr)
{
	unsigned long flags;
	u32 mas0;

	local_irq_save(flags);
	mtspr(SPRN_MAS6, 0);
	asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
	mas0 = mfspr(SPRN_MAS0);
	local_irq_restore(flags);

	return mas0;
}

/* sesel is for tlb1 only */
static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
		int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
{
	u32 mas0;

	if (tlbsel == 0) {
		mas0 = get_host_mas0(stlbe->mas2);
		__write_host_tlbe(stlbe, mas0);
	} else {
		__write_host_tlbe(stlbe,
				  MAS0_TLBSEL(1) |
				  MAS0_ESEL(to_htlb1_esel(sesel)));
	}
}

#ifdef CONFIG_KVM_E500V2
void kvmppc_map_magic(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	struct kvm_book3e_206_tlb_entry magic;
	ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
	unsigned int stid;
	pfn_t pfn;

	pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
	get_page(pfn_to_page(pfn));

	preempt_disable();
	stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);

	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
	magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
	magic.mas8 = 0;

	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
	preempt_enable();
}
#endif

static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
		int tlbsel, int esel)
{
	struct kvm_book3e_206_tlb_entry *gtlbe =
		get_entry(vcpu_e500, tlbsel, esel);

	if (tlbsel == 1 &&
	    vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) {
		u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
		int hw_tlb_indx;
		unsigned long flags;

		local_irq_save(flags);
		while (tmp) {
			hw_tlb_indx = __ilog2_u64(tmp & -tmp);
			mtspr(SPRN_MAS0,
			      MAS0_TLBSEL(1) |
			      MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
			mtspr(SPRN_MAS1, 0);
			asm volatile("tlbwe");
			vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
			tmp &= tmp - 1;
		}
		mb();
		vcpu_e500->g2h_tlb1_map[esel] = 0;
		vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP;
		local_irq_restore(flags);

		return;
	}

	/* Guest tlbe is backed by at most one host tlbe per shadow pid. */
	kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
}

static int tlb0_set_base(gva_t addr, int sets, int ways)
{
	int set_base;
@@ -296,70 +126,6 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
	return -1;
}

static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
					 struct kvm_book3e_206_tlb_entry *gtlbe,
					 pfn_t pfn)
{
	ref->pfn = pfn;
	ref->flags = E500_TLB_VALID;

	if (tlbe_is_writable(gtlbe))
		kvm_set_pfn_dirty(pfn);
}

static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
{
	if (ref->flags & E500_TLB_VALID) {
		trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
		ref->flags = 0;
	}
}

static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	if (vcpu_e500->g2h_tlb1_map)
		memset(vcpu_e500->g2h_tlb1_map, 0,
		       sizeof(u64) * vcpu_e500->gtlb_params[1].entries);
	if (vcpu_e500->h2g_tlb1_rmap)
		memset(vcpu_e500->h2g_tlb1_rmap, 0,
		       sizeof(unsigned int) * host_tlb_params[1].entries);
}

static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	int tlbsel = 0;
	int i;

	for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
		struct tlbe_ref *ref =
			&vcpu_e500->gtlb_priv[tlbsel][i].ref;
		kvmppc_e500_ref_release(ref);
	}
}

static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	int stlbsel = 1;
	int i;

	kvmppc_e500_tlbil_all(vcpu_e500);

	for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
		struct tlbe_ref *ref =
			&vcpu_e500->tlb_refs[stlbsel][i];
		kvmppc_e500_ref_release(ref);
	}

	clear_tlb_privs(vcpu_e500);
}

void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	clear_tlb_refs(vcpu_e500);
	clear_tlb1_bitmap(vcpu_e500);
}

static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
		unsigned int eaddr, int as)
{
@@ -385,216 +151,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
		| (as ? MAS6_SAS : 0);
}

/* TID must be supplied by the caller */
static inline void kvmppc_e500_setup_stlbe(
	struct kvm_vcpu *vcpu,
	struct kvm_book3e_206_tlb_entry *gtlbe,
	int tsize, struct tlbe_ref *ref, u64 gvaddr,
	struct kvm_book3e_206_tlb_entry *stlbe)
{
	pfn_t pfn = ref->pfn;
	u32 pr = vcpu->arch.shared->msr & MSR_PR;

	BUG_ON(!(ref->flags & E500_TLB_VALID));

	/* Force IPROT=0 for all guest mappings. */
	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
	stlbe->mas2 = (gvaddr & MAS2_EPN) |
		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);

#ifdef CONFIG_KVM_BOOKE_HV
	stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
#endif
}

static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
	u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
	int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
	struct tlbe_ref *ref)
{
	struct kvm_memory_slot *slot;
	unsigned long pfn = 0; /* silence GCC warning */
	unsigned long hva;
	int pfnmap = 0;
	int tsize = BOOK3E_PAGESZ_4K;

	/*
	 * Translate guest physical to true physical, acquiring
	 * a page reference if it is normal, non-reserved memory.
	 *
	 * gfn_to_memslot() must succeed because otherwise we wouldn't
	 * have gotten this far.  Eventually we should just pass the slot
	 * pointer through from the first lookup.
	 */
	slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
	hva = gfn_to_hva_memslot(slot, gfn);

	if (tlbsel == 1) {
		struct vm_area_struct *vma;
		down_read(&current->mm->mmap_sem);

		vma = find_vma(current->mm, hva);
		if (vma && hva >= vma->vm_start &&
		    (vma->vm_flags & VM_PFNMAP)) {
			/*
			 * This VMA is a physically contiguous region (e.g.
			 * /dev/mem) that bypasses normal Linux page
			 * management.  Find the overlap between the
			 * vma and the memslot.
			 */

			unsigned long start, end;
			unsigned long slot_start, slot_end;

			pfnmap = 1;

			start = vma->vm_pgoff;
			end = start +
			      ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);

			pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);

			slot_start = pfn - (gfn - slot->base_gfn);
			slot_end = slot_start + slot->npages;

			if (start < slot_start)
				start = slot_start;
			if (end > slot_end)
				end = slot_end;

			tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
				MAS1_TSIZE_SHIFT;

			/*
			 * e500 doesn't implement the lowest tsize bit,
			 * or 1K pages.
			 */
			tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);

			/*
			 * Now find the largest tsize (up to what the guest
			 * requested) that will cover gfn, stay within the
			 * range, and for which gfn and pfn are mutually
			 * aligned.
			 */

			for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
				unsigned long gfn_start, gfn_end, tsize_pages;
				tsize_pages = 1 << (tsize - 2);

				gfn_start = gfn & ~(tsize_pages - 1);
				gfn_end = gfn_start + tsize_pages;

				if (gfn_start + pfn - gfn < start)
					continue;
				if (gfn_end + pfn - gfn > end)
					continue;
				if ((gfn & (tsize_pages - 1)) !=
				    (pfn & (tsize_pages - 1)))
					continue;

				gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
				pfn &= ~(tsize_pages - 1);
				break;
			}
		} else if (vma && hva >= vma->vm_start &&
			   (vma->vm_flags & VM_HUGETLB)) {
			unsigned long psize = vma_kernel_pagesize(vma);

			tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
				MAS1_TSIZE_SHIFT;

			/*
			 * Take the largest page size that satisfies both host
			 * and guest mapping
			 */
			tsize = min(__ilog2(psize) - 10, tsize);

			/*
			 * e500 doesn't implement the lowest tsize bit,
			 * or 1K pages.
			 */
			tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
		}

		up_read(&current->mm->mmap_sem);
	}

	if (likely(!pfnmap)) {
		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
		pfn = gfn_to_pfn_memslot(slot, gfn);
		if (is_error_noslot_pfn(pfn)) {
			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
					(long)gfn);
			return;
		}

		/* Align guest and physical address to page map boundaries */
		pfn &= ~(tsize_pages - 1);
		gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
	}

	/* Drop old ref and setup new one. */
	kvmppc_e500_ref_release(ref);
	kvmppc_e500_ref_setup(ref, gtlbe, pfn);

	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
				ref, gvaddr, stlbe);

	/* Clear i-cache for new pages */
	kvmppc_mmu_flush_icache(pfn);

	/* Drop refcount on page, so that mmu notifiers can clear it */
	kvm_release_pfn_clean(pfn);
}

/* XXX only map the one-one case, for now use TLB0 */
static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
				 int esel,
				 struct kvm_book3e_206_tlb_entry *stlbe)
{
	struct kvm_book3e_206_tlb_entry *gtlbe;
	struct tlbe_ref *ref;

	gtlbe = get_entry(vcpu_e500, 0, esel);
	ref = &vcpu_e500->gtlb_priv[0][esel].ref;

	kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
			gtlbe, 0, stlbe, ref);
}

/* Caller must ensure that the specified guest TLB entry is safe to insert into
 * the shadow TLB. */
/* XXX for both one-one and one-to-many , for now use TLB1 */
static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
		u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
		struct kvm_book3e_206_tlb_entry *stlbe, int esel)
{
	struct tlbe_ref *ref;
	unsigned int victim;

	victim = vcpu_e500->host_tlb1_nv++;

	if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
		vcpu_e500->host_tlb1_nv = 0;

	ref = &vcpu_e500->tlb_refs[1][victim];
	kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);

	vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim;
	vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
	if (vcpu_e500->h2g_tlb1_rmap[victim]) {
		unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim];
		vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim);
	}
	vcpu_e500->h2g_tlb1_rmap[victim] = esel;

	return victim;
}

static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	int size = vcpu_e500->gtlb_params[1].entries;
@@ -683,8 +239,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
	for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
		kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);

	/* Invalidate all vcpu id mappings */
	kvmppc_e500_tlbil_all(vcpu_e500);
	/* Invalidate all host shadow mappings */
	kvmppc_core_flush_tlb(&vcpu_e500->vcpu);

	return EMULATE_DONE;
}
@@ -713,8 +269,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
		kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
	}

	/* Invalidate all vcpu id mappings */
	kvmppc_e500_tlbil_all(vcpu_e500);
	/* Invalidate all host shadow mappings */
	kvmppc_core_flush_tlb(&vcpu_e500->vcpu);

	return EMULATE_DONE;
}
@@ -834,27 +390,11 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
	return EMULATE_DONE;
}

/* sesel is for tlb1 only */
static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
			struct kvm_book3e_206_tlb_entry *gtlbe,
			struct kvm_book3e_206_tlb_entry *stlbe,
			int stlbsel, int sesel)
{
	int stid;

	preempt_disable();
	stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);

	stlbe->mas1 |= MAS1_TID(stid);
	write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
	preempt_enable();
}

int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
	int tlbsel, esel, stlbsel, sesel;
	struct kvm_book3e_206_tlb_entry *gtlbe;
	int tlbsel, esel;
	int recal = 0;

	tlbsel = get_tlb_tlbsel(vcpu);
@@ -892,40 +432,16 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)

	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
	if (tlbe_is_host_safe(vcpu, gtlbe)) {
		u64 eaddr;
		u64 raddr;
		u64 eaddr = get_tlb_eaddr(gtlbe);
		u64 raddr = get_tlb_raddr(gtlbe);

		switch (tlbsel) {
		case 0:
			/* TLB0 */
		if (tlbsel == 0) {
			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);

			stlbsel = 0;
			kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
			sesel = 0; /* unused */

			break;

		case 1:
			/* TLB1 */
			eaddr = get_tlb_eaddr(gtlbe);
			raddr = get_tlb_raddr(gtlbe);

			/* Create a 4KB mapping on the host.
			 * If the guest wanted a large page,
			 * only the first 4KB is mapped here and the rest
			 * are mapped on the fly. */
			stlbsel = 1;
			sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
				raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel);
			break;

		default:
			BUG();
		}

		write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
		/* Premap the faulting page */
		kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel));
	}

	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
@@ -1019,100 +535,14 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
}

void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
		    unsigned int index)
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	struct tlbe_priv *priv;
	struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
	int tlbsel = tlbsel_of(index);
	int esel = esel_of(index);
	int stlbsel, sesel;

	gtlbe = get_entry(vcpu_e500, tlbsel, esel);

	switch (tlbsel) {
	case 0:
		stlbsel = 0;
		sesel = 0; /* unused */
		priv = &vcpu_e500->gtlb_priv[tlbsel][esel];

		/* Only triggers after clear_tlb_refs */
		if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
			kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
		else
			kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
						&priv->ref, eaddr, &stlbe);
		break;

	case 1: {
		gfn_t gfn = gpaddr >> PAGE_SHIFT;

		stlbsel = 1;
		sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn,
					     gtlbe, &stlbe, esel);
		break;
	}

	default:
		BUG();
		break;
	}

	write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}

/************* MMU Notifiers *************/

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	trace_kvm_unmap_hva(hva);

	/*
	 * Flush all shadow tlb entries everywhere. This is slow, but
	 * we are 100% sure that we catch the to be unmapped page
	 */
	kvm_flush_remote_tlbs(kvm);

	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
	/* kvm_unmap_hva flushes everything anyways */
	kvm_unmap_hva(kvm, start);

	return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
	/* XXX could be more clever ;) */
	return 0;
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	/* XXX could be more clever ;) */
	return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	/* The page will get remapped properly on its next fault */
	kvm_unmap_hva(kvm, hva);
}

/*****************************************/

static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	int i;

	clear_tlb1_bitmap(vcpu_e500);
	kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
	kfree(vcpu_e500->g2h_tlb1_map);

	clear_tlb_refs(vcpu_e500);
	kfree(vcpu_e500->gtlb_priv[0]);
	kfree(vcpu_e500->gtlb_priv[1]);

@@ -1303,7 +733,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
{
	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
	kvmppc_recalc_tlb1map_range(vcpu_e500);
	clear_tlb_refs(vcpu_e500);
	kvmppc_core_flush_tlb(vcpu);
	return 0;
}

@@ -1313,37 +743,8 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
	int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
	int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;

	host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
	host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;

	/*
	 * This should never happen on real e500 hardware, but is
	 * architecturally possible -- e.g. in some weird nested
	 * virtualization case.
	 */
	if (host_tlb_params[0].entries == 0 ||
	    host_tlb_params[1].entries == 0) {
		pr_err("%s: need to know host tlb size\n", __func__);
		return -ENODEV;
	}

	host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
				  TLBnCFG_ASSOC_SHIFT;
	host_tlb_params[1].ways = host_tlb_params[1].entries;

	if (!is_power_of_2(host_tlb_params[0].entries) ||
	    !is_power_of_2(host_tlb_params[0].ways) ||
	    host_tlb_params[0].entries < host_tlb_params[0].ways ||
	    host_tlb_params[0].ways == 0) {
		pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
		       __func__, host_tlb_params[0].entries,
		       host_tlb_params[0].ways);
		return -ENODEV;
	}

	host_tlb_params[0].sets =
		host_tlb_params[0].entries / host_tlb_params[0].ways;
	host_tlb_params[1].sets = 1;
	if (e500_mmu_host_init(vcpu_e500))
		goto err;

	vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
	vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
@@ -1362,18 +763,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
	vcpu_e500->gtlb_offset[0] = 0;
	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;

	vcpu_e500->tlb_refs[0] =
		kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
			GFP_KERNEL);
	if (!vcpu_e500->tlb_refs[0])
		goto err;

	vcpu_e500->tlb_refs[1] =
		kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries,
			GFP_KERNEL);
	if (!vcpu_e500->tlb_refs[1])
		goto err;

	vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
					  vcpu_e500->gtlb_params[0].entries,
					  GFP_KERNEL);
@@ -1392,12 +781,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
	if (!vcpu_e500->g2h_tlb1_map)
		goto err;

	vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
					   host_tlb_params[1].entries,
					   GFP_KERNEL);
	if (!vcpu_e500->h2g_tlb1_rmap)
		goto err;

	/* Init TLB configuration register */
	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
			       ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
@@ -1416,15 +799,11 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)

err:
	free_gtlb(vcpu_e500);
	kfree(vcpu_e500->tlb_refs[0]);
	kfree(vcpu_e500->tlb_refs[1]);
	return -1;
}

void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
{
	free_gtlb(vcpu_e500);
	kfree(vcpu_e500->h2g_tlb1_rmap);
	kfree(vcpu_e500->tlb_refs[0]);
	kfree(vcpu_e500->tlb_refs[1]);
	e500_mmu_host_uninit(vcpu_e500);
}
arch/powerpc/kvm/e500_mmu_host.c (new file, 699 lines)
@ -0,0 +1,699 @@
/*
 * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
 *
 * Author: Yu Liu, yu.liu@freescale.com
 *         Scott Wood, scottwood@freescale.com
 *         Ashish Kalra, ashish.kalra@freescale.com
 *         Varun Sethi, varun.sethi@freescale.com
 *         Alexander Graf, agraf@suse.de
 *
 * Description:
 * This file is based on arch/powerpc/kvm/44x_tlb.c,
 * by Hollis Blanchard <hollisb@us.ibm.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/log2.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <asm/kvm_ppc.h>

#include "e500.h"
#include "trace.h"
#include "timing.h"
#include "e500_mmu_host.h"

#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)

static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];

static inline unsigned int tlb1_max_shadow_size(void)
{
    /* reserve one entry for magic page */
    return host_tlb_params[1].entries - tlbcam_index - 1;
}

static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
{
    /* Mask off reserved bits. */
    mas3 &= MAS3_ATTRIB_MASK;

#ifndef CONFIG_KVM_BOOKE_HV
    if (!usermode) {
        /* Guest is in supervisor mode,
         * so we need to translate guest
         * supervisor permissions into user permissions. */
        mas3 &= ~E500_TLB_USER_PERM_MASK;
        mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
    }
    mas3 |= E500_TLB_SUPER_PERM_MASK;
#endif
    return mas3;
}
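
A quick check of the supervisor-to-user translation above: on e500, each MAS3 supervisor permission bit (SR/SW/SX) sits one bit below its user counterpart (UR/UW/UX), so shifting the masked supervisor bits left by one produces the matching user permissions. A minimal standalone sketch; the 0x15/0x2a mask values are my assumption of the usual MAS3 bit layout, standing in for the kernel's E500_TLB_SUPER_PERM_MASK and E500_TLB_USER_PERM_MASK:

    #include <stdio.h>
    #include <stdint.h>

    #define SUPER_PERM_MASK 0x15u /* SX|SW|SR, assumed bit values */
    #define USER_PERM_MASK  0x2au /* UX|UW|UR, assumed bit values */

    int main(void)
    {
        uint32_t mas3 = 0x05; /* guest supervisor entry: SR|SW */
        uint32_t user = (mas3 & SUPER_PERM_MASK) << 1;
        printf("super=%#x -> user=%#x\n", (unsigned)mas3, (unsigned)user);
        /* prints super=0x5 -> user=0xa, i.e. UR|UW */
        return (user & ~USER_PERM_MASK) ? 1 : 0; /* shift stays within user bits */
    }
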

static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
{
#ifdef CONFIG_SMP
    return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
#else
    return mas2 & MAS2_ATTRIB_MASK;
#endif
}

/*
 * writing shadow tlb entry to host TLB
 */
static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
                                     uint32_t mas0)
{
    unsigned long flags;

    local_irq_save(flags);
    mtspr(SPRN_MAS0, mas0);
    mtspr(SPRN_MAS1, stlbe->mas1);
    mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
    mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
    mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
#ifdef CONFIG_KVM_BOOKE_HV
    mtspr(SPRN_MAS8, stlbe->mas8);
#endif
    asm volatile("isync; tlbwe" : : : "memory");

#ifdef CONFIG_KVM_BOOKE_HV
    /* Must clear mas8 for other host tlbwe's */
    mtspr(SPRN_MAS8, 0);
    isync();
#endif
    local_irq_restore(flags);

    trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
                                  stlbe->mas2, stlbe->mas7_3);
}

/*
 * Acquire a mas0 with victim hint, as if we just took a TLB miss.
 *
 * We don't care about the address we're searching for, other than that it's
 * in the right set and is not present in the TLB. Using a zero PID and a
 * userspace address means we don't have to set and then restore MAS5, or
 * calculate a proper MAS6 value.
 */
static u32 get_host_mas0(unsigned long eaddr)
{
    unsigned long flags;
    u32 mas0;

    local_irq_save(flags);
    mtspr(SPRN_MAS6, 0);
    asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
    mas0 = mfspr(SPRN_MAS0);
    local_irq_restore(flags);

    return mas0;
}

/* sesel is for tlb1 only */
static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
        int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
{
    u32 mas0;

    if (tlbsel == 0) {
        mas0 = get_host_mas0(stlbe->mas2);
        __write_host_tlbe(stlbe, mas0);
    } else {
        __write_host_tlbe(stlbe,
                          MAS0_TLBSEL(1) |
                          MAS0_ESEL(to_htlb1_esel(sesel)));
    }
}

/* sesel is for tlb1 only */
static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
                        struct kvm_book3e_206_tlb_entry *gtlbe,
                        struct kvm_book3e_206_tlb_entry *stlbe,
                        int stlbsel, int sesel)
{
    int stid;

    preempt_disable();
    stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);

    stlbe->mas1 |= MAS1_TID(stid);
    write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
    preempt_enable();
}

#ifdef CONFIG_KVM_E500V2
/* XXX should be a hook in the gva2hpa translation */
void kvmppc_map_magic(struct kvm_vcpu *vcpu)
{
    struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
    struct kvm_book3e_206_tlb_entry magic;
    ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
    unsigned int stid;
    pfn_t pfn;

    pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
    get_page(pfn_to_page(pfn));

    preempt_disable();
    stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);

    magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
                 MAS1_TSIZE(BOOK3E_PAGESZ_4K);
    magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
    magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
                   MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
    magic.mas8 = 0;

    __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
    preempt_enable();
}
#endif

void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
                         int esel)
{
    struct kvm_book3e_206_tlb_entry *gtlbe =
        get_entry(vcpu_e500, tlbsel, esel);
    struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref;

    /* Don't bother with unmapped entries */
    if (!(ref->flags & E500_TLB_VALID))
        return;

    if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) {
        u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
        int hw_tlb_indx;
        unsigned long flags;

        local_irq_save(flags);
        while (tmp) {
            hw_tlb_indx = __ilog2_u64(tmp & -tmp);
            mtspr(SPRN_MAS0,
                  MAS0_TLBSEL(1) |
                  MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
            mtspr(SPRN_MAS1, 0);
            asm volatile("tlbwe");
            vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
            tmp &= tmp - 1;
        }
        mb();
        vcpu_e500->g2h_tlb1_map[esel] = 0;
        ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID);
        local_irq_restore(flags);
    }

    if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) {
        /*
         * TLB1 entry is backed by 4k pages. This should happen
         * rarely and is not worth optimizing. Invalidate everything.
         */
        kvmppc_e500_tlbil_all(vcpu_e500);
        ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);
    }

    /* Already invalidated in between */
    if (!(ref->flags & E500_TLB_VALID))
        return;

    /* Guest tlbe is backed by at most one host tlbe per shadow pid. */
    kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);

    /* Mark the TLB as not backed by the host anymore */
    ref->flags &= ~E500_TLB_VALID;
}
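
The invalidation loop above uses the standard lowest-set-bit walk: tmp & -tmp isolates the lowest set bit, its log2 gives the host TLB1 slot index, and tmp &= tmp - 1 clears it. The same idiom in a standalone form, with the compiler builtin __builtin_ctzll standing in for the kernel's __ilog2_u64:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t tmp = 0x91; /* host TLB1 slots 0, 4 and 7 back this guest entry */
        while (tmp) {
            int idx = __builtin_ctzll(tmp & -tmp); /* index of lowest set bit */
            printf("invalidate host tlb1 slot %d\n", idx);
            tmp &= tmp - 1; /* clear that bit and continue */
        }
        return 0;
    }
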

static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
{
    return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
}

static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
                                         struct kvm_book3e_206_tlb_entry *gtlbe,
                                         pfn_t pfn)
{
    ref->pfn = pfn;
    ref->flags = E500_TLB_VALID;

    if (tlbe_is_writable(gtlbe))
        kvm_set_pfn_dirty(pfn);
}

static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
{
    if (ref->flags & E500_TLB_VALID) {
        trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
        ref->flags = 0;
    }
}

static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
{
    if (vcpu_e500->g2h_tlb1_map)
        memset(vcpu_e500->g2h_tlb1_map, 0,
               sizeof(u64) * vcpu_e500->gtlb_params[1].entries);
    if (vcpu_e500->h2g_tlb1_rmap)
        memset(vcpu_e500->h2g_tlb1_rmap, 0,
               sizeof(unsigned int) * host_tlb_params[1].entries);
}

static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
{
    int tlbsel = 0;
    int i;

    for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
        struct tlbe_ref *ref =
            &vcpu_e500->gtlb_priv[tlbsel][i].ref;
        kvmppc_e500_ref_release(ref);
    }
}

static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
{
    int stlbsel = 1;
    int i;

    kvmppc_e500_tlbil_all(vcpu_e500);

    for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
        struct tlbe_ref *ref =
            &vcpu_e500->tlb_refs[stlbsel][i];
        kvmppc_e500_ref_release(ref);
    }

    clear_tlb_privs(vcpu_e500);
}

void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
{
    struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
    clear_tlb_refs(vcpu_e500);
    clear_tlb1_bitmap(vcpu_e500);
}

/* TID must be supplied by the caller */
static void kvmppc_e500_setup_stlbe(
    struct kvm_vcpu *vcpu,
    struct kvm_book3e_206_tlb_entry *gtlbe,
    int tsize, struct tlbe_ref *ref, u64 gvaddr,
    struct kvm_book3e_206_tlb_entry *stlbe)
{
    pfn_t pfn = ref->pfn;
    u32 pr = vcpu->arch.shared->msr & MSR_PR;

    BUG_ON(!(ref->flags & E500_TLB_VALID));

    /* Force IPROT=0 for all guest mappings. */
    stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
    stlbe->mas2 = (gvaddr & MAS2_EPN) |
                  e500_shadow_mas2_attrib(gtlbe->mas2, pr);
    stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
                    e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);

#ifdef CONFIG_KVM_BOOKE_HV
    stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
#endif
}

static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
    u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
    int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
    struct tlbe_ref *ref)
{
    struct kvm_memory_slot *slot;
    unsigned long pfn = 0; /* silence GCC warning */
    unsigned long hva;
    int pfnmap = 0;
    int tsize = BOOK3E_PAGESZ_4K;

    /*
     * Translate guest physical to true physical, acquiring
     * a page reference if it is normal, non-reserved memory.
     *
     * gfn_to_memslot() must succeed because otherwise we wouldn't
     * have gotten this far. Eventually we should just pass the slot
     * pointer through from the first lookup.
     */
    slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
    hva = gfn_to_hva_memslot(slot, gfn);

    if (tlbsel == 1) {
        struct vm_area_struct *vma;
        down_read(&current->mm->mmap_sem);

        vma = find_vma(current->mm, hva);
        if (vma && hva >= vma->vm_start &&
            (vma->vm_flags & VM_PFNMAP)) {
            /*
             * This VMA is a physically contiguous region (e.g.
             * /dev/mem) that bypasses normal Linux page
             * management. Find the overlap between the
             * vma and the memslot.
             */

            unsigned long start, end;
            unsigned long slot_start, slot_end;

            pfnmap = 1;

            start = vma->vm_pgoff;
            end = start +
                  ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);

            pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);

            slot_start = pfn - (gfn - slot->base_gfn);
            slot_end = slot_start + slot->npages;

            if (start < slot_start)
                start = slot_start;
            if (end > slot_end)
                end = slot_end;

            tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
                    MAS1_TSIZE_SHIFT;

            /*
             * e500 doesn't implement the lowest tsize bit,
             * or 1K pages.
             */
            tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);

            /*
             * Now find the largest tsize (up to what the guest
             * requested) that will cover gfn, stay within the
             * range, and for which gfn and pfn are mutually
             * aligned.
             */

            for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
                unsigned long gfn_start, gfn_end, tsize_pages;
                tsize_pages = 1 << (tsize - 2);

                gfn_start = gfn & ~(tsize_pages - 1);
                gfn_end = gfn_start + tsize_pages;

                if (gfn_start + pfn - gfn < start)
                    continue;
                if (gfn_end + pfn - gfn > end)
                    continue;
                if ((gfn & (tsize_pages - 1)) !=
                    (pfn & (tsize_pages - 1)))
                    continue;

                gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
                pfn &= ~(tsize_pages - 1);
                break;
            }
        } else if (vma && hva >= vma->vm_start &&
                   (vma->vm_flags & VM_HUGETLB)) {
            unsigned long psize = vma_kernel_pagesize(vma);

            tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
                    MAS1_TSIZE_SHIFT;

            /*
             * Take the largest page size that satisfies both host
             * and guest mapping
             */
            tsize = min(__ilog2(psize) - 10, tsize);

            /*
             * e500 doesn't implement the lowest tsize bit,
             * or 1K pages.
             */
            tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
        }

        up_read(&current->mm->mmap_sem);
    }

    if (likely(!pfnmap)) {
        unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
        pfn = gfn_to_pfn_memslot(slot, gfn);
        if (is_error_noslot_pfn(pfn)) {
            printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
                   (long)gfn);
            return -EINVAL;
        }

        /* Align guest and physical address to page map boundaries */
        pfn &= ~(tsize_pages - 1);
        gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
    }

    /* Drop old ref and setup new one. */
    kvmppc_e500_ref_release(ref);
    kvmppc_e500_ref_setup(ref, gtlbe, pfn);

    kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
                            ref, gvaddr, stlbe);

    /* Clear i-cache for new pages */
    kvmppc_mmu_flush_icache(pfn);

    /* Drop refcount on page, so that mmu notifiers can clear it */
    kvm_release_pfn_clean(pfn);

    return 0;
}
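
For the tsize arithmetic used in the mapping path: as this code reads, MAS1.TSIZE encodes a page size of 1KB << tsize (so BOOK3E_PAGESZ_4K is 2, and supported sizes step by powers of four), which makes the page count in 4K units 1 << (tsize - 2), the same value that 1 << (tsize + 10 - PAGE_SHIFT) yields with PAGE_SHIFT = 12. A small worked check of that equivalence:

    #include <stdio.h>

    int main(void)
    {
        int page_shift = 12; /* 4K host pages */
        for (int tsize = 2; tsize <= 10; tsize += 2) {
            unsigned long bytes = 1024UL << tsize; /* 1KB << tsize */
            unsigned long pages = 1UL << (tsize + 10 - page_shift);
            printf("tsize=%2d -> %8lu KB = %6lu x 4K pages\n",
                   tsize, bytes >> 10, pages);
        }
        return 0; /* tsize=2 gives 4 KB = 1 page, tsize=4 gives 16 KB = 4 pages, ... */
    }
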

/* XXX only map the one-one case, for now use TLB0 */
static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel,
                                struct kvm_book3e_206_tlb_entry *stlbe)
{
    struct kvm_book3e_206_tlb_entry *gtlbe;
    struct tlbe_ref *ref;
    int stlbsel = 0;
    int sesel = 0;
    int r;

    gtlbe = get_entry(vcpu_e500, 0, esel);
    ref = &vcpu_e500->gtlb_priv[0][esel].ref;

    r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
                               get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
                               gtlbe, 0, stlbe, ref);
    if (r)
        return r;

    write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel);

    return 0;
}

static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500,
                                     struct tlbe_ref *ref,
                                     int esel)
{
    unsigned int sesel = vcpu_e500->host_tlb1_nv++;

    if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
        vcpu_e500->host_tlb1_nv = 0;

    vcpu_e500->tlb_refs[1][sesel] = *ref;
    vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
    vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
    if (vcpu_e500->h2g_tlb1_rmap[sesel]) {
        unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel];
        vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel);
    }
    vcpu_e500->h2g_tlb1_rmap[sesel] = esel;

    return sesel;
}
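
kvmppc_e500_tlb1_map_tlb1() keeps two views consistent: g2h_tlb1_map[esel] is a bitmap of host TLB1 slots backing guest entry esel, and h2g_tlb1_rmap[sesel] names the guest entry that currently owns host slot sesel, so reusing a round-robin victim slot must first clear the previous owner's bit. A toy model of that invariant (sizes and indices are arbitrary, and the real code's treatment of rmap value 0 as "unused" is glossed over here):

    #include <stdio.h>
    #include <stdint.h>

    #define GUEST_ENTRIES 16
    #define HOST_SLOTS    16

    static uint64_t g2h[GUEST_ENTRIES];  /* guest esel -> bitmap of host slots */
    static unsigned int h2g[HOST_SLOTS]; /* host sesel -> owning guest esel */

    static void map_slot(unsigned int esel, unsigned int sesel)
    {
        unsigned int prev = h2g[sesel];
        g2h[prev] &= ~(1ULL << sesel); /* evict the previous owner's claim */
        g2h[esel] |= 1ULL << sesel;
        h2g[sesel] = esel;
    }

    int main(void)
    {
        map_slot(3, 7);
        map_slot(5, 7); /* slot 7 reused: entry 3 must lose its bit */
        printf("g2h[3]=%#llx g2h[5]=%#llx h2g[7]=%u\n",
               (unsigned long long)g2h[3], (unsigned long long)g2h[5], h2g[7]);
        return 0; /* prints g2h[3]=0 g2h[5]=0x80 h2g[7]=5 */
    }
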

/* Caller must ensure that the specified guest TLB entry is safe to insert into
 * the shadow TLB. */
/* For both one-one and one-to-many */
static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
        u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
        struct kvm_book3e_206_tlb_entry *stlbe, int esel)
{
    struct tlbe_ref ref;
    int sesel;
    int r;

    ref.flags = 0;
    r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe,
                               &ref);
    if (r)
        return r;

    /* Use TLB0 when we can only map a page with 4k */
    if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) {
        vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0;
        write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0);
        return 0;
    }

    /* Otherwise map into TLB1 */
    sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel);
    write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel);

    return 0;
}

void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
                    unsigned int index)
{
    struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
    struct tlbe_priv *priv;
    struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
    int tlbsel = tlbsel_of(index);
    int esel = esel_of(index);

    gtlbe = get_entry(vcpu_e500, tlbsel, esel);

    switch (tlbsel) {
    case 0:
        priv = &vcpu_e500->gtlb_priv[tlbsel][esel];

        /* Triggers after clear_tlb_refs or on initial mapping */
        if (!(priv->ref.flags & E500_TLB_VALID)) {
            kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
        } else {
            kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
                                    &priv->ref, eaddr, &stlbe);
            write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0);
        }
        break;

    case 1: {
        gfn_t gfn = gpaddr >> PAGE_SHIFT;
        kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe,
                             esel);
        break;
    }

    default:
        BUG();
        break;
    }
}

/************* MMU Notifiers *************/

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
    trace_kvm_unmap_hva(hva);

    /*
     * Flush all shadow tlb entries everywhere. This is slow, but
     * we are 100% sure that we catch the to be unmapped page
     */
    kvm_flush_remote_tlbs(kvm);

    return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
    /* kvm_unmap_hva flushes everything anyways */
    kvm_unmap_hva(kvm, start);

    return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
    /* XXX could be more clever ;) */
    return 0;
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
    /* XXX could be more clever ;) */
    return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
    /* The page will get remapped properly on its next fault */
    kvm_unmap_hva(kvm, hva);
}

/*****************************************/

int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
    host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
    host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;

    /*
     * This should never happen on real e500 hardware, but is
     * architecturally possible -- e.g. in some weird nested
     * virtualization case.
     */
    if (host_tlb_params[0].entries == 0 ||
        host_tlb_params[1].entries == 0) {
        pr_err("%s: need to know host tlb size\n", __func__);
        return -ENODEV;
    }

    host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
                              TLBnCFG_ASSOC_SHIFT;
    host_tlb_params[1].ways = host_tlb_params[1].entries;

    if (!is_power_of_2(host_tlb_params[0].entries) ||
        !is_power_of_2(host_tlb_params[0].ways) ||
        host_tlb_params[0].entries < host_tlb_params[0].ways ||
        host_tlb_params[0].ways == 0) {
        pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
               __func__, host_tlb_params[0].entries,
               host_tlb_params[0].ways);
        return -ENODEV;
    }

    host_tlb_params[0].sets =
        host_tlb_params[0].entries / host_tlb_params[0].ways;
    host_tlb_params[1].sets = 1;

    vcpu_e500->tlb_refs[0] =
        kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
                GFP_KERNEL);
    if (!vcpu_e500->tlb_refs[0])
        goto err;

    vcpu_e500->tlb_refs[1] =
        kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries,
                GFP_KERNEL);
    if (!vcpu_e500->tlb_refs[1])
        goto err;

    vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
                                       host_tlb_params[1].entries,
                                       GFP_KERNEL);
    if (!vcpu_e500->h2g_tlb1_rmap)
        goto err;

    return 0;

err:
    kfree(vcpu_e500->tlb_refs[0]);
    kfree(vcpu_e500->tlb_refs[1]);
    return -EINVAL;
}
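
The geometry validation in e500_mmu_host_init() reduces to: TLB0 is set-associative, so entries and ways must be powers of two with sets = entries / ways, while TLB1 is fully associative (ways == entries, a single set). A trivial standalone version of the same check, with made-up values:

    #include <stdio.h>

    static int is_pow2(unsigned int v) /* power-of-two test */
    {
        return v && !(v & (v - 1));
    }

    int main(void)
    {
        unsigned int entries = 512, ways = 4; /* e.g. a 512-entry, 4-way TLB0 */
        if (!is_pow2(entries) || !is_pow2(ways) || entries < ways || ways == 0) {
            printf("bad tlb0 config\n");
            return 1;
        }
        printf("tlb0: %u sets of %u ways\n", entries / ways, ways);
        return 0; /* prints tlb0: 128 sets of 4 ways */
    }
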

void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
{
    kfree(vcpu_e500->h2g_tlb1_rmap);
    kfree(vcpu_e500->tlb_refs[0]);
    kfree(vcpu_e500->tlb_refs[1]);
}
arch/powerpc/kvm/e500_mmu_host.h (new file, 18 lines)
@ -0,0 +1,18 @@
/*
 * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#ifndef KVM_E500_MMU_HOST_H
#define KVM_E500_MMU_HOST_H

void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
                         int esel);

int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500);
void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);

#endif /* KVM_E500_MMU_HOST_H */
@ -150,8 +150,6 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
    case SPRN_TBWL: break;
    case SPRN_TBWU: break;

    case SPRN_MSSSR0: break;

    case SPRN_DEC:
        vcpu->arch.dec = spr_val;
        kvmppc_emulate_dec(vcpu);
@ -202,9 +200,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
    case SPRN_PIR:
        spr_val = vcpu->vcpu_id;
        break;
    case SPRN_MSSSR0:
        spr_val = 0;
        break;

    /* Note: mftb and TBRL/TBWL are user-accessible, so
     * the guest can always access the real TB anyways.
@ -237,7 +237,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
        r = RESUME_HOST;
        break;
    default:
        BUG();
        WARN_ON(1);
        r = RESUME_GUEST;
    }

    return r;
@ -305,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_BOOKE
    case KVM_CAP_PPC_BOOKE_SREGS:
    case KVM_CAP_PPC_BOOKE_WATCHDOG:
    case KVM_CAP_PPC_EPR:
#else
    case KVM_CAP_PPC_SEGSTATE:
    case KVM_CAP_PPC_HIOR:
@ -412,7 +414,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   struct kvm_memory_slot old,
                                   struct kvm_userspace_memory_region *mem,
                                   int user_alloc)
                                   bool user_alloc)
{
    return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
@ -420,7 +422,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot old,
                                   int user_alloc)
                                   bool user_alloc)
{
    kvmppc_core_commit_memory_region(kvm, mem, old);
}
@ -720,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
        for (i = 0; i < 9; ++i)
            kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
        vcpu->arch.hcall_needed = 0;
#ifdef CONFIG_BOOKE
    } else if (vcpu->arch.epr_needed) {
        kvmppc_set_epr(vcpu, run->epr.epr);
        vcpu->arch.epr_needed = 0;
#endif
    }

    r = kvmppc_vcpu_run(run, vcpu);
@ -761,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
        r = 0;
        vcpu->arch.papr_enabled = true;
        break;
    case KVM_CAP_PPC_EPR:
        r = 0;
        vcpu->arch.epr_enabled = cap->args[0];
        break;
#ifdef CONFIG_BOOKE
    case KVM_CAP_PPC_BOOKE_WATCHDOG:
        r = 0;
@ -41,6 +41,7 @@ enum interruption_class {
    IRQIO_CSC,
    IRQIO_PCI,
    IRQIO_MSI,
    IRQIO_VIR,
    NMI_NMI,
    CPU_RST,
    NR_ARCH_IRQS
@ -20,9 +20,7 @@
#include <asm/cpu.h>

#define KVM_MAX_VCPUS 64
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_USER_MEM_SLOTS 32

struct sca_entry {
    atomic_t scn;
@ -76,8 +74,11 @@ struct kvm_s390_sie_block {
    __u64 epoch;            /* 0x0038 */
    __u8 reserved40[4];     /* 0x0040 */
#define LCTL_CR0 0x8000
#define LCTL_CR6 0x0200
#define LCTL_CR14 0x0002
    __u16 lctl;             /* 0x0044 */
    __s16 icpua;            /* 0x0046 */
#define ICTL_LPSW 0x00400000
    __u32 ictl;             /* 0x0048 */
    __u32 eca;              /* 0x004c */
    __u8 icptcode;          /* 0x0050 */
@ -127,6 +128,7 @@ struct kvm_vcpu_stat {
    u32 deliver_prefix_signal;
    u32 deliver_restart_signal;
    u32 deliver_program_int;
    u32 deliver_io_int;
    u32 exit_wait_state;
    u32 instruction_stidp;
    u32 instruction_spx;
@ -187,6 +189,11 @@ struct kvm_s390_emerg_info {
    __u16 code;
};

struct kvm_s390_mchk_info {
    __u64 cr14;
    __u64 mcic;
};

struct kvm_s390_interrupt_info {
    struct list_head list;
    u64 type;
@ -197,6 +204,7 @@ struct kvm_s390_interrupt_info {
        struct kvm_s390_emerg_info emerg;
        struct kvm_s390_extcall_info extcall;
        struct kvm_s390_prefix_info prefix;
        struct kvm_s390_mchk_info mchk;
    };
};

@ -254,6 +262,7 @@ struct kvm_arch{
    debug_info_t *dbf;
    struct kvm_s390_float_interrupt float_int;
    struct gmap *gmap;
    int css_support;
};

extern int sie64a(struct kvm_s390_sie_block *, u64 *);
@ -81,6 +81,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
    [IRQIO_CSC]  = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
    [IRQIO_PCI]  = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
    [IRQIO_MSI]  = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
    [IRQIO_VIR]  = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
    [NMI_NMI]    = {.name = "NMI", .desc = "[NMI] Machine Check"},
    [CPU_RST]    = {.name = "RST", .desc = "[CPU] CPU Restart"},
};
@ -26,27 +26,20 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
{
    int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
    int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
    int base2 = vcpu->arch.sie_block->ipb >> 28;
    int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
                ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
    u64 useraddr;
    int reg, rc;

    vcpu->stat.instruction_lctlg++;
    if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
        return -EOPNOTSUPP;

    useraddr = disp2;
    if (base2)
        useraddr += vcpu->run->s.regs.gprs[base2];
    useraddr = kvm_s390_get_base_disp_rsy(vcpu);

    if (useraddr & 7)
        return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

    reg = reg1;

    VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
               disp2);
    VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
               useraddr);
    trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);

    do {
@ -68,23 +61,19 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
{
    int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
    int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
    int base2 = vcpu->arch.sie_block->ipb >> 28;
    int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
    u64 useraddr;
    u32 val = 0;
    int reg, rc;

    vcpu->stat.instruction_lctl++;

    useraddr = disp2;
    if (base2)
        useraddr += vcpu->run->s.regs.gprs[base2];
    useraddr = kvm_s390_get_base_disp_rs(vcpu);

    if (useraddr & 3)
        return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

    VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
               disp2);
    VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
               useraddr);
    trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);

    reg = reg1;
@ -104,14 +93,31 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
    return 0;
}

static intercept_handler_t instruction_handlers[256] = {
static const intercept_handler_t eb_handlers[256] = {
    [0x2f] = handle_lctlg,
    [0x8a] = kvm_s390_handle_priv_eb,
};

static int handle_eb(struct kvm_vcpu *vcpu)
{
    intercept_handler_t handler;

    handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
    if (handler)
        return handler(vcpu);
    return -EOPNOTSUPP;
}
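
The change above replaces the direct [0xeb] = handle_lctlg entry with a second-level dispatch: handle_eb() indexes a 256-entry table by the low byte of IPB, letting lctlg (0x2f) and the privileged 0x8a case share the 0xeb major opcode. A minimal sketch of the two-level table pattern; the handler bodies and return convention are illustrative only:

    #include <stdio.h>

    typedef int (*handler_t)(void);

    static int handle_lctlg_demo(void) { printf("lctlg\n"); return 0; }
    static int handle_priv_eb_demo(void) { printf("priv eb\n"); return 0; }

    static handler_t eb_handlers[256] = {
        [0x2f] = handle_lctlg_demo,
        [0x8a] = handle_priv_eb_demo,
    };

    static int handle_eb(unsigned int ipb_low)
    {
        handler_t h = eb_handlers[ipb_low & 0xff];
        return h ? h() : -1; /* the kernel returns -EOPNOTSUPP here */
    }

    int main(void)
    {
        handle_eb(0x2f);
        return handle_eb(0x42) == -1 ? 0 : 1; /* unknown sub-opcode rejected */
    }
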

static const intercept_handler_t instruction_handlers[256] = {
    [0x01] = kvm_s390_handle_01,
    [0x82] = kvm_s390_handle_lpsw,
    [0x83] = kvm_s390_handle_diag,
    [0xae] = kvm_s390_handle_sigp,
    [0xb2] = kvm_s390_handle_b2,
    [0xb7] = handle_lctl,
    [0xb9] = kvm_s390_handle_b9,
    [0xe5] = kvm_s390_handle_e5,
    [0xeb] = handle_lctlg,
    [0xeb] = handle_eb,
};

static int handle_noop(struct kvm_vcpu *vcpu)
@ -258,6 +264,7 @@ static const intercept_handler_t intercept_funcs[] = {
    [0x0C >> 2] = handle_instruction_and_prog,
    [0x10 >> 2] = handle_noop,
    [0x14 >> 2] = handle_noop,
    [0x18 >> 2] = handle_noop,
    [0x1C >> 2] = kvm_s390_handle_wait,
    [0x20 >> 2] = handle_validity,
    [0x28 >> 2] = handle_stop,
@ -21,11 +21,31 @@
#include "gaccess.h"
#include "trace-s390.h"

#define IOINT_SCHID_MASK 0x0000ffff
#define IOINT_SSID_MASK 0x00030000
#define IOINT_CSSID_MASK 0x03fc0000
#define IOINT_AI_MASK 0x04000000

static int is_ioint(u64 type)
{
    return ((type & 0xfffe0000u) != 0xfffe0000u);
}
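
is_ioint() leans on the layout of KVM's s390 interrupt type codes: as the masks above suggest, non-I/O types live in the 0xfffe0000 range while I/O types carry CSSID/SSID/subchannel bits directly, so anything outside that range is treated as an I/O interrupt. A small check of the predicate; the sample values are assumptions for illustration, not quotes from the UAPI headers:

    #include <stdio.h>
    #include <stdint.h>

    static int is_ioint(uint64_t type)
    {
        return (type & 0xfffe0000u) != 0xfffe0000u;
    }

    int main(void)
    {
        uint64_t io_ai = 0x04000000u; /* adapter interrupt: IOINT_AI_MASK set */
        uint64_t other = 0xfffe0000u; /* a non-I/O type in the 0xfffe.... range */
        printf("io_ai: %d, other: %d\n", is_ioint(io_ai), is_ioint(other));
        return 0; /* prints io_ai: 1, other: 0 */
    }
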

static int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
    return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
}

static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
{
    return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO);
}

static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
{
    return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK);
}

static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
{
    if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
@ -35,6 +55,13 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
    return 1;
}

static u64 int_word_to_isc_bits(u32 int_word)
{
    u8 isc = (int_word & 0x38000000) >> 27;

    return (0x80 >> isc) << 24;
}
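
int_word_to_isc_bits() extracts the 3-bit interruption subclass from bits 27-29 of the interruption word and converts it into the corresponding enable bit as tested against CR6, with ISC 0 mapping to 0x80000000 and ISC 7 to 0x01000000. A worked enumeration of all eight values:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t int_word_to_isc_bits(uint32_t int_word)
    {
        uint8_t isc = (int_word & 0x38000000) >> 27; /* 3-bit ISC field */
        return (uint64_t)(0x80 >> isc) << 24;        /* matching CR6 enable bit */
    }

    int main(void)
    {
        for (uint8_t isc = 0; isc < 8; isc++) {
            uint32_t word = (uint32_t)isc << 27;
            printf("isc=%u -> cr6 bit %#010llx\n", isc,
                   (unsigned long long)int_word_to_isc_bits(word));
        }
        return 0; /* isc=0 prints 0x80000000, isc=7 prints 0x01000000 */
    }
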

static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
                                      struct kvm_s390_interrupt_info *inti)
{
@ -67,7 +94,22 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
    case KVM_S390_SIGP_SET_PREFIX:
    case KVM_S390_RESTART:
        return 1;
    case KVM_S390_MCHK:
        if (psw_mchk_disabled(vcpu))
            return 0;
        if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
            return 1;
        return 0;
    case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
        if (psw_ioint_disabled(vcpu))
            return 0;
        if (vcpu->arch.sie_block->gcr[6] &
            int_word_to_isc_bits(inti->io.io_int_word))
            return 1;
        return 0;
    default:
        printk(KERN_WARNING "illegal interrupt type %llx\n",
               inti->type);
        BUG();
    }
    return 0;
@ -93,6 +135,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
        CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
        &vcpu->arch.sie_block->cpuflags);
    vcpu->arch.sie_block->lctl = 0x0000;
    vcpu->arch.sie_block->ictl &= ~ICTL_LPSW;
}

static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@ -116,6 +159,18 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
    case KVM_S390_SIGP_STOP:
        __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
        break;
    case KVM_S390_MCHK:
        if (psw_mchk_disabled(vcpu))
            vcpu->arch.sie_block->ictl |= ICTL_LPSW;
        else
            vcpu->arch.sie_block->lctl |= LCTL_CR14;
        break;
    case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
        if (psw_ioint_disabled(vcpu))
            __set_cpuflag(vcpu, CPUSTAT_IO_INT);
        else
            vcpu->arch.sie_block->lctl |= LCTL_CR6;
        break;
    default:
        BUG();
    }
@ -297,6 +352,73 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
        exception = 1;
        break;

    case KVM_S390_MCHK:
        VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
                   inti->mchk.mcic);
        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                         inti->mchk.cr14,
                                         inti->mchk.mcic);
        rc = kvm_s390_vcpu_store_status(vcpu,
                                        KVM_S390_STORE_STATUS_PREFIXED);
        if (rc == -EFAULT)
            exception = 1;

        rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic);
        if (rc == -EFAULT)
            exception = 1;

        rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        if (rc == -EFAULT)
            exception = 1;

        rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
                             __LC_MCK_NEW_PSW, sizeof(psw_t));
        if (rc == -EFAULT)
            exception = 1;
        break;

    case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
    {
        __u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
                       inti->io.subchannel_nr;
        __u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
                       inti->io.io_int_word;
        VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
        vcpu->stat.deliver_io_int++;
        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
                                         param0, param1);
        rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID,
                           inti->io.subchannel_id);
        if (rc == -EFAULT)
            exception = 1;

        rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR,
                           inti->io.subchannel_nr);
        if (rc == -EFAULT)
            exception = 1;

        rc = put_guest_u32(vcpu, __LC_IO_INT_PARM,
                           inti->io.io_int_parm);
        if (rc == -EFAULT)
            exception = 1;

        rc = put_guest_u32(vcpu, __LC_IO_INT_WORD,
                           inti->io.io_int_word);
        if (rc == -EFAULT)
            exception = 1;

        rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW,
                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        if (rc == -EFAULT)
            exception = 1;

        rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
                             __LC_IO_NEW_PSW, sizeof(psw_t));
        if (rc == -EFAULT)
            exception = 1;
        break;
    }
    default:
        BUG();
    }
@ -518,6 +640,61 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
    }
}

void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
{
    struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
    struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
    struct kvm_s390_interrupt_info *n, *inti = NULL;
    int deliver;

    __reset_intercept_indicators(vcpu);
    if (atomic_read(&li->active)) {
        do {
            deliver = 0;
            spin_lock_bh(&li->lock);
            list_for_each_entry_safe(inti, n, &li->list, list) {
                if ((inti->type == KVM_S390_MCHK) &&
                    __interrupt_is_deliverable(vcpu, inti)) {
                    list_del(&inti->list);
                    deliver = 1;
                    break;
                }
                __set_intercept_indicator(vcpu, inti);
            }
            if (list_empty(&li->list))
                atomic_set(&li->active, 0);
            spin_unlock_bh(&li->lock);
            if (deliver) {
                __do_deliver_interrupt(vcpu, inti);
                kfree(inti);
            }
        } while (deliver);
    }

    if (atomic_read(&fi->active)) {
        do {
            deliver = 0;
            spin_lock(&fi->lock);
            list_for_each_entry_safe(inti, n, &fi->list, list) {
                if ((inti->type == KVM_S390_MCHK) &&
                    __interrupt_is_deliverable(vcpu, inti)) {
                    list_del(&inti->list);
                    deliver = 1;
                    break;
                }
                __set_intercept_indicator(vcpu, inti);
            }
            if (list_empty(&fi->list))
                atomic_set(&fi->active, 0);
            spin_unlock(&fi->lock);
            if (deliver) {
                __do_deliver_interrupt(vcpu, inti);
                kfree(inti);
            }
        } while (deliver);
    }
}

int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
{
    struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@ -540,12 +717,50 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
    return 0;
}

struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid)
{
    struct kvm_s390_float_interrupt *fi;
    struct kvm_s390_interrupt_info *inti, *iter;

    if ((!schid && !cr6) || (schid && cr6))
        return NULL;
    mutex_lock(&kvm->lock);
    fi = &kvm->arch.float_int;
    spin_lock(&fi->lock);
    inti = NULL;
    list_for_each_entry(iter, &fi->list, list) {
        if (!is_ioint(iter->type))
            continue;
        if (cr6 &&
            ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
            continue;
        if (schid) {
            if (((schid & 0x00000000ffff0000) >> 16) !=
                iter->io.subchannel_id)
                continue;
            if ((schid & 0x000000000000ffff) !=
                iter->io.subchannel_nr)
                continue;
        }
        inti = iter;
        break;
    }
    if (inti)
        list_del_init(&inti->list);
    if (list_empty(&fi->list))
        atomic_set(&fi->active, 0);
    spin_unlock(&fi->lock);
    mutex_unlock(&kvm->lock);
    return inti;
}

int kvm_s390_inject_vm(struct kvm *kvm,
                       struct kvm_s390_interrupt *s390int)
{
    struct kvm_s390_local_interrupt *li;
    struct kvm_s390_float_interrupt *fi;
    struct kvm_s390_interrupt_info *inti;
    struct kvm_s390_interrupt_info *inti, *iter;
    int sigcpu;

    inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@ -569,6 +784,29 @@ int kvm_s390_inject_vm(struct kvm *kvm,
    case KVM_S390_SIGP_STOP:
    case KVM_S390_INT_EXTERNAL_CALL:
    case KVM_S390_INT_EMERGENCY:
        kfree(inti);
        return -EINVAL;
    case KVM_S390_MCHK:
        VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
                 s390int->parm64);
        inti->type = s390int->type;
        inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
        inti->mchk.mcic = s390int->parm64;
        break;
    case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
        if (s390int->type & IOINT_AI_MASK)
            VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
        else
            VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
                     s390int->type & IOINT_CSSID_MASK,
                     s390int->type & IOINT_SSID_MASK,
                     s390int->type & IOINT_SCHID_MASK);
        inti->type = s390int->type;
        inti->io.subchannel_id = s390int->parm >> 16;
        inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
        inti->io.io_int_parm = s390int->parm64 >> 32;
        inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull;
        break;
    default:
        kfree(inti);
        return -EINVAL;
@ -579,7 +817,22 @@ int kvm_s390_inject_vm(struct kvm *kvm,
    mutex_lock(&kvm->lock);
    fi = &kvm->arch.float_int;
    spin_lock(&fi->lock);
    list_add_tail(&inti->list, &fi->list);
    if (!is_ioint(inti->type))
        list_add_tail(&inti->list, &fi->list);
    else {
        u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);

        /* Keep I/O interrupts sorted in isc order. */
        list_for_each_entry(iter, &fi->list, list) {
            if (!is_ioint(iter->type))
                continue;
            if (int_word_to_isc_bits(iter->io.io_int_word)
                <= isc_bits)
                continue;
            break;
        }
        list_add_tail(&inti->list, &iter->list);
    }
    atomic_set(&fi->active, 1);
    sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
    if (sigcpu == KVM_MAX_VCPUS) {
@ -651,8 +904,15 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
        inti->type = s390int->type;
        inti->emerg.code = s390int->parm;
        break;
    case KVM_S390_MCHK:
        VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
                   s390int->parm64);
        inti->type = s390int->type;
        inti->mchk.mcic = s390int->parm64;
        break;
    case KVM_S390_INT_VIRTIO:
    case KVM_S390_INT_SERVICE:
    case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
    default:
        kfree(inti);
        return -EINVAL;
@ -140,6 +140,8 @@ int kvm_dev_ioctl_check_extension(long ext)
#endif
    case KVM_CAP_SYNC_REGS:
    case KVM_CAP_ONE_REG:
    case KVM_CAP_ENABLE_CAP:
    case KVM_CAP_S390_CSS_SUPPORT:
        r = 1;
        break;
    case KVM_CAP_NR_VCPUS:
@ -234,6 +236,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        if (!kvm->arch.gmap)
            goto out_nogmap;
    }

    kvm->arch.css_support = 0;

    return 0;
out_nogmap:
    debug_unregister(kvm->arch.dbf);
@ -659,6 +664,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    case KVM_EXIT_INTR:
    case KVM_EXIT_S390_RESET:
    case KVM_EXIT_S390_UCONTROL:
    case KVM_EXIT_S390_TSCH:
        break;
    default:
        BUG();
@ -766,6 +772,14 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
    } else
        prefix = 0;

    /*
     * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
     * copying in vcpu load/put. Lets update our copies before we save
     * it into the save area
     */
    save_fp_regs(&vcpu->arch.guest_fpregs);
    save_access_regs(vcpu->run->s.regs.acrs);

    if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
                    vcpu->arch.guest_fpregs.fprs, 128, prefix))
        return -EFAULT;
@ -810,6 +824,29 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
    return 0;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                     struct kvm_enable_cap *cap)
{
    int r;

    if (cap->flags)
        return -EINVAL;

    switch (cap->cap) {
    case KVM_CAP_S390_CSS_SUPPORT:
        if (!vcpu->kvm->arch.css_support) {
            vcpu->kvm->arch.css_support = 1;
            trace_kvm_s390_enable_css(vcpu->kvm);
        }
        r = 0;
        break;
    default:
        r = -EINVAL;
        break;
    }
    return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
@ -896,6 +933,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        r = 0;
        break;
    }
    case KVM_ENABLE_CAP:
    {
        struct kvm_enable_cap cap;
        r = -EFAULT;
        if (copy_from_user(&cap, argp, sizeof(cap)))
            break;
        r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
        break;
    }
    default:
        r = -ENOTTY;
    }
@ -930,7 +976,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   struct kvm_memory_slot old,
                                   struct kvm_userspace_memory_region *mem,
                                   int user_alloc)
                                   bool user_alloc)
{
    /* A few sanity checks. We can have exactly one memory slot which has
       to start at guest virtual zero and which has to be located at a
@ -960,7 +1006,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot old,
                                   int user_alloc)
                                   bool user_alloc)
{
    int rc;

@ -65,21 +65,67 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
    vcpu->arch.sie_block->ihcpu = 0xffff;
}

static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
{
    u32 base2 = vcpu->arch.sie_block->ipb >> 28;
    u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);

    return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}

static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
                                              u64 *address1, u64 *address2)
{
    u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
    u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
    u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
    u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;

    *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
    *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}

static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
{
    u32 base2 = vcpu->arch.sie_block->ipb >> 28;
    u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
                ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
    /* The displacement is a 20-bit _SIGNED_ value */
    if (disp2 & 0x80000)
        disp2 += 0xfff00000;

    return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
}
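
The RSY-format displacement decoded above is a signed 20-bit value assembled from two IPB fields, so when bit 19 is set the code widens it with disp2 += 0xfff00000 before the (long)(int) cast turns it into a proper negative offset. A standalone check of that sign extension:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t disp2 = 0xfffff; /* 20-bit two's complement for -1 */
        if (disp2 & 0x80000)      /* sign bit of the 20-bit field */
            disp2 += 0xfff00000;  /* widen to 32-bit two's complement */
        printf("disp2 = %ld\n", (long)(int)disp2); /* prints -1 */
        return 0;
    }
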

static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
{
    u32 base2 = vcpu->arch.sie_block->ipb >> 28;
    u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);

    return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}

int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
int kvm_s390_inject_vm(struct kvm *kvm,
                       struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                         struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid);

/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu);

/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@ -18,23 +18,21 @@
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"

static int handle_set_prefix(struct kvm_vcpu *vcpu)
{
    int base2 = vcpu->arch.sie_block->ipb >> 28;
    int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
    u64 operand2;
    u32 address = 0;
    u8 tmp;

    vcpu->stat.instruction_spx++;

    operand2 = disp2;
    if (base2)
        operand2 += vcpu->run->s.regs.gprs[base2];
    operand2 = kvm_s390_get_base_disp_s(vcpu);

    /* must be word boundary */
    if (operand2 & 3) {
@ -67,15 +65,12 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)

static int handle_store_prefix(struct kvm_vcpu *vcpu)
{
    int base2 = vcpu->arch.sie_block->ipb >> 28;
    int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
    u64 operand2;
    u32 address;

    vcpu->stat.instruction_stpx++;
    operand2 = disp2;
    if (base2)
        operand2 += vcpu->run->s.regs.gprs[base2];

    operand2 = kvm_s390_get_base_disp_s(vcpu);

    /* must be word boundary */
    if (operand2 & 3) {
@ -100,15 +95,12 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)

static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
{
    int base2 = vcpu->arch.sie_block->ipb >> 28;
    int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
    u64 useraddr;
    int rc;

    vcpu->stat.instruction_stap++;
    useraddr = disp2;
    if (base2)
        useraddr += vcpu->run->s.regs.gprs[base2];

    useraddr = kvm_s390_get_base_disp_s(vcpu);

    if (useraddr & 1) {
        kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@ -135,24 +127,96 @@ static int handle_skey(struct kvm_vcpu *vcpu)
    return 0;
}

static int handle_stsch(struct kvm_vcpu *vcpu)
static int handle_tpi(struct kvm_vcpu *vcpu)
{
    vcpu->stat.instruction_stsch++;
    VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
    /* condition code 3 */
    u64 addr;
    struct kvm_s390_interrupt_info *inti;
    int cc;

    addr = kvm_s390_get_base_disp_s(vcpu);

    inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
    if (inti) {
        if (addr) {
            /*
             * Store the two-word I/O interruption code into the
             * provided area.
             */
            put_guest_u16(vcpu, addr, inti->io.subchannel_id);
            put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr);
            put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm);
        } else {
            /*
             * Store the three-word I/O interruption code into
             * the appropriate lowcore area.
             */
            put_guest_u16(vcpu, 184, inti->io.subchannel_id);
            put_guest_u16(vcpu, 186, inti->io.subchannel_nr);
            put_guest_u32(vcpu, 188, inti->io.io_int_parm);
            put_guest_u32(vcpu, 192, inti->io.io_int_word);
        }
        cc = 1;
    } else
        cc = 0;
    kfree(inti);
    /* Set condition code and we're done. */
    vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
    vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
    vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
    return 0;
}
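
handle_tpi() writes the condition code straight into the guest PSW: in the 64-bit PSW mask the two cc bits sit at bit positions 44-45 (counting from the least significant bit), so clearing 3 << 44 and or-ing (cc & 3) << 44 swaps the cc without disturbing the rest of the mask. A minimal demonstration, using ull suffixes for portability off 64-bit longs:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t set_cc(uint64_t psw_mask, unsigned int cc)
    {
        psw_mask &= ~(3ull << 44);              /* clear old condition code */
        psw_mask |= (uint64_t)(cc & 3) << 44;   /* insert the new one */
        return psw_mask;
    }

    int main(void)
    {
        uint64_t mask = 0;
        printf("%#llx\n", (unsigned long long)set_cc(mask, 1));
        return 0; /* prints 0x100000000000: only bit 44 set */
    }
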
|
||||
|
||||
static int handle_chsc(struct kvm_vcpu *vcpu)
|
||||
static int handle_tsch(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->stat.instruction_chsc++;
|
||||
VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
|
||||
/* condition code 3 */
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
||||
vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
|
||||
return 0;
|
||||
struct kvm_s390_interrupt_info *inti;
|
||||
|
||||
inti = kvm_s390_get_io_int(vcpu->kvm, 0,
|
||||
vcpu->run->s.regs.gprs[1]);
|
||||
|
||||
/*
|
||||
* Prepare exit to userspace.
|
||||
* We indicate whether we dequeued a pending I/O interrupt
|
||||
* so that userspace can re-inject it if the instruction gets
|
||||
* a program check. While this may re-order the pending I/O
|
||||
* interrupts, this is no problem since the priority is kept
|
||||
* intact.
|
||||
*/
|
||||
vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
|
||||
vcpu->run->s390_tsch.dequeued = !!inti;
|
||||
if (inti) {
|
||||
vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
|
||||
vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
|
||||
vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
|
||||
vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
|
||||
}
|
||||
vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
|
||||
kfree(inti);
|
||||
return -EREMOTE;
|
||||
}
|
||||
|
||||
static int handle_io_inst(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
|
||||
|
||||
if (vcpu->kvm->arch.css_support) {
|
||||
/*
|
||||
* Most I/O instructions will be handled by userspace.
|
||||
* Exceptions are tpi and the interrupt portion of tsch.
|
||||
*/
|
||||
if (vcpu->arch.sie_block->ipa == 0xb236)
|
||||
return handle_tpi(vcpu);
|
||||
if (vcpu->arch.sie_block->ipa == 0xb235)
|
||||
return handle_tsch(vcpu);
|
||||
/* Handle in userspace. */
|
||||
return -EOPNOTSUPP;
|
||||
} else {
|
||||
/*
|
||||
* Set condition code 3 to stop the guest from issueing channel
|
||||
* I/O instructions.
|
||||
*/
|
||||
vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
|
||||
vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
|
||||
return 0;
|
||||
}
|
||||
}

static int handle_stfl(struct kvm_vcpu *vcpu)
@@ -176,17 +240,107 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
	return 0;
}

static void handle_new_psw(struct kvm_vcpu *vcpu)
{
	/* Check whether the new psw is enabled for machine checks. */
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
		kvm_s390_deliver_pending_machine_checks(vcpu);
}

#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
#define PSW_ADDR_24 0x0000000000ffffffUL
#define PSW_ADDR_31 0x000000007fffffffUL

int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
{
	u64 addr;
	psw_compat_t new_psw;

	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu,
						   PGM_PRIVILEGED_OPERATION);

	addr = kvm_s390_get_base_disp_s(vcpu);

	if (addr & 7) {
		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
		goto out;
	}

	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		goto out;
	}

	if (!(new_psw.mask & PSW32_MASK_BASE)) {
		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
		goto out;
	}

	vcpu->arch.sie_block->gpsw.mask =
		(new_psw.mask & ~PSW32_MASK_BASE) << 32;
	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;

	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
	     PSW_MASK_EA)) {
		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
		goto out;
	}

	handle_new_psw(vcpu);
out:
	return 0;
}

static int handle_lpswe(struct kvm_vcpu *vcpu)
{
	u64 addr;
	psw_t new_psw;

	addr = kvm_s390_get_base_disp_s(vcpu);

	if (addr & 7) {
		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
		goto out;
	}

	if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) {
		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		goto out;
	}

	vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
	vcpu->arch.sie_block->gpsw.addr = new_psw.addr;

	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
	    (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
	      PSW_MASK_BA) &&
	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
	    (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
	     (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
	    ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
	     PSW_MASK_EA)) {
		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
		goto out;
	}

	handle_new_psw(vcpu);
out:
	return 0;
}
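
Both LPSW handlers above apply the same validity rules to the loaded PSW:
no unassigned mask bit may be set, extended addressing without basic
addressing (EA set, BA clear) is invalid, and the instruction address must
fit the selected addressing mode. A standalone restatement of those checks
(a sketch, not part of the patch; the helper name is hypothetical):

static bool psw_is_valid(u64 mask, u64 addr)
{
	u64 am = mask & PSW_MASK_ADDR_MODE;

	if (mask & PSW_MASK_UNASSIGNED)
		return false;			/* reserved bit set */
	if (am == PSW_MASK_EA)
		return false;			/* EA without BA */
	if (am == PSW_MASK_BA && (addr & ~PSW_ADDR_31))
		return false;			/* 31-bit address overflow */
	if (!am && (addr & ~PSW_ADDR_24))
		return false;			/* 24-bit address overflow */
	return true;				/* 64-bit mode or in range */
}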

static int handle_stidp(struct kvm_vcpu *vcpu)
{
	int base2 = vcpu->arch.sie_block->ipb >> 28;
	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
	u64 operand2;
	int rc;

	vcpu->stat.instruction_stidp++;
	operand2 = disp2;
	if (base2)
		operand2 += vcpu->run->s.regs.gprs[base2];

	operand2 = kvm_s390_get_base_disp_s(vcpu);

	if (operand2 & 7) {
		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -240,17 +394,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
	int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
	int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
	int base2 = vcpu->arch.sie_block->ipb >> 28;
	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
	u64 operand2;
	unsigned long mem;

	vcpu->stat.instruction_stsi++;
	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);

	operand2 = disp2;
	if (base2)
		operand2 += vcpu->run->s.regs.gprs[base2];
	operand2 = kvm_s390_get_base_disp_s(vcpu);

	if (operand2 & 0xfff && fc > 0)
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -297,7 +447,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
	return 0;
}

static intercept_handler_t priv_handlers[256] = {
static const intercept_handler_t b2_handlers[256] = {
	[0x02] = handle_stidp,
	[0x10] = handle_set_prefix,
	[0x11] = handle_store_prefix,
@@ -305,10 +455,25 @@ static intercept_handler_t priv_handlers[256] = {
	[0x29] = handle_skey,
	[0x2a] = handle_skey,
	[0x2b] = handle_skey,
	[0x34] = handle_stsch,
	[0x5f] = handle_chsc,
	[0x30] = handle_io_inst,
	[0x31] = handle_io_inst,
	[0x32] = handle_io_inst,
	[0x33] = handle_io_inst,
	[0x34] = handle_io_inst,
	[0x35] = handle_io_inst,
	[0x36] = handle_io_inst,
	[0x37] = handle_io_inst,
	[0x38] = handle_io_inst,
	[0x39] = handle_io_inst,
	[0x3a] = handle_io_inst,
	[0x3b] = handle_io_inst,
	[0x3c] = handle_io_inst,
	[0x5f] = handle_io_inst,
	[0x74] = handle_io_inst,
	[0x76] = handle_io_inst,
	[0x7d] = handle_stsi,
	[0xb1] = handle_stfl,
	[0xb2] = handle_lpswe,
};

int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
@@ -322,7 +487,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
	 * state bit and (a) handle the instruction or (b) send a code 2
	 * program check.
	 * Anything else goes to userspace.*/
	handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
	handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
	if (handler) {
		if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
			return kvm_s390_inject_program_int(vcpu,
@@ -333,19 +498,74 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
	return -EOPNOTSUPP;
}

static int handle_epsw(struct kvm_vcpu *vcpu)
{
	int reg1, reg2;

	reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24;
	reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;

	/* This basically extracts the mask half of the psw. */
	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
	if (reg2) {
		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000;
		vcpu->run->s.regs.gprs[reg2] |=
			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff;
	}
	return 0;
}

static const intercept_handler_t b9_handlers[256] = {
	[0x8d] = handle_epsw,
	[0x9c] = handle_io_inst,
};

int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
{
	intercept_handler_t handler;

	/* This is handled just as for the B2 instructions. */
	handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
	if (handler) {
		if ((handler != handle_epsw) &&
		    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE))
			return kvm_s390_inject_program_int(vcpu,
							   PGM_PRIVILEGED_OPERATION);
		else
			return handler(vcpu);
	}
	return -EOPNOTSUPP;
}

static const intercept_handler_t eb_handlers[256] = {
	[0x8a] = handle_io_inst,
};

int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu)
{
	intercept_handler_t handler;

	/* All eb instructions that end up here are privileged. */
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu,
						   PGM_PRIVILEGED_OPERATION);
	handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
	if (handler)
		return handler(vcpu);
	return -EOPNOTSUPP;
}
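
All three dispatchers above share one pattern: a 256-entry function-pointer
table indexed by the low byte of the instruction (ipa for B2/B9, ipb for
EB), with a NULL entry meaning "fall back to userspace". A condensed sketch
of that pattern (illustrative only, helper name hypothetical):

static int dispatch_intercept(struct kvm_vcpu *vcpu,
			      const intercept_handler_t table[256],
			      u8 opcode_low)
{
	intercept_handler_t handler = table[opcode_low];

	if (!handler)
		return -EOPNOTSUPP;	/* let userspace emulate it */
	return handler(vcpu);
}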

static int handle_tprot(struct kvm_vcpu *vcpu)
{
	int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
	int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
	int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
	int disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
	u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0;
	u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0;
	u64 address1, address2;
	struct vm_area_struct *vma;
	unsigned long user_address;

	vcpu->stat.instruction_tprot++;

	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);

	/* we only handle the Linux memory detection case:
	 * access key == 0
	 * guest DAT == off
@@ -405,7 +625,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
	return 0;
}

static intercept_handler_t x01_handlers[256] = {
static const intercept_handler_t x01_handlers[256] = {
	[0x07] = handle_sckpf,
};

@@ -137,8 +137,10 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
	inti->type = KVM_S390_SIGP_STOP;

	spin_lock_bh(&li->lock);
	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED))
	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
		kfree(inti);
		goto out;
	}
	list_add_tail(&inti->list, &li->list);
	atomic_set(&li->active, 1);
	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
@@ -324,8 +326,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
{
	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
	int base2 = vcpu->arch.sie_block->ipb >> 28;
	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
	u32 parameter;
	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
	u8 order_code;
@@ -336,9 +336,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
		return kvm_s390_inject_program_int(vcpu,
						   PGM_PRIVILEGED_OPERATION);

	order_code = disp2;
	if (base2)
		order_code += vcpu->run->s.regs.gprs[base2];
	order_code = kvm_s390_get_base_disp_rs(vcpu);

	if (r1 % 2)
		parameter = vcpu->run->s.regs.gprs[r1];

@@ -141,13 +141,13 @@ TRACE_EVENT(kvm_s390_inject_vcpu,
 * Trace point for the actual delivery of interrupts.
 */
TRACE_EVENT(kvm_s390_deliver_interrupt,
	TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1),
	TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
	TP_ARGS(id, type, data0, data1),

	TP_STRUCT__entry(
		__field(int, id)
		__field(__u32, inttype)
		__field(__u32, data0)
		__field(__u64, data0)
		__field(__u64, data1)
	),

@@ -159,7 +159,7 @@ TRACE_EVENT(kvm_s390_deliver_interrupt,
	),

	TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
		  "data:%08x %016llx",
		  "data:%08llx %016llx",
		  __entry->id, __entry->inttype,
		  __print_symbolic(__entry->inttype, kvm_s390_int_type),
		  __entry->data0, __entry->data1)
@@ -204,6 +204,26 @@ TRACE_EVENT(kvm_s390_stop_request,
	);


/*
 * Trace point for enabling channel I/O instruction support.
 */
TRACE_EVENT(kvm_s390_enable_css,
	TP_PROTO(void *kvm),
	TP_ARGS(kvm),

	TP_STRUCT__entry(
		__field(void *, kvm)
	),

	TP_fast_assign(
		__entry->kvm = kvm;
	),

	TP_printk("enabling channel I/O support (kvm @ %p)\n",
		  __entry->kvm)
	);
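
By TRACE_EVENT's usual code generation, the definition above produces a
trace_kvm_s390_enable_css() call that the rest of the series can fire when
channel I/O support is switched on for a VM. An illustrative, hypothetical
call site (the real one is elsewhere in this series):

static void sketch_enable_css(struct kvm *kvm)
{
	kvm->arch.css_support = 1;
	trace_kvm_s390_enable_css(kvm);
}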

#endif /* _TRACE_KVMS390_H */

/* This part must be outside protection */

@@ -33,10 +33,10 @@

#define KVM_MAX_VCPUS 254
#define KVM_SOFT_MAX_VCPUS 160
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#define KVM_USER_MEM_SLOTS 125
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)

#define KVM_MMIO_SIZE 16
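
The new layout trades one private slot for many more user-visible slots
while landing on a power of two: 125 user slots plus 3 private slots is
128, versus the old 32 + 4 = 36. A compile-time restatement of that
arithmetic (illustrative only, not part of the patch):

#include <assert.h>

static_assert(125 + 3 == 128, "KVM_MEM_SLOTS_NUM should total 128");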

@@ -219,11 +219,6 @@ struct kvm_mmu_page {
	u64 *spt;
	/* hold the gfn of each spte inside spt */
	gfn_t *gfns;
	/*
	 * One bit set per slot which has memory
	 * in this shadow page.
	 */
	DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
	bool unsync;
	int root_count;          /* Currently serving as active root */
	unsigned int unsync_children;
@@ -502,6 +497,13 @@ struct kvm_vcpu_arch {
		u64 msr_val;
		struct gfn_to_hva_cache data;
	} pv_eoi;

	/*
	 * Indicates whether the access faulted on its page table in the
	 * guest; set while fixing a page fault and used to detect
	 * unhandleable instructions.
	 */
	bool write_fault_to_shadow_pgtable;
};

struct kvm_lpage_info {
@@ -697,6 +699,11 @@ struct kvm_x86_ops {
	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
	int (*vm_has_apicv)(struct kvm *kvm);
	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*get_tdp_level)(void);
	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -991,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);

@@ -27,7 +27,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
 *
 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
 * The hypercall number should be placed in rax and the return value will be
 * placed in rax.  No other registers will be clobbered unless explicited
 * placed in rax.  No other registers will be clobbered unless explicitly
 * noted by the particular hypercall.
 */

@@ -57,9 +57,12 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
#define SECONDARY_EXEC_RDTSCP			0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010
#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
#define SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000

@@ -97,6 +100,7 @@ enum vmcs_field {
	GUEST_GS_SELECTOR               = 0x0000080a,
	GUEST_LDTR_SELECTOR             = 0x0000080c,
	GUEST_TR_SELECTOR               = 0x0000080e,
	GUEST_INTR_STATUS               = 0x00000810,
	HOST_ES_SELECTOR                = 0x00000c00,
	HOST_CS_SELECTOR                = 0x00000c02,
	HOST_SS_SELECTOR                = 0x00000c04,
@@ -124,6 +128,14 @@ enum vmcs_field {
	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
	EPT_POINTER                     = 0x0000201a,
	EPT_POINTER_HIGH                = 0x0000201b,
	EOI_EXIT_BITMAP0                = 0x0000201c,
	EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
	EOI_EXIT_BITMAP1                = 0x0000201e,
	EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
	EOI_EXIT_BITMAP2                = 0x00002020,
	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
	EOI_EXIT_BITMAP3                = 0x00002022,
	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
	VMCS_LINK_POINTER               = 0x00002800,
@@ -346,9 +358,9 @@ enum vmcs_field {

#define AR_RESERVD_MASK 0xfffe0f00

#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
#define TSS_PRIVATE_MEMSLOT			(KVM_USER_MEM_SLOTS + 0)
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 1)
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 2)

#define VMX_NR_VPIDS				(1 << 16)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT		1

@@ -62,10 +62,12 @@
#define EXIT_REASON_MCE_DURING_VMENTRY  41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS         44
#define EXIT_REASON_EOI_INDUCED         45
#define EXIT_REASON_EPT_VIOLATION       48
#define EXIT_REASON_EPT_MISCONFIG       49
#define EXIT_REASON_WBINVD              54
#define EXIT_REASON_XSETBV              55
#define EXIT_REASON_APIC_WRITE          56
#define EXIT_REASON_INVPCID             58

#define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
	{ EXIT_REASON_WBINVD,                "WBINVD" }
	{ EXIT_REASON_WBINVD,                "WBINVD" }, \
	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
	{ EXIT_REASON_INVD,                  "INVD" }, \
	{ EXIT_REASON_INVPCID,               "INVPCID" }


#endif /* _UAPIVMX_H */

@@ -218,6 +218,9 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
	unsigned long mem;
	int size;

	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);

	if (!kvm_para_available())
		return;
@@ -231,16 +234,14 @@ void __init kvmclock_init(void)
	printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
		msr_kvm_system_time, msr_kvm_wall_clock);

	mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
			     PAGE_SIZE);
	mem = memblock_alloc(size, PAGE_SIZE);
	if (!mem)
		return;
	hv_clock = __va(mem);

	if (kvm_register_clock("boot clock")) {
		hv_clock = NULL;
		memblock_free(mem,
			sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
		memblock_free(mem, size);
		return;
	}
	pv_time_ops.sched_clock = kvm_clock_read;
@@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void)
	struct pvclock_vcpu_time_info *vcpu_time;
	unsigned int size;

	size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);

	preempt_disable();
	cpu = smp_processor_id();

@@ -24,6 +24,7 @@
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>

#include "x86.h"
#include "tss.h"
@@ -43,7 +44,7 @@
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended immediate */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
@@ -58,6 +59,7 @@
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
@@ -101,6 +103,7 @@
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcMask     (OpMask << SrcShift)
@@ -113,6 +116,7 @@
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
@@ -146,6 +150,8 @@
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */

#define X2(x...) x, x
#define X3(x...) X2(x), x
@@ -156,6 +162,27 @@
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

/*
 * fastop functions have a special calling convention:
 *
 * dst:    [rdx]:rax  (in/out)
 * src:    rbx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */

struct fastop;

struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
@@ -164,6 +191,8 @@ struct opcode {
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
@@ -180,6 +209,11 @@ struct gprefix {
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
@@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
		} \
	} while (0)

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));

#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
#define FOP_RET   "ret \n\t"

#define FOP_START(op) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    FOP_ALIGN \
	    "em_" #op ": \n\t"

#define FOP_END \
	    ".popsection")

#define FOPNOP() FOP_ALIGN FOP_RET

#define FOP1E(op, dst) \
	FOP_ALIGN #op " %" #dst " \n\t" FOP_RET

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax)) \
	FOP_END

#define FOP2E(op, dst, src) \
	FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET

#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, bl) \
	FOP2E(op##w, ax, bx) \
	FOP2E(op##l, eax, ebx) \
	ON64(FOP2E(op##q, rax, rbx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, bx) \
	FOP2E(op##l, eax, ebx) \
	ON64(FOP2E(op##q, rax, rbx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

#define FOP3E(op, dst, src, src2) \
	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, bx, cl) \
	FOP3E(op##l, eax, ebx, cl) \
	ON64(FOP3E(op##q, rax, rbx, cl)) \
	FOP_END

/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"

FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;

#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
	do { \
		unsigned long _tmp; \
@@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
	ulong la;
	u32 lim;
	u16 sel;
	unsigned cpl, rpl;
	unsigned cpl;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	switch (ctxt->mode) {
@@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
			goto bad;
		}
		cpl = ctxt->ops->cpl(ctxt);
		if (ctxt->mode == X86EMUL_MODE_REAL)
			rpl = 0;
		else
			rpl = sel & 3;
		cpl = max(cpl, rpl);
		if (!(desc.type & 8)) {
			/* data segment */
			if (cpl > desc.dpl)
@@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
	return rc;
}

static int test_cc(unsigned int condition, unsigned int flags)
FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

static u8 test_cc(unsigned int condition, unsigned long flags)
{
	int rc = 0;
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; call *%[fastop]"
	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
	return rc;
}
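
The rewritten test_cc() leans on the FOP_SETCC block above: the sixteen
setcc stubs are emitted in x86 condition-code order, each aligned to 4
bytes, so the stub for condition code cc sits at em_setcc + 4 * cc. For
example, condition 0x4 (ZF set, "e"/"z") lands on the setz stub:

	/* condition 0x4 -> offset 16 -> the SETZ stub */
	void (*fop)(void) = (void *)em_setcc + 4 * (0x4 & 0xf);

The parity case and the inverted sense of odd condition codes, which the
old switch computed by hand, now fall out of the hardware setcc itself.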

static void fetch_register_operand(struct operand *op)
@@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
	ctxt->ops->put_fpu(ctxt);
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fninit");
	ctxt->ops->put_fpu(ctxt);
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstcw %0": "+m"(fcw));
	ctxt->ops->put_fpu(ctxt);

	/* force 2 byte destination */
	ctxt->dst.bytes = 2;
	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstsw %0": "+m"(fsw));
	ctxt->ops->put_fpu(ctxt);

	/* force 2 byte destination */
	ctxt->dst.bytes = 2;
	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}

static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
@@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	if (ctxt->d & NoWrite)
		return X86EMUL_CONTINUE;

	switch (ctxt->dst.type) {
	case OP_REG:
		write_register_operand(&ctxt->dst);
@@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
	return X86EMUL_CONTINUE;
}

static int em_grp2(struct x86_emulate_ctxt *ctxt)
{
	switch (ctxt->modrm_reg) {
	case 0:	/* rol */
		emulate_2op_SrcB(ctxt, "rol");
		break;
	case 1:	/* ror */
		emulate_2op_SrcB(ctxt, "ror");
		break;
	case 2:	/* rcl */
		emulate_2op_SrcB(ctxt, "rcl");
		break;
	case 3:	/* rcr */
		emulate_2op_SrcB(ctxt, "rcr");
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
		emulate_2op_SrcB(ctxt, "sal");
		break;
	case 5:	/* shr */
		emulate_2op_SrcB(ctxt, "shr");
		break;
	case 7:	/* sar */
		emulate_2op_SrcB(ctxt, "sar");
		break;
	}
	return X86EMUL_CONTINUE;
}

static int em_not(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ~ctxt->dst.val;
	return X86EMUL_CONTINUE;
}

static int em_neg(struct x86_emulate_ctxt *ctxt)
{
	emulate_1op(ctxt, "neg");
	return X86EMUL_CONTINUE;
}

static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 ex = 0;
@@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
	int rc = X86EMUL_CONTINUE;

	switch (ctxt->modrm_reg) {
	case 0:	/* inc */
		emulate_1op(ctxt, "inc");
		break;
	case 1:	/* dec */
		emulate_1op(ctxt, "dec");
		break;
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = ctxt->_eip;
@@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
	/* Save real source value, then compare EAX against destination. */
	ctxt->src.orig_val = ctxt->src.val;
	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
	emulate_2op_SrcV(ctxt, "cmp");
	fastop(ctxt, em_cmp);

	if (ctxt->eflags & EFLG_ZF) {
		/* Success: write back to memory. */
@@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	emulate_2op_SrcV(ctxt, "or");
	fastop(ctxt, em_or);
	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
	if (cf)
		ctxt->eflags |= X86_EFLAGS_CF;
@@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
	return X86EMUL_CONTINUE;
}

static int em_aad(struct x86_emulate_ctxt *ctxt)
{
	u8 al = ctxt->dst.val & 0xff;
	u8 ah = (ctxt->dst.val >> 8) & 0xff;

	al = (al + (ah * ctxt->src.val)) & 0xff;

	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;

	/* Set PF, ZF, SF */
	ctxt->src.type = OP_IMM;
	ctxt->src.val = 0;
	ctxt->src.bytes = 1;
	fastop(ctxt, em_or);

	return X86EMUL_CONTINUE;
}

static int em_call(struct x86_emulate_ctxt *ctxt)
{
	long rel = ctxt->src.val;
@@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
	return X86EMUL_CONTINUE;
}

static int em_add(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "add");
	return X86EMUL_CONTINUE;
}

static int em_or(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "or");
	return X86EMUL_CONTINUE;
}

static int em_adc(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "adc");
	return X86EMUL_CONTINUE;
}

static int em_sbb(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "sbb");
	return X86EMUL_CONTINUE;
}

static int em_and(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "and");
	return X86EMUL_CONTINUE;
}

static int em_sub(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "sub");
	return X86EMUL_CONTINUE;
}

static int em_xor(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "xor");
	return X86EMUL_CONTINUE;
}

static int em_cmp(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "cmp");
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_test(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV(ctxt, "test");
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return X86EMUL_CONTINUE;
}

static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
	/* Write back the register source. */
@@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
	return X86EMUL_CONTINUE;
}

static int em_imul(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "imul");
	return X86EMUL_CONTINUE;
}

static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.val = ctxt->src2.val;
	return em_imul(ctxt);
	return fastop(ctxt, em_imul);
}

static int em_cwd(struct x86_emulate_ctxt *ctxt)
@@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
	return X86EMUL_CONTINUE;
}

static int em_bt(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;

	emulate_2op_SrcV_nobyte(ctxt, "bt");
	return X86EMUL_CONTINUE;
}

static int em_bts(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "bts");
	return X86EMUL_CONTINUE;
}

static int em_btr(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "btr");
	return X86EMUL_CONTINUE;
}

static int em_btc(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "btc");
	return X86EMUL_CONTINUE;
}

static int em_bsf(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "bsf");
	return X86EMUL_CONTINUE;
}

static int em_bsr(struct x86_emulate_ctxt *ctxt)
{
	emulate_2op_SrcV_nobyte(ctxt, "bsr");
	return X86EMUL_CONTINUE;
}

static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;
@@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
@@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define D2bv(_f)      D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
#define I2bvIP(_f, _e, _i, _p) \
	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)

#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \
		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)

static const struct opcode group7_rm1[] = {
	DI(SrcNone | Priv, monitor),
@@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = {
};

static const struct opcode group1[] = {
	I(Lock, em_add),
	I(Lock | PageTable, em_or),
	I(Lock, em_adc),
	I(Lock, em_sbb),
	I(Lock | PageTable, em_and),
	I(Lock, em_sub),
	I(Lock, em_xor),
	I(0, em_cmp),
	F(Lock, em_add),
	F(Lock | PageTable, em_or),
	F(Lock, em_adc),
	F(Lock, em_sbb),
	F(Lock | PageTable, em_and),
	F(Lock, em_sub),
	F(Lock, em_xor),
	F(NoWrite, em_cmp),
};

static const struct opcode group1A[] = {
	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
};

static const struct opcode group2[] = {
	F(DstMem | ModRM, em_rol),
	F(DstMem | ModRM, em_ror),
	F(DstMem | ModRM, em_rcl),
	F(DstMem | ModRM, em_rcr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_shr),
	F(DstMem | ModRM, em_shl),
	F(DstMem | ModRM, em_sar),
};

static const struct opcode group3[] = {
	I(DstMem | SrcImm, em_test),
	I(DstMem | SrcImm, em_test),
	I(DstMem | SrcNone | Lock, em_not),
	I(DstMem | SrcNone | Lock, em_neg),
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcImm | NoWrite, em_test),
	F(DstMem | SrcNone | Lock, em_not),
	F(DstMem | SrcNone | Lock, em_neg),
	I(SrcMem, em_mul_ex),
	I(SrcMem, em_imul_ex),
	I(SrcMem, em_div_ex),
@@ -3640,14 +3701,14 @@ static const struct opcode group3[] = {
};

static const struct opcode group4[] = {
	I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
	I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
	F(ByteOp | DstMem | SrcNone | Lock, em_inc),
	F(ByteOp | DstMem | SrcNone | Lock, em_dec),
	N, N, N, N, N, N,
};

static const struct opcode group5[] = {
	I(DstMem | SrcNone | Lock, em_grp45),
	I(DstMem | SrcNone | Lock, em_grp45),
	F(DstMem | SrcNone | Lock, em_inc),
	F(DstMem | SrcNone | Lock, em_dec),
	I(SrcMem | Stack, em_grp45),
	I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
	I(SrcMem | Stack, em_grp45),
@@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { {

static const struct opcode group8[] = {
	N, N, N, N,
	I(DstMem | SrcImmByte, em_bt),
	I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
	I(DstMem | SrcImmByte | Lock, em_btr),
	I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
	F(DstMem | SrcImmByte | NoWrite, em_bt),
	F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
	F(DstMem | SrcImmByte | Lock, em_btr),
	F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};

static const struct group_dual group9 = { {
@@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = {
	I(0, em_mov), N, N, N,
};
static const struct escape escape_d9 = { {
	N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_db = { {
	N, N, N, N, N, N, N, N,
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct escape escape_dd = { {
	N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };

static const struct opcode opcode_table[256] = {
	/* 0x00 - 0x07 */
	I6ALU(Lock, em_add),
	F6ALU(Lock, em_add),
	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
	/* 0x08 - 0x0F */
	I6ALU(Lock | PageTable, em_or),
	F6ALU(Lock | PageTable, em_or),
	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
	N,
	/* 0x10 - 0x17 */
	I6ALU(Lock, em_adc),
	F6ALU(Lock, em_adc),
	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
	/* 0x18 - 0x1F */
	I6ALU(Lock, em_sbb),
	F6ALU(Lock, em_sbb),
	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
	/* 0x20 - 0x27 */
	I6ALU(Lock | PageTable, em_and), N, N,
	F6ALU(Lock | PageTable, em_and), N, N,
	/* 0x28 - 0x2F */
	I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
	F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
	/* 0x30 - 0x37 */
	I6ALU(Lock, em_xor), N, N,
	F6ALU(Lock, em_xor), N, N,
	/* 0x38 - 0x3F */
	I6ALU(0, em_cmp), N, N,
	F6ALU(NoWrite, em_cmp), N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg)),
	X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
	/* 0x50 - 0x57 */
	X8(I(SrcReg | Stack, em_push)),
	/* 0x58 - 0x5F */
@@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = {
	G(DstMem | SrcImm, group1),
	G(ByteOp | DstMem | SrcImm | No64, group1),
	G(DstMem | SrcImmByte, group1),
	I2bv(DstMem | SrcReg | ModRM, em_test),
	F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
	/* 0x88 - 0x8F */
	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
@@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = {
	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
	I2bv(SrcSI | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstDI | String, em_cmp),
	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
	/* 0xA8 - 0xAF */
	I2bv(DstAcc | SrcImm, em_test),
	F2bv(DstAcc | SrcImm | NoWrite, em_test),
	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
	I2bv(SrcAcc | DstDI | String, em_cmp),
	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
	/* 0xB0 - 0xB7 */
	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
	/* 0xB8 - 0xBF */
	X8(I(DstReg | SrcImm | Mov, em_mov)),
	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
	/* 0xC0 - 0xC7 */
	D2bv(DstMem | SrcImmByte | ModRM),
	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
	I(ImplicitOps | Stack, em_ret),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
@@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = {
	D(ImplicitOps), DI(SrcImmByte, intn),
	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
	/* 0xD0 - 0xD7 */
	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
	N, N, N, N,
	G(Src2One | ByteOp, group2), G(Src2One, group2),
	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
	N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
	/* 0xE0 - 0xE7 */
	X3(I(SrcImmByte, em_loop)),
	I(SrcImmByte, em_jcxz),
@@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = {
	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
	/* 0xA0 - 0xA7 */
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
	II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt),
	D(DstMem | SrcReg | Src2ImmByte | ModRM),
	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
	II(ImplicitOps, em_cpuid, cpuid),
	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
	/* 0xA8 - 0xAF */
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
	DI(ImplicitOps, rsm),
	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
	D(DstMem | SrcReg | Src2ImmByte | ModRM),
	D(DstMem | SrcReg | Src2CL | ModRM),
	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
	/* 0xB0 - 0xB7 */
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
	I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(BitOp, group8),
	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
	I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xC7 */
	D2bv(DstMem | SrcReg | ModRM | Lock),
@@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
	case 4:
		op->val = insn_fetch(s32, ctxt);
		break;
	case 8:
		op->val = insn_fetch(s64, ctxt);
		break;
	}
	if (!sign_extension) {
		switch (op->bytes) {
@@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
	case OpMem8:
		ctxt->memop.bytes = 1;
		goto mem_common;
@@ -4222,6 +4354,12 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
		case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
		}
		break;
	case Escape:
		if (ctxt->modrm > 0xbf)
			opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
		else
			opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
		break;
	default:
		return EMULATION_FAILED;
	}
@@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
	fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
	    : "c"(ctxt->src2.val), [fastop]"S"(fop));
	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	return X86EMUL_CONTINUE;
}

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
@@ -4483,6 +4631,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
	}

	if (ctxt->execute) {
		if (ctxt->d & Fastop) {
			void (*fop)(struct fastop *) = (void *)ctxt->execute;
			rc = fastop(ctxt, fop);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			goto writeback;
		}
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
@@ -4493,12 +4648,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
		goto twobyte_insn;

	switch (ctxt->b) {
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op(ctxt, "inc");
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op(ctxt, "dec");
		break;
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
@@ -4523,9 +4672,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xc0 ... 0xc1:
		rc = em_grp2(ctxt);
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
@@ -4536,13 +4682,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
		if (ctxt->eflags & EFLG_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xd0 ... 0xd1:	/* Grp2 */
		rc = em_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3:	/* Grp2 */
		ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
		rc = em_grp2(ctxt);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		jmp_rel(ctxt, ctxt->src.val);
@@ -4661,14 +4800,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
	case 0x90 ... 0x9f:     /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl(ctxt, "shld");
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl(ctxt, "shrd");
		break;
	case 0xae:              /* clflush */
		break;
	case 0xb6 ... 0xb7:	/* movzx */
@@ -4682,7 +4813,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
			(s16) ctxt->src.val;
		break;
	case 0xc0 ... 0xc1:	/* xadd */
		emulate_2op_SrcV(ctxt, "add");
		fastop(ctxt, em_add);
		/* Write back the register source. */
		ctxt->src.val = ctxt->dst.orig_val;
		write_register_operand(&ctxt->src);

@@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm)
	 */
	remaining = hrtimer_get_remaining(&ps->timer);
	elapsed = ps->period - ktime_to_ns(remaining);
	elapsed = mod_64(elapsed, ps->period);

	return elapsed;
}

@@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
	int irq, irq2, intno;
	struct kvm_pic *s = pic_irqchip(kvm);

	s->output = 0;

	pic_lock(s);
	irq = pic_get_irq(&s->pics[0]);
	if (irq >= 0) {
@@ -37,50 +37,82 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);

/*
 * Check if there is a pending interrupt from a
 * non-APIC source, without intack.
 */
static int kvm_cpu_has_extint(struct kvm_vcpu *v)
{
	if (kvm_apic_accept_pic_intr(v))
		return pic_irqchip(v->kvm)->output;	/* PIC */
	else
		return 0;
}

/*
 * Check if there is an injectable interrupt:
 * when virtual interrupt delivery is enabled,
 * interrupts from the APIC are handled by hardware,
 * so we don't need to check them here.
 */
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
{
	if (!irqchip_in_kernel(v->kvm))
		return v->arch.interrupt.pending;

	if (kvm_cpu_has_extint(v))
		return 1;

	if (kvm_apic_vid_enabled(v->kvm))
		return 0;

	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
}

/*
 * Check if there is a pending interrupt, without intack.
 */
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
	struct kvm_pic *s;

	if (!irqchip_in_kernel(v->kvm))
		return v->arch.interrupt.pending;

	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
		if (kvm_apic_accept_pic_intr(v)) {
			s = pic_irqchip(v->kvm);	/* PIC */
			return s->output;
		} else
			return 0;
	}
	return 1;
	if (kvm_cpu_has_extint(v))
		return 1;

	return kvm_apic_has_interrupt(v) != -1;	/* LAPIC */
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);

/*
 * Read the pending interrupt (from a non-APIC source)
 * vector, with intack.
 */
static int kvm_cpu_get_extint(struct kvm_vcpu *v)
{
	if (kvm_cpu_has_extint(v))
		return kvm_pic_read_irq(v->kvm);	/* PIC */
	return -1;
}

/*
 * Read the pending interrupt vector, with intack.
 */
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
	struct kvm_pic *s;
	int vector;

	if (!irqchip_in_kernel(v->kvm))
		return v->arch.interrupt.nr;

	vector = kvm_get_apic_interrupt(v);	/* APIC */
	if (vector == -1) {
		if (kvm_apic_accept_pic_intr(v)) {
			s = pic_irqchip(v->kvm);
			s->output = 0;		/* PIC */
			vector = kvm_pic_read_irq(v->kvm);
		}
	}
	return vector;
	vector = kvm_cpu_get_extint(v);

	if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
		return vector;			/* PIC */

	return kvm_get_apic_interrupt(v);	/* APIC */
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
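
After this refactor the injection order is explicit: an ExtINT from the PIC
is reported first, and the LAPIC is only consulted when APICv's virtual
interrupt delivery is off, since hardware then injects APIC interrupts by
itself. An illustrative truth table for the in-kernel irqchip case
(assuming the LAPIC accepts PIC interrupts):

/*
 *  PIC output | APICv vid | LAPIC IRR | kvm_cpu_get_interrupt()
 *  -----------+-----------+-----------+---------------------------------
 *  pending    | any       | any       | vector from kvm_pic_read_irq()
 *  none       | enabled   | any       | -1 (hardware injects APIC irqs)
 *  none       | disabled  | vector v  | v from kvm_get_apic_interrupt()
 *  none       | disabled  | empty     | -1
 */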

void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{

@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
				struct kvm_lapic_irq *irq,
				u64 *eoi_exit_bitmap)
{
	u16 cid;
	ldr >>= 32 - map->ldr_bits;
	cid = (ldr >> map->cid_shift) & map->cid_mask;
	struct kvm_lapic **dst;
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	int i;

	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
	rcu_read_lock();
	map = rcu_dereference(vcpu->kvm->arch.apic_map);

	return cid;
}
	if (unlikely(!map)) {
		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
		goto out;
	}

static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
{
	ldr >>= (32 - map->ldr_bits);
	return ldr & map->lid_mask;
	if (irq->dest_mode == 0) { /* physical mode */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
		    irq->dest_id == 0xff) {
			__set_bit(irq->vector,
				  (unsigned long *)eoi_exit_bitmap);
			goto out;
		}
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (dst[i]->vcpu == vcpu) {
			__set_bit(irq->vector,
				  (unsigned long *)eoi_exit_bitmap);
			break;
		}
	}

out:
	rcu_read_unlock();
}
|
||||
|
||||
static void recalculate_apic_map(struct kvm *kvm)
|
||||
@ -230,6 +255,8 @@ static void recalculate_apic_map(struct kvm *kvm)
|
||||
|
||||
if (old)
|
||||
kfree_rcu(old, rcu);
|
||||
|
||||
kvm_ioapic_make_eoibitmap_request(kvm);
|
||||
}
|
||||
|
||||
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
|
||||
@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
|
||||
{
|
||||
int result;
|
||||
|
||||
/*
|
||||
* Note that irr_pending is just a hint. It will be always
|
||||
* true with virtual interrupt delivery enabled.
|
||||
*/
|
||||
if (!apic->irr_pending)
|
||||
return -1;
|
||||
|
||||
@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
|
||||
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
|
||||
{
|
||||
int result;
|
||||
|
||||
/* Note that isr_count is always 1 with vid enabled */
|
||||
if (!apic->isr_count)
|
||||
return -1;
|
||||
if (likely(apic->highest_isr_cache != -1))
|
||||
@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
|
||||
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
|
||||
}
|
||||
|
||||
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
|
||||
{
|
||||
if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
|
||||
kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
|
||||
int trigger_mode;
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR))
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
|
||||
}
|
||||
}
|
||||
|
||||
static int apic_set_eoi(struct kvm_lapic *apic)
|
||||
{
|
||||
int vector = apic_find_highest_isr(apic);
|
||||
@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
|
||||
apic_clear_isr(vector, apic);
|
||||
apic_update_ppr(apic);
|
||||
|
||||
if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
|
||||
kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
|
||||
int trigger_mode;
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR))
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
|
||||
}
|
||||
kvm_ioapic_send_eoi(apic, vector);
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
return vector;
|
||||
}
|
||||
|
||||
/*
|
||||
* this interface assumes a trap-like exit, which has already finished
|
||||
* desired side effect including vISR and vPPR update.
|
||||
*/
|
||||
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
trace_kvm_eoi(apic, vector);
|
||||
|
||||
kvm_ioapic_send_eoi(apic, vector);
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
|
||||
|
||||
static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
{
|
||||
u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
|
||||
@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
|
||||
|
||||
/* emulate APIC access in a trap manner */
|
||||
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
|
||||
{
|
||||
u32 val = 0;
|
||||
|
||||
/* hw has done the conditional check and inst decode */
|
||||
offset &= 0xff0;
|
||||
|
||||
apic_reg_read(vcpu->arch.apic, offset, 4, &val);
|
||||
|
||||
/* TODO: optimize to just emulate side effect w/o one more write */
|
||||
apic_reg_write(vcpu->arch.apic, offset, val);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
|
||||
|
||||
void kvm_free_lapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
{
|
||||
u64 old_value = vcpu->arch.apic_base;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
if (!apic) {
|
||||
@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
value &= ~MSR_IA32_APICBASE_BSP;
|
||||
|
||||
vcpu->arch.apic_base = value;
|
||||
if (apic_x2apic_mode(apic)) {
|
||||
u32 id = kvm_apic_id(apic);
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
kvm_apic_set_ldr(apic, ldr);
|
||||
if ((old_value ^ value) & X2APIC_ENABLE) {
|
||||
if (value & X2APIC_ENABLE) {
|
||||
u32 id = kvm_apic_id(apic);
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
kvm_apic_set_ldr(apic, ldr);
|
||||
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
|
||||
} else
|
||||
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
|
||||
}
|
||||
|
||||
apic->base_address = apic->vcpu->arch.apic_base &
|
||||
MSR_IA32_APICBASE_BASE;
|
||||
|
||||
@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
|
||||
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
|
||||
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
|
||||
}
|
||||
apic->irr_pending = false;
|
||||
apic->isr_count = 0;
|
||||
apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
|
||||
apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
|
||||
apic->highest_isr_cache = -1;
|
||||
update_divide_count(apic);
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
|
||||
update_divide_count(apic);
|
||||
start_apic_timer(apic);
|
||||
apic->irr_pending = true;
|
||||
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
|
||||
apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
|
||||
1 : count_vectors(apic->regs + APIC_ISR);
|
||||
apic->highest_isr_cache = -1;
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
}
|
||||
|
||||
|
@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);

void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);

void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
    return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
}

static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
    return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
{
    return kvm_x86_ops->vm_has_apicv(kvm);
}

static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
{
    u16 cid;
    ldr >>= 32 - map->ldr_bits;
    cid = (ldr >> map->cid_shift) & map->cid_mask;

    BUG_ON(cid >= ARRAY_SIZE(map->logical_map));

    return cid;
}

static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
{
    ldr >>= (32 - map->ldr_bits);
    return ldr & map->lid_mask;
}

void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
                               struct kvm_lapic_irq *irq,
                               u64 *eoi_bitmap);

#endif
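To make the LDR decoding above concrete, here is a standalone arithmetic example. The parameter values (ldr_bits = 8, cid_shift = 4, cid_mask = 0xf, lid_mask = 0xf) are plausible xAPIC cluster-mode settings, assumed for illustration; the real values are computed in recalculate_apic_map():

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical map parameters for xAPIC cluster mode. */
    #define LDR_BITS  8
    #define CID_SHIFT 4
    #define CID_MASK  0xf
    #define LID_MASK  0xf

    int main(void)
    {
        uint32_t ldr = 0x23000000;               /* LDR[31:24] = 0x23 */
        uint32_t id  = ldr >> (32 - LDR_BITS);   /* 0x23 */

        uint16_t cid = (id >> CID_SHIFT) & CID_MASK; /* 2: logical_map index */
        uint16_t lid = id & LID_MASK;                /* 0x3: per-cluster bitmap */

        printf("cid=%u lid=0x%x\n", (unsigned)cid, (unsigned)lid);
        return 0;
    }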
@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)

static bool spte_is_locklessly_modifiable(u64 spte)
{
    return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
    return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
           (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
}

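Both spellings of the predicate above test that the two writable bits are set simultaneously; the new form just avoids the double negation. A quick standalone check of the equivalence (the bit positions are stand-ins, not the real mask values from mmu.c):

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in positions; the real masks live in arch/x86/kvm/mmu.c. */
    #define SPTE_HOST_WRITEABLE (1ULL << 57)
    #define SPTE_MMU_WRITEABLE  (1ULL << 58)
    #define BOTH (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)

    int main(void)
    {
        for (uint64_t bits = 0; bits < 4; bits++) {
            uint64_t s = bits << 57;          /* all four bit combinations */
            int old_form = !(~s & BOTH);
            int new_form = (s & BOTH) == BOTH;
            assert(old_form == new_form);
        }
        return 0;
    }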
static bool spte_has_volatile_bits(u64 spte)
@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
    if (host_level == PT_PAGE_TABLE_LEVEL)
        return host_level;

    max_level = kvm_x86_ops->get_lpage_level() < host_level ?
                kvm_x86_ops->get_lpage_level() : host_level;
    max_level = min(kvm_x86_ops->get_lpage_level(), host_level);

    for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
        if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
}

static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
                                 int level, bool pt_protect)
                                 bool pt_protect)
{
    u64 *sptep;
    struct rmap_iterator iter;
@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
    while (mask) {
        rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
                              PT_PAGE_TABLE_LEVEL, slot);
        __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false);
        __rmap_write_protect(kvm, rmapp, false);

        /* clear the first set bit */
        mask &= mask - 1;
@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
    for (i = PT_PAGE_TABLE_LEVEL;
         i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
        rmapp = __gfn_to_rmap(gfn, i, slot);
        write_protected |= __rmap_write_protect(kvm, rmapp, i, true);
        write_protected |= __rmap_write_protect(kvm, rmapp, true);
    }

    return write_protected;
@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
    percpu_counter_add(&kvm_total_used_mmu_pages, nr);
}

/*
 * Remove the sp from shadow page cache, after call it,
 * we can not find this sp from the cache, and the shadow
 * page table is still valid.
 * It should be under the protection of mmu lock.
 */
static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
{
    ASSERT(is_empty_shadow_page(sp->spt));
    hlist_del(&sp->hash_link);
    if (!sp->role.direct)
        free_page((unsigned long)sp->gfns);
}

/*
 * Free the shadow page table and the sp, we can do it
 * out of the protection of mmu lock.
 */
static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
{
    list_del(&sp->link);
    free_page((unsigned long)sp->spt);
    if (!sp->role.direct)
        free_page((unsigned long)sp->gfns);
    kmem_cache_free(mmu_page_header_cache, sp);
}

@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
        sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
    set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
    list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
    bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
    sp->parent_ptes = 0;
    mmu_page_add_parent_pte(vcpu, sp, parent_pte);
    kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@ -1973,9 +1958,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
{
    u64 spte;

    spte = __pa(sp->spt)
           | PT_PRESENT_MASK | PT_ACCESSED_MASK
           | PT_WRITABLE_MASK | PT_USER_MASK;
    spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
           shadow_user_mask | shadow_x_mask | shadow_accessed_mask;

    mmu_spte_set(sptep, spte);
}

@ -2126,7 +2111,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
    do {
        sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
        WARN_ON(!sp->role.invalid || sp->root_count);
        kvm_mmu_isolate_page(sp);
        kvm_mmu_free_page(sp);
    } while (!list_empty(invalid_list));
}
@ -2144,6 +2128,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
     * change the value
     */

    spin_lock(&kvm->mmu_lock);

    if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
        while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
               !list_empty(&kvm->arch.active_mmu_pages)) {
@ -2158,6 +2144,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
    }

    kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;

    spin_unlock(&kvm->mmu_lock);
}

int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@ -2183,14 +2171,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);

static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
{
    int slot = memslot_id(kvm, gfn);
    struct kvm_mmu_page *sp = page_header(__pa(pte));

    __set_bit(slot, sp->slot_bitmap);
}

/*
 * The function is based on mtrr_type_lookup() in
 * arch/x86/kernel/cpu/mtrr/generic.c
@ -2332,9 +2312,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
        if (s->role.level != PT_PAGE_TABLE_LEVEL)
            return 1;

        if (!need_unsync && !s->unsync) {
        if (!s->unsync)
            need_unsync = true;
        }
    }
    if (need_unsync)
        kvm_unsync_pages(vcpu, gfn);
@ -2342,8 +2321,7 @@
}

static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                    unsigned pte_access, int user_fault,
                    int write_fault, int level,
                    unsigned pte_access, int level,
                    gfn_t gfn, pfn_t pfn, bool speculative,
                    bool can_unsync, bool host_writable)
{
@ -2378,20 +2356,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,

    spte |= (u64)pfn << PAGE_SHIFT;

    if ((pte_access & ACC_WRITE_MASK)
        || (!vcpu->arch.mmu.direct_map && write_fault
            && !is_write_protection(vcpu) && !user_fault)) {
    if (pte_access & ACC_WRITE_MASK) {

        /*
         * There are two cases:
         * - the one is other vcpu creates new sp in the window
         *   between mapping_level() and acquiring mmu-lock.
         * - the another case is the new sp is created by itself
         *   (page-fault path) when guest uses the target gfn as
         *   its page table.
         * Both of these cases can be fixed by allowing guest to
         * retry the access, it will refault, then we can establish
         * the mapping by using small page.
         * Other vcpu creates new sp in the window between
         * mapping_level() and acquiring mmu-lock. We can
         * allow guest to retry the access, the mapping can
         * be fixed if guest refault.
         */
        if (level > PT_PAGE_TABLE_LEVEL &&
            has_wrprotected_page(vcpu->kvm, gfn, level))
@ -2399,19 +2370,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,

        spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;

        if (!vcpu->arch.mmu.direct_map
            && !(pte_access & ACC_WRITE_MASK)) {
            spte &= ~PT_USER_MASK;
            /*
             * If we converted a user page to a kernel page,
             * so that the kernel can write to it when cr0.wp=0,
             * then we should prevent the kernel from executing it
             * if SMEP is enabled.
             */
            if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
                spte |= PT64_NX_MASK;
        }

        /*
         * Optimization: for pte sync, if spte was writable the hash
         * lookup is unnecessary (and expensive). Write protection
@ -2441,19 +2399,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}

static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                         unsigned pt_access, unsigned pte_access,
                         int user_fault, int write_fault,
                         int *emulate, int level, gfn_t gfn,
                         pfn_t pfn, bool speculative,
                         unsigned pte_access, int write_fault, int *emulate,
                         int level, gfn_t gfn, pfn_t pfn, bool speculative,
                         bool host_writable)
{
    int was_rmapped = 0;
    int rmap_count;

    pgprintk("%s: spte %llx access %x write_fault %d"
             " user_fault %d gfn %llx\n",
             __func__, *sptep, pt_access,
             write_fault, user_fault, gfn);
    pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
             *sptep, write_fault, gfn);

    if (is_rmap_spte(*sptep)) {
        /*
@ -2477,9 +2431,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        was_rmapped = 1;
    }

    if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
                 level, gfn, pfn, speculative, true,
                 host_writable)) {
    if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
                 true, host_writable)) {
        if (write_fault)
            *emulate = 1;
        kvm_mmu_flush_tlb(vcpu);
@ -2497,7 +2450,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        ++vcpu->kvm->stat.lpages;

    if (is_shadow_present_pte(*sptep)) {
        page_header_update_slot(vcpu->kvm, sptep, gfn);
        if (!was_rmapped) {
            rmap_count = rmap_add(vcpu, sptep, gfn);
            if (rmap_count > RMAP_RECYCLE_THRESHOLD)
@ -2571,10 +2523,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
        return -1;

    for (i = 0; i < ret; i++, gfn++, start++)
        mmu_set_spte(vcpu, start, ACC_ALL,
                     access, 0, 0, NULL,
                     sp->role.level, gfn,
                     page_to_pfn(pages[i]), true, true);
        mmu_set_spte(vcpu, start, access, 0, NULL,
                     sp->role.level, gfn, page_to_pfn(pages[i]),
                     true, true);

    return 0;
}
@ -2633,11 +2584,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,

    for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
        if (iterator.level == level) {
            unsigned pte_access = ACC_ALL;

            mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
                         0, write, &emulate,
                         level, gfn, pfn, prefault, map_writable);
            mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
                         write, &emulate, level, gfn, pfn,
                         prefault, map_writable);
            direct_pte_prefetch(vcpu, iterator.sptep);
            ++vcpu->stat.pf_fixed;
            break;
@ -2652,11 +2601,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
                                  iterator.level - 1,
                                  1, ACC_ALL, iterator.sptep);

            mmu_spte_set(iterator.sptep,
                         __pa(sp->spt)
                         | PT_PRESENT_MASK | PT_WRITABLE_MASK
                         | shadow_user_mask | shadow_x_mask
                         | shadow_accessed_mask);
            link_shadow_page(iterator.sptep, sp);
        }
    }
    return emulate;
@ -3719,6 +3664,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
    else
        r = paging32_init_context(vcpu, context);

    vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
    vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
    vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
    vcpu->arch.mmu.base_role.smep_andnot_wp
@ -3885,7 +3831,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
        /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
        *gpa &= ~(gpa_t)7;
        *bytes = 8;
        r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
        r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
        if (r)
            gentry = 0;
        new = (const u8 *)&gentry;
@ -4039,7 +3985,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
            !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
              & mask.word) && rmap_can_add(vcpu))
            mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
        if (!remote_flush && need_remote_flush(entry, *spte))
        if (need_remote_flush(entry, *spte))
            remote_flush = true;
        ++spte;
    }
@ -4198,26 +4144,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)

void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
    struct kvm_mmu_page *sp;
    bool flush = false;
    struct kvm_memory_slot *memslot;
    gfn_t last_gfn;
    int i;

    list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
        int i;
        u64 *pt;
    memslot = id_to_memslot(kvm->memslots, slot);
    last_gfn = memslot->base_gfn + memslot->npages - 1;

        if (!test_bit(slot, sp->slot_bitmap))
            continue;
    spin_lock(&kvm->mmu_lock);

        pt = sp->spt;
        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
            if (!is_shadow_present_pte(pt[i]) ||
                !is_last_spte(pt[i], sp->role.level))
                continue;
    for (i = PT_PAGE_TABLE_LEVEL;
         i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
        unsigned long *rmapp;
        unsigned long last_index, index;

            spte_write_protect(kvm, &pt[i], &flush, false);
        rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
        last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);

        for (index = 0; index <= last_index; ++index, ++rmapp) {
            if (*rmapp)
                __rmap_write_protect(kvm, rmapp, false);

            if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
                kvm_flush_remote_tlbs(kvm);
                cond_resched_lock(&kvm->mmu_lock);
            }
        }
    }

    kvm_flush_remote_tlbs(kvm);
    spin_unlock(&kvm->mmu_lock);
}

void kvm_mmu_zap_all(struct kvm *kvm)
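The rewritten write-protect loop above is where the spinlock hold-time improvement comes from: instead of keeping mmu_lock held across every shadow page, the rmap walk periodically flushes TLBs and yields the lock via cond_resched_lock(). A userspace analogue of that lock-break pattern, illustrative only (pthread mutex standing in for the kernel spinlock):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* stand-in for the per-rmap work such as __rmap_write_protect() */
    static void process_item(long i) { (void)i; }

    static void long_job(long n_items)
    {
        pthread_mutex_lock(&lock);
        for (long i = 0; i < n_items; i++) {
            process_item(i);
            if ((i & 1023) == 0) {           /* break point, cf. spin_needbreak() */
                pthread_mutex_unlock(&lock); /* let waiters in */
                pthread_mutex_lock(&lock);   /* retake and continue */
            }
        }
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        long_job(1L << 20);
        puts("done");
        return 0;
    }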
@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
    TP_ARGS(sp)
);

DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages,
    TP_PROTO(struct kvm_mmu_page *sp),

    TP_ARGS(sp)
);

TRACE_EVENT(
    mark_mmio_spte,
    TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
    pt_element_t pte;
    pt_element_t __user *uninitialized_var(ptep_user);
    gfn_t table_gfn;
    unsigned index, pt_access, pte_access, accessed_dirty, shift;
    unsigned index, pt_access, pte_access, accessed_dirty;
    gpa_t pte_gpa;
    int offset;
    const int write_fault = access & PFERR_WRITE_MASK;
@ -249,16 +249,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,

    if (!write_fault)
        protect_clean_gpte(&pte_access, pte);

    /*
     * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
     * place right.
     *
     * On a read fault, do nothing.
     */
    shift = write_fault >> ilog2(PFERR_WRITE_MASK);
    shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
    accessed_dirty &= pte >> shift;
    else
        /*
         * On a write fault, fold the dirty bit into accessed_dirty by
         * shifting it one place right.
         */
        accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);

    if (unlikely(!accessed_dirty)) {
        ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
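The old branchless form above is worth unpacking: write_fault is either 0 or PFERR_WRITE_MASK, so the computed shift is either 0 (read fault: test the accessed bit) or dirty-minus-accessed (write fault: shift the dirty bit down into the accessed position and test that instead). A standalone demonstration, assuming the usual x86 bit positions (accessed = 5, dirty = 6, write error-code bit = 1):

    #include <assert.h>
    #include <stdio.h>

    #define PT_ACCESSED_SHIFT 5            /* x86 PTE accessed bit */
    #define PT_DIRTY_SHIFT    6            /* x86 PTE dirty bit */
    #define PFERR_WRITE_MASK  (1u << 1)    /* page-fault error code: write */

    static unsigned fold(unsigned accessed_dirty, unsigned pte,
                         unsigned write_fault)
    {
        unsigned shift = write_fault >> 1;           /* ilog2(PFERR_WRITE_MASK) */
        shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; /* 0 for reads, 1 for writes */
        return accessed_dirty & (pte >> shift);
    }

    int main(void)
    {
        unsigned acc = 1u << PT_ACCESSED_SHIFT;      /* accessed-bit mask */
        unsigned pte = (1u << PT_ACCESSED_SHIFT) | (1u << PT_DIRTY_SHIFT);

        /* read fault: accessed bit survives; write fault: the dirty bit
         * is shifted into the accessed position and tested instead. */
        assert(fold(acc, pte, 0) == acc);
        assert(fold(acc, pte, PFERR_WRITE_MASK) == acc);
        assert(fold(acc, 1u << PT_ACCESSED_SHIFT, PFERR_WRITE_MASK) == 0);
        puts("ok");
        return 0;
    }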
@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
     * we call mmu_set_spte() with host_writable = true because
     * pte_prefetch_gfn_to_pfn always gets a writable pfn.
     */
    mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
                 NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
    mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
                 gfn, pfn, true, true);

    return true;
}
@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 */
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                        struct guest_walker *gw,
                        int user_fault, int write_fault, int hlevel,
                        int write_fault, int hlevel,
                        pfn_t pfn, bool map_writable, bool prefault)
{
    struct kvm_mmu_page *sp = NULL;
@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    unsigned direct_access, access = gw->pt_access;
    int top_level, emulate = 0;

    if (!is_present_gpte(gw->ptes[gw->level - 1]))
        return 0;

    direct_access = gw->pte_access;

    top_level = vcpu->arch.mmu.root_level;
@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    }

    clear_sp_write_flooding_count(it.sptep);
    mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
                 user_fault, write_fault, &emulate, it.level,
                 gw->gfn, pfn, prefault, map_writable);
    mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
                 it.level, gw->gfn, pfn, prefault, map_writable);
    FNAME(pte_prefetch)(vcpu, gw, it.sptep);

    return emulate;
@ -491,6 +483,46 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
    return 0;
}

/*
 * To see whether the mapped gfn can write its page table in the current
 * mapping.
 *
 * It is the helper function of FNAME(page_fault). When guest uses large page
 * size to map the writable gfn which is used as current page table, we should
 * force kvm to use small page size to map it because new shadow page will be
 * created when kvm establishes shadow page table that stop kvm using large
 * page size. Do it early can avoid unnecessary #PF and emulation.
 *
 * @write_fault_to_shadow_pgtable will return true if the fault gfn is
 * currently used as its page table.
 *
 * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok
 * since the PDPT is always shadowed, that means, we can not use large page
 * size to map the gfn which is used as PDPT.
 */
static bool
FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
                              struct guest_walker *walker, int user_fault,
                              bool *write_fault_to_shadow_pgtable)
{
    int level;
    gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
    bool self_changed = false;

    if (!(walker->pte_access & ACC_WRITE_MASK ||
          (!is_write_protection(vcpu) && !user_fault)))
        return false;

    for (level = walker->level; level <= walker->max_level; level++) {
        gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];

        self_changed |= !(gfn & mask);
        *write_fault_to_shadow_pgtable |= !gfn;
    }

    return self_changed;
}

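The XOR/mask test in is_self_change_mapping() above can be read as: the faulting gfn and the page-table gfn land in the same large-page frame iff their XOR has no bits above the large-page offset. A standalone check, assuming a 2MB huge page (512 small pages) for illustration:

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t gfn_t;
    #define PAGES_PER_HPAGE 512ULL   /* 2MB / 4KB, example value */

    int main(void)
    {
        gfn_t mask = ~(PAGES_PER_HPAGE - 1);
        gfn_t fault_gfn = 0x1234;    /* gfn being mapped */
        gfn_t table_gfn = 0x1300;    /* gfn holding a guest page table */

        /* same 2MB frame: the XOR stays within the low 9 bits */
        assert(!((fault_gfn ^ table_gfn) & mask));

        /* different frame: some high bit differs */
        assert((fault_gfn ^ 0x2300ULL) & mask);

        /* identical gfn: the fault writes its own page table */
        assert((fault_gfn ^ fault_gfn) == 0);
        return 0;
    }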
/*
 * Page fault handler. There are several causes for a page fault:
 * - there is no shadow pte for the guest pte
@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
    int level = PT_PAGE_TABLE_LEVEL;
    int force_pt_level;
    unsigned long mmu_seq;
    bool map_writable;
    bool map_writable, is_self_change_mapping;

    pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);

@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
        return 0;
    }

    vcpu->arch.write_fault_to_shadow_pgtable = false;

    is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
        &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);

    if (walker.level >= PT_DIRECTORY_LEVEL)
        force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn);
        force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
                         || is_self_change_mapping;
    else
        force_pt_level = 1;
    if (!force_pt_level) {
@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
                            walker.gfn, pfn, walker.pte_access, &r))
        return r;

    /*
     * Do not change pte_access if the pfn is a mmio page, otherwise
     * we will cache the incorrect access into mmio spte.
     */
    if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
        !is_write_protection(vcpu) && !user_fault &&
        !is_noslot_pfn(pfn)) {
        walker.pte_access |= ACC_WRITE_MASK;
        walker.pte_access &= ~ACC_USER_MASK;

        /*
         * If we converted a user page to a kernel page,
         * so that the kernel can write to it when cr0.wp=0,
         * then we should prevent the kernel from executing it
         * if SMEP is enabled.
         */
        if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
            walker.pte_access &= ~ACC_EXEC_MASK;
    }

    spin_lock(&vcpu->kvm->mmu_lock);
    if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
        goto out_unlock;
@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
    kvm_mmu_free_some_pages(vcpu);
    if (!force_pt_level)
        transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
    r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
    r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
                     level, pfn, map_writable, prefault);
    ++vcpu->stat.pf_fixed;
    kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)

        host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;

        set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
        set_spte(vcpu, &sp->spt[i], pte_access,
                 PT_PAGE_TABLE_LEVEL, gfn,
                 spte_to_pfn(sp->spt[i]), true, false,
                 host_writable);
@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
        set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}

static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
    return;
}

static int svm_vm_has_apicv(struct kvm *kvm)
{
    return 0;
}

static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
    return;
}

static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
{
    return;
}

static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
    struct vcpu_svm *svm = to_svm(vcpu);
@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
    .enable_nmi_window = enable_nmi_window,
    .enable_irq_window = enable_irq_window,
    .update_cr8_intercept = update_cr8_intercept,
    .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
    .vm_has_apicv = svm_vm_has_apicv,
    .load_eoi_exitmap = svm_load_eoi_exitmap,
    .hwapic_isr_update = svm_hwapic_isr_update,

    .set_tss_addr = svm_set_tss_addr,
    .get_tdp_level = get_npt_level,
File diff suppressed because it is too large
@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)

    kvm_x86_ops->set_efer(vcpu, efer);

    vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;

    /* Update reserved bits */
    if ((efer ^ old_efer) & EFER_NX)
        kvm_mmu_reset_context(vcpu);
@ -2522,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        r = KVM_MAX_VCPUS;
        break;
    case KVM_CAP_NR_MEMSLOTS:
        r = KVM_MEMORY_SLOTS;
        r = KVM_USER_MEM_SLOTS;
        break;
    case KVM_CAP_PV_MMU: /* obsolete */
        r = 0;
@ -3274,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
        return -EINVAL;

    mutex_lock(&kvm->slots_lock);
    spin_lock(&kvm->mmu_lock);

    kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
    kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;

    spin_unlock(&kvm->mmu_lock);
    mutex_unlock(&kvm->slots_lock);
    return 0;
}
@ -3439,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
    mutex_lock(&kvm->slots_lock);

    r = -EINVAL;
    if (log->slot >= KVM_MEMORY_SLOTS)
    if (log->slot >= KVM_USER_MEM_SLOTS)
        goto out;

    memslot = id_to_memslot(kvm->memslots, log->slot);
@ -4495,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
    kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
    *selector = var.selector;

    if (var.unusable)
    if (var.unusable) {
        memset(desc, 0, sizeof(*desc));
        return false;
    }

    if (var.g)
        var.limit >>= 12;
@ -4757,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
    return r;
}

static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
                                  bool write_fault_to_shadow_pgtable)
{
    gpa_t gpa;
    gpa_t gpa = cr2;
    pfn_t pfn;

    if (tdp_enabled)
        return false;
    if (!vcpu->arch.mmu.direct_map) {
        /*
         * Write permission should be allowed since only
         * write access need to be emulated.
         */
        gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

    /*
     * if emulation was due to access to shadowed page table
     * and it failed try to unshadow page and re-enter the
     * guest to let CPU execute the instruction.
     */
    if (kvm_mmu_unprotect_page_virt(vcpu, gva))
        return true;

    gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);

    if (gpa == UNMAPPED_GVA)
        return true; /* let cpu generate fault */
        /*
         * If the mapping is invalid in guest, let cpu retry
         * it to generate fault.
         */
        if (gpa == UNMAPPED_GVA)
            return true;
    }

    /*
     * Do not retry the unhandleable instruction if it faults on the
@ -4785,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
     * instruction -> ...
     */
    pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
    if (!is_error_noslot_pfn(pfn)) {
        kvm_release_pfn_clean(pfn);

    /*
     * If the instruction failed on the error pfn, it can not be fixed,
     * report the error to userspace.
     */
    if (is_error_noslot_pfn(pfn))
        return false;

    kvm_release_pfn_clean(pfn);

    /* The instructions are well-emulated on direct mmu. */
    if (vcpu->arch.mmu.direct_map) {
        unsigned int indirect_shadow_pages;

        spin_lock(&vcpu->kvm->mmu_lock);
        indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
        spin_unlock(&vcpu->kvm->mmu_lock);

        if (indirect_shadow_pages)
            kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

        return true;
    }

    return false;
    /*
     * if emulation was due to access to shadowed page table
     * and it failed try to unshadow page and re-enter the
     * guest to let CPU execute the instruction.
     */
    kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

    /*
     * If the access faults on its page table, it can not
     * be fixed by unprotecting shadow page and it should
     * be reported to userspace.
     */
    return !write_fault_to_shadow_pgtable;
}

static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@ -4832,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
    if (!vcpu->arch.mmu.direct_map)
        gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

    kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
    kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

    return true;
}
@ -4849,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
    int r;
    struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
    bool writeback = true;
    bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;

    /*
     * Clear write_fault_to_shadow_pgtable here to ensure it is
     * never reused.
     */
    vcpu->arch.write_fault_to_shadow_pgtable = false;
    kvm_clear_exception_queue(vcpu);

    if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@ -4868,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
        if (r != EMULATION_OK) {
            if (emulation_type & EMULTYPE_TRAP_UD)
                return EMULATE_FAIL;
            if (reexecute_instruction(vcpu, cr2))
            if (reexecute_instruction(vcpu, cr2,
                                      write_fault_to_spt))
                return EMULATE_DONE;
            if (emulation_type & EMULTYPE_SKIP)
                return EMULATE_FAIL;
@ -4898,7 +4934,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
        return EMULATE_DONE;

    if (r == EMULATION_FAILED) {
        if (reexecute_instruction(vcpu, cr2))
        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
            return EMULATE_DONE;

        return handle_emulation_failure(vcpu);
@ -5541,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
            vcpu->arch.nmi_injected = true;
            kvm_x86_ops->set_nmi(vcpu);
        }
    } else if (kvm_cpu_has_interrupt(vcpu)) {
    } else if (kvm_cpu_has_injectable_intr(vcpu)) {
        if (kvm_x86_ops->interrupt_allowed(vcpu)) {
            kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
                                false);
@ -5609,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
#endif
}

static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
{
    u64 eoi_exit_bitmap[4];

    memset(eoi_exit_bitmap, 0, 32);

    kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
    kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}

static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
    int r;
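For orientation: the EOI exit bitmap above is one bit per interrupt vector, so 256 vectors fit in four u64 words, which is what the 32 in the memset covers (4 * 8 bytes). A minimal standalone sketch of setting and testing one vector bit:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint64_t eoi_exit_bitmap[4];    /* 4 * 64 = 256 vector bits */
        unsigned vector = 0xa3;

        memset(eoi_exit_bitmap, 0, sizeof(eoi_exit_bitmap)); /* 32 bytes */
        eoi_exit_bitmap[vector / 64] |= 1ULL << (vector % 64);

        assert(eoi_exit_bitmap[vector / 64] & (1ULL << (vector % 64)));
        return 0;
    }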
@ -5662,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
            kvm_handle_pmu_event(vcpu);
        if (kvm_check_request(KVM_REQ_PMI, vcpu))
            kvm_deliver_pmi(vcpu);
        if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
            update_eoi_exitmap(vcpu);
    }

    if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@ -5670,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        /* enable NMI/IRQ window open exits if needed */
        if (vcpu->arch.nmi_pending)
            kvm_x86_ops->enable_nmi_window(vcpu);
        else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
        else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
            kvm_x86_ops->enable_irq_window(vcpu);

        if (kvm_lapic_enabled(vcpu)) {
            /*
             * Update architecture specific hints for APIC
             * virtual interrupt delivery.
             */
            if (kvm_x86_ops->hwapic_irr_update)
                kvm_x86_ops->hwapic_irr_update(vcpu,
                    kvm_lapic_find_highest_irr(vcpu));
            update_cr8_intercept(vcpu);
            kvm_lapic_sync_to_vapic(vcpu);
        }
@ -6853,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   struct kvm_memory_slot old,
                                   struct kvm_userspace_memory_region *mem,
                                   int user_alloc)
                                   bool user_alloc)
{
    int npages = memslot->npages;
    int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;

    /* Prevent internal slot pages from being moved by fork()/COW. */
    if (memslot->id >= KVM_MEMORY_SLOTS)
        map_flags = MAP_SHARED | MAP_ANONYMOUS;

    /*To keep backward compatibility with older userspace,
     *x86 needs to handle !user_alloc case.
    /*
     * Only private memory slots need to be mapped here since
     * KVM_SET_MEMORY_REGION ioctl is no longer supported.
     */
    if (!user_alloc) {
        if (npages && !old.npages) {
            unsigned long userspace_addr;
    if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
        unsigned long userspace_addr;

            userspace_addr = vm_mmap(NULL, 0,
                                     npages * PAGE_SIZE,
                                     PROT_READ | PROT_WRITE,
                                     map_flags,
                                     0);
        /*
         * MAP_SHARED to prevent internal slot pages from being moved
         * by fork()/COW.
         */
        userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
                                 PROT_READ | PROT_WRITE,
                                 MAP_SHARED | MAP_ANONYMOUS, 0);

            if (IS_ERR((void *)userspace_addr))
                return PTR_ERR((void *)userspace_addr);
        if (IS_ERR((void *)userspace_addr))
            return PTR_ERR((void *)userspace_addr);

            memslot->userspace_addr = userspace_addr;
        }
        memslot->userspace_addr = userspace_addr;
    }

    return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot old,
                                   int user_alloc)
                                   bool user_alloc)
{

    int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;

    if (!user_alloc && !old.user_alloc && old.npages && !npages) {
    if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
        int ret;

        ret = vm_munmap(old.userspace_addr,
@ -6908,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
    if (!kvm->arch.n_requested_mmu_pages)
        nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);

    spin_lock(&kvm->mmu_lock);
    if (nr_mmu_pages)
        kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
    kvm_mmu_slot_remove_write_access(kvm, mem->slot);
    spin_unlock(&kvm->mmu_lock);
    /*
     * Write protect all pages for dirty logging.
     * Existing largepage mappings are destroyed here and new ones will
     * not be created until the end of the logging.
     */
    if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
    /*
     * If memory slot is created, or moved, we need to clear all
     * mmio sptes.
@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.

obj-$(CONFIG_S390_GUEST) += kvm_virtio.o
obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
@ -421,6 +421,26 @@ static void kvm_extint_handler(struct ext_code ext_code,
    }
}

/*
 * For s390-virtio, we expect a page above main storage containing
 * the virtio configuration. Try to actually load from this area
 * in order to figure out if the host provides this page.
 */
static int __init test_devices_support(unsigned long addr)
{
    int ret = -EIO;

    asm volatile(
        "0: lura 0,%1\n"
        "1: xgr %0,%0\n"
        "2:\n"
        EX_TABLE(0b,2b)
        EX_TABLE(1b,2b)
        : "+d" (ret)
        : "a" (addr)
        : "0", "cc");
    return ret;
}
/*
 * Init function for virtio
 * devices are in a single page above top of "normal" mem
@ -432,21 +452,23 @@ static int __init kvm_devices_init(void)
    if (!MACHINE_IS_KVM)
        return -ENODEV;

    if (test_devices_support(real_memory_size) < 0)
        return -ENODEV;

    rc = vmem_add_mapping(real_memory_size, PAGE_SIZE);
    if (rc)
        return rc;

    kvm_devices = (void *) real_memory_size;

    kvm_root = root_device_register("kvm_s390");
    if (IS_ERR(kvm_root)) {
        rc = PTR_ERR(kvm_root);
        printk(KERN_ERR "Could not register kvm_s390 root device");
        vmem_remove_mapping(real_memory_size, PAGE_SIZE);
        return rc;
    }

    rc = vmem_add_mapping(real_memory_size, PAGE_SIZE);
    if (rc) {
        root_device_unregister(kvm_root);
        return rc;
    }

    kvm_devices = (void *) real_memory_size;

    INIT_WORK(&hotplug_work, hotplug_devices);

    service_subclass_irq_register();
926 drivers/s390/kvm/virtio_ccw.c Normal file
@ -0,0 +1,926 @@
/*
 * ccw based virtio transport
 *
 * Copyright IBM Corp. 2012
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
 */

#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/virtio_ring.h>
#include <linux/pfn.h>
#include <linux/async.h>
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/io.h>
#include <linux/kvm_para.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/cio.h>
#include <asm/ccwdev.h>

/*
 * virtio related functions
 */

struct vq_config_block {
    __u16 index;
    __u16 num;
} __packed;

#define VIRTIO_CCW_CONFIG_SIZE 0x100
/* same as PCI config space size, should be enough for all drivers */

struct virtio_ccw_device {
    struct virtio_device vdev;
    __u8 *status;
    __u8 config[VIRTIO_CCW_CONFIG_SIZE];
    struct ccw_device *cdev;
    __u32 curr_io;
    int err;
    wait_queue_head_t wait_q;
    spinlock_t lock;
    struct list_head virtqueues;
    unsigned long indicators;
    unsigned long indicators2;
    struct vq_config_block *config_block;
};

struct vq_info_block {
    __u64 queue;
    __u32 align;
    __u16 index;
    __u16 num;
} __packed;

struct virtio_feature_desc {
    __u32 features;
    __u8 index;
} __packed;

struct virtio_ccw_vq_info {
    struct virtqueue *vq;
    int num;
    void *queue;
    struct vq_info_block *info_block;
    struct list_head node;
};

#define KVM_VIRTIO_CCW_RING_ALIGN 4096

#define KVM_S390_VIRTIO_CCW_NOTIFY 3

#define CCW_CMD_SET_VQ 0x13
#define CCW_CMD_VDEV_RESET 0x33
#define CCW_CMD_SET_IND 0x43
#define CCW_CMD_SET_CONF_IND 0x53
#define CCW_CMD_READ_FEAT 0x12
#define CCW_CMD_WRITE_FEAT 0x11
#define CCW_CMD_READ_CONF 0x22
#define CCW_CMD_WRITE_CONF 0x21
#define CCW_CMD_WRITE_STATUS 0x31
#define CCW_CMD_READ_VQ_CONF 0x32

#define VIRTIO_CCW_DOING_SET_VQ 0x00010000
#define VIRTIO_CCW_DOING_RESET 0x00040000
#define VIRTIO_CCW_DOING_READ_FEAT 0x00080000
#define VIRTIO_CCW_DOING_WRITE_FEAT 0x00100000
#define VIRTIO_CCW_DOING_READ_CONFIG 0x00200000
#define VIRTIO_CCW_DOING_WRITE_CONFIG 0x00400000
#define VIRTIO_CCW_DOING_WRITE_STATUS 0x00800000
#define VIRTIO_CCW_DOING_SET_IND 0x01000000
#define VIRTIO_CCW_DOING_READ_VQ_CONF 0x02000000
#define VIRTIO_CCW_DOING_SET_CONF_IND 0x04000000
#define VIRTIO_CCW_INTPARM_MASK 0xffff0000
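As the masks above suggest, the driver encodes the in-flight operation in the upper 16 bits of the CCW interruption parameter and, where needed, a queue index in the lower 16 bits (see the later VIRTIO_CCW_DOING_SET_VQ | index calls). A standalone sketch of that packing:

    #include <assert.h>
    #include <stdint.h>

    #define VIRTIO_CCW_DOING_SET_VQ 0x00010000u
    #define VIRTIO_CCW_INTPARM_MASK 0xffff0000u

    int main(void)
    {
        uint32_t index = 5;                                 /* virtqueue number */
        uint32_t intparm = VIRTIO_CCW_DOING_SET_VQ | index; /* packed parameter */

        /* The completion path can recover both halves: */
        assert((intparm & VIRTIO_CCW_INTPARM_MASK) == VIRTIO_CCW_DOING_SET_VQ);
        assert((intparm & ~VIRTIO_CCW_INTPARM_MASK) == index);
        return 0;
    }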
static struct virtio_ccw_device *to_vc_device(struct virtio_device *vdev)
|
||||
{
|
||||
return container_of(vdev, struct virtio_ccw_device, vdev);
|
||||
}
|
||||
|
||||
static int doing_io(struct virtio_ccw_device *vcdev, __u32 flag)
|
||||
{
|
||||
unsigned long flags;
|
||||
__u32 ret;
|
||||
|
||||
spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags);
|
||||
if (vcdev->err)
|
||||
ret = 0;
|
||||
else
|
||||
ret = vcdev->curr_io & flag;
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ccw_io_helper(struct virtio_ccw_device *vcdev,
|
||||
struct ccw1 *ccw, __u32 intparm)
|
||||
{
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
int flag = intparm & VIRTIO_CCW_INTPARM_MASK;
|
||||
|
||||
do {
|
||||
spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags);
|
||||
ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0);
|
||||
if (!ret)
|
||||
vcdev->curr_io |= flag;
|
||||
spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags);
|
||||
cpu_relax();
|
||||
} while (ret == -EBUSY);
|
||||
wait_event(vcdev->wait_q, doing_io(vcdev, flag) == 0);
|
||||
return ret ? ret : vcdev->err;
|
||||
}
|
||||
|
||||
static inline long do_kvm_notify(struct subchannel_id schid,
|
||||
unsigned long queue_index)
|
||||
{
|
||||
register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
|
||||
register struct subchannel_id __schid asm("2") = schid;
|
||||
register unsigned long __index asm("3") = queue_index;
|
||||
register long __rc asm("2");
|
||||
|
||||
asm volatile ("diag 2,4,0x500\n"
|
||||
: "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index)
|
||||
: "memory", "cc");
|
||||
return __rc;
|
||||
}
|
||||
|
||||
static void virtio_ccw_kvm_notify(struct virtqueue *vq)
|
||||
{
|
||||
struct virtio_ccw_vq_info *info = vq->priv;
|
||||
struct virtio_ccw_device *vcdev;
|
||||
struct subchannel_id schid;
|
||||
|
||||
vcdev = to_vc_device(info->vq->vdev);
|
||||
ccw_device_get_schid(vcdev->cdev, &schid);
|
||||
do_kvm_notify(schid, virtqueue_get_queue_index(vq));
|
||||
}
|
||||
|
||||
static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
|
||||
struct ccw1 *ccw, int index)
|
||||
{
|
||||
vcdev->config_block->index = index;
|
||||
ccw->cmd_code = CCW_CMD_READ_VQ_CONF;
|
||||
ccw->flags = 0;
|
||||
ccw->count = sizeof(struct vq_config_block);
|
||||
ccw->cda = (__u32)(unsigned long)(vcdev->config_block);
|
||||
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF);
|
||||
return vcdev->config_block->num;
|
||||
}
|
||||
|
||||
static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw)
|
||||
{
|
||||
struct virtio_ccw_device *vcdev = to_vc_device(vq->vdev);
|
||||
struct virtio_ccw_vq_info *info = vq->priv;
|
||||
unsigned long flags;
|
||||
unsigned long size;
|
||||
int ret;
|
||||
unsigned int index = virtqueue_get_queue_index(vq);
|
||||
|
||||
/* Remove from our list. */
|
||||
spin_lock_irqsave(&vcdev->lock, flags);
|
||||
list_del(&info->node);
|
||||
spin_unlock_irqrestore(&vcdev->lock, flags);
|
||||
|
||||
/* Release from host. */
|
||||
info->info_block->queue = 0;
|
||||
info->info_block->align = 0;
|
||||
info->info_block->index = index;
|
||||
info->info_block->num = 0;
|
||||
ccw->cmd_code = CCW_CMD_SET_VQ;
|
||||
ccw->flags = 0;
|
||||
ccw->count = sizeof(*info->info_block);
|
||||
ccw->cda = (__u32)(unsigned long)(info->info_block);
|
||||
ret = ccw_io_helper(vcdev, ccw,
|
||||
VIRTIO_CCW_DOING_SET_VQ | index);
|
||||
/*
|
||||
* -ENODEV isn't considered an error: The device is gone anyway.
|
||||
* This may happen on device detach.
|
||||
*/
|
||||
if (ret && (ret != -ENODEV))
|
||||
dev_warn(&vq->vdev->dev, "Error %d while deleting queue %d",
|
||||
ret, index);
|
||||
|
||||
vring_del_virtqueue(vq);
|
||||
size = PAGE_ALIGN(vring_size(info->num, KVM_VIRTIO_CCW_RING_ALIGN));
|
||||
free_pages_exact(info->queue, size);
|
||||
kfree(info->info_block);
|
||||
kfree(info);
|
||||
}
|
||||
|
||||
static void virtio_ccw_del_vqs(struct virtio_device *vdev)
|
||||
{
|
||||
struct virtqueue *vq, *n;
|
||||
struct ccw1 *ccw;
|
||||
|
||||
ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
|
||||
if (!ccw)
|
||||
return;
|
||||
|
||||
|
||||
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
|
||||
virtio_ccw_del_vq(vq, ccw);
|
||||
|
||||
kfree(ccw);
|
||||
}
|
||||
|
||||
static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
|
||||
int i, vq_callback_t *callback,
|
||||
const char *name,
|
||||
struct ccw1 *ccw)
|
||||
{
|
||||
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
|
||||
int err;
|
||||
struct virtqueue *vq = NULL;
|
||||
struct virtio_ccw_vq_info *info;
|
||||
unsigned long size = 0; /* silence the compiler */
|
||||
unsigned long flags;
|
||||
|
||||
/* Allocate queue. */
|
||||
info = kzalloc(sizeof(struct virtio_ccw_vq_info), GFP_KERNEL);
|
||||
if (!info) {
|
||||
dev_warn(&vcdev->cdev->dev, "no info\n");
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
info->info_block = kzalloc(sizeof(*info->info_block),
|
||||
GFP_DMA | GFP_KERNEL);
|
||||
if (!info->info_block) {
|
||||
dev_warn(&vcdev->cdev->dev, "no info block\n");
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
info->num = virtio_ccw_read_vq_conf(vcdev, ccw, i);
|
||||
size = PAGE_ALIGN(vring_size(info->num, KVM_VIRTIO_CCW_RING_ALIGN));
|
||||
info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
|
||||
if (info->queue == NULL) {
|
||||
dev_warn(&vcdev->cdev->dev, "no queue\n");
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
vq = vring_new_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, vdev,
|
||||
true, info->queue, virtio_ccw_kvm_notify,
|
||||
callback, name);
|
||||
if (!vq) {
|
||||
/* For now, we fail if we can't get the requested size. */
|
||||
dev_warn(&vcdev->cdev->dev, "no vq\n");
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Register it with the host. */
|
||||
info->info_block->queue = (__u64)info->queue;
|
||||
info->info_block->align = KVM_VIRTIO_CCW_RING_ALIGN;
|
||||
info->info_block->index = i;
|
||||
info->info_block->num = info->num;
|
||||
ccw->cmd_code = CCW_CMD_SET_VQ;
|
||||
ccw->flags = 0;
|
||||
ccw->count = sizeof(*info->info_block);
|
||||
ccw->cda = (__u32)(unsigned long)(info->info_block);
|
||||
err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i);
|
||||
if (err) {
|
||||
dev_warn(&vcdev->cdev->dev, "SET_VQ failed\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
info->vq = vq;
|
||||
vq->priv = info;
|
||||
|
||||
/* Save it to our list. */
|
||||
spin_lock_irqsave(&vcdev->lock, flags);
|
||||
list_add(&info->node, &vcdev->virtqueues);
|
||||
spin_unlock_irqrestore(&vcdev->lock, flags);
|
||||
|
||||
return vq;
|
||||
|
||||
out_err:
|
||||
if (vq)
|
||||
vring_del_virtqueue(vq);
|
||||
if (info) {
|
||||
if (info->queue)
|
||||
free_pages_exact(info->queue, size);
|
||||
kfree(info->info_block);
|
||||
}
|
||||
kfree(info);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			       struct virtqueue *vqs[],
			       vq_callback_t *callbacks[],
			       const char *names[])
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	unsigned long *indicatorp = NULL;
	int ret, i;
	struct ccw1 *ccw;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return -ENOMEM;

	for (i = 0; i < nvqs; ++i) {
		vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i],
					     ccw);
		if (IS_ERR(vqs[i])) {
			ret = PTR_ERR(vqs[i]);
			vqs[i] = NULL;
			goto out;
		}
	}
	ret = -ENOMEM;
	/* We need a data area under 2G to communicate. */
	indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL);
	if (!indicatorp)
		goto out;
	*indicatorp = (unsigned long) &vcdev->indicators;
	/* Register queue indicators with host. */
	vcdev->indicators = 0;
	ccw->cmd_code = CCW_CMD_SET_IND;
	ccw->flags = 0;
	ccw->count = sizeof(vcdev->indicators);
	ccw->cda = (__u32)(unsigned long) indicatorp;
	ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND);
	if (ret)
		goto out;
	/* Register indicators2 with host for config changes */
	*indicatorp = (unsigned long) &vcdev->indicators2;
	vcdev->indicators2 = 0;
	ccw->cmd_code = CCW_CMD_SET_CONF_IND;
	ccw->flags = 0;
	ccw->count = sizeof(vcdev->indicators2);
	ccw->cda = (__u32)(unsigned long) indicatorp;
	ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND);
	if (ret)
		goto out;

	kfree(indicatorp);
	kfree(ccw);
	return 0;
out:
	kfree(indicatorp);
	kfree(ccw);
	virtio_ccw_del_vqs(vdev);
	return ret;
}

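Both SET_IND and SET_CONF_IND hand the host the guest address of a single unsigned long: bit n of vcdev->indicators then stands for virtqueue n, and bit 0 of indicators2 signals a config change. Note that sizeof(&vcdev->indicators) in the kmalloc above is the size of a pointer, which on s390x equals sizeof(unsigned long), so the staging buffer happens to be exactly large enough. A hedged sketch of how a set bit is consumed (this mirrors the real interrupt handler further down, it is not additional driver code):

	/* Illustration only: map indicator bits back to virtqueues. */
	static void example_process_indicators(struct virtio_ccw_device *vcdev)
	{
		int i;

		for_each_set_bit(i, &vcdev->indicators,
				 sizeof(vcdev->indicators) * BITS_PER_BYTE) {
			struct virtqueue *vq = virtio_ccw_vq_by_ind(vcdev, i);

			if (vq)
				vring_interrupt(0, vq);
		}
	}
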
static void virtio_ccw_reset(struct virtio_device *vdev)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	struct ccw1 *ccw;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return;

	/* Zero status bits. */
	*vcdev->status = 0;

	/* Send a reset ccw on device. */
	ccw->cmd_code = CCW_CMD_VDEV_RESET;
	ccw->flags = 0;
	ccw->count = 0;
	ccw->cda = 0;
	ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_RESET);
	kfree(ccw);
}

static u32 virtio_ccw_get_features(struct virtio_device *vdev)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	struct virtio_feature_desc *features;
	int ret, rc;
	struct ccw1 *ccw;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return 0;

	features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL);
	if (!features) {
		rc = 0;
		goto out_free;
	}
	/* Read the feature bits from the host. */
	/* TODO: Features > 32 bits */
	features->index = 0;
	ccw->cmd_code = CCW_CMD_READ_FEAT;
	ccw->flags = 0;
	ccw->count = sizeof(*features);
	ccw->cda = (__u32)(unsigned long)features;
	ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_FEAT);
	if (ret) {
		rc = 0;
		goto out_free;
	}

	rc = le32_to_cpu(features->features);

out_free:
	kfree(features);
	kfree(ccw);
	return rc;
}

static void virtio_ccw_finalize_features(struct virtio_device *vdev)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	struct virtio_feature_desc *features;
	int i;
	struct ccw1 *ccw;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return;

	features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL);
	if (!features)
		goto out_free;

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	for (i = 0; i < sizeof(*vdev->features) / sizeof(features->features);
	     i++) {
		int highbits = i % 2 ? 32 : 0;
		features->index = i;
		features->features = cpu_to_le32(vdev->features[i / 2]
						 >> highbits);
		/* Write the feature bits to the host. */
		ccw->cmd_code = CCW_CMD_WRITE_FEAT;
		ccw->flags = 0;
		ccw->count = sizeof(*features);
		ccw->cda = (__u32)(unsigned long)features;
		ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT);
	}
out_free:
	kfree(features);
	kfree(ccw);
}

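The loop in virtio_ccw_finalize_features() walks 32-bit feature descriptors: assuming a 64-bit unsigned long (as on s390x), descriptor i carries half of vdev->features[i / 2], selected by the i % 2 shift. A worked sketch with made-up feature bits:

	/* Illustration of the index arithmetic; the value is hypothetical. */
	u64 features = 0x0000000100000003ULL;
	u32 desc0 = (u32)(features >> 0);	/* i == 0 -> 0x00000003 */
	u32 desc1 = (u32)(features >> 32);	/* i == 1 -> 0x00000001 */
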
static void virtio_ccw_get_config(struct virtio_device *vdev,
				  unsigned int offset, void *buf, unsigned len)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	int ret;
	struct ccw1 *ccw;
	void *config_area;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return;

	config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL);
	if (!config_area)
		goto out_free;

	/* Read the config area from the host. */
	ccw->cmd_code = CCW_CMD_READ_CONF;
	ccw->flags = 0;
	ccw->count = offset + len;
	ccw->cda = (__u32)(unsigned long)config_area;
	ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_CONFIG);
	if (ret)
		goto out_free;

	memcpy(vcdev->config, config_area, sizeof(vcdev->config));
	memcpy(buf, &vcdev->config[offset], len);

out_free:
	kfree(config_area);
	kfree(ccw);
}

static void virtio_ccw_set_config(struct virtio_device *vdev,
				  unsigned int offset, const void *buf,
				  unsigned len)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	struct ccw1 *ccw;
	void *config_area;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return;

	config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL);
	if (!config_area)
		goto out_free;

	memcpy(&vcdev->config[offset], buf, len);
	/* Write the config area to the host. */
	memcpy(config_area, vcdev->config, sizeof(vcdev->config));
	ccw->cmd_code = CCW_CMD_WRITE_CONF;
	ccw->flags = 0;
	ccw->count = offset + len;
	ccw->cda = (__u32)(unsigned long)config_area;
	ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_CONFIG);

out_free:
	kfree(config_area);
	kfree(ccw);
}

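Both config accessors size the channel transfer as offset + len, i.e. they always move the config space from byte 0 up to the end of the accessed field, and keep a shadow copy in vcdev->config. A tiny illustration with hypothetical values:

	/* Reading 2 bytes at offset 6 still transfers bytes [0, 8). */
	unsigned int offset = 6, len = 2;
	unsigned int count = offset + len;	/* ccw->count == 8 */
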
static u8 virtio_ccw_get_status(struct virtio_device *vdev)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);

	return *vcdev->status;
}

static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
	struct ccw1 *ccw;

	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
	if (!ccw)
		return;

	/* Write the status to the host. */
	*vcdev->status = status;
	ccw->cmd_code = CCW_CMD_WRITE_STATUS;
	ccw->flags = 0;
	ccw->count = sizeof(status);
	ccw->cda = (__u32)(unsigned long)vcdev->status;
	ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS);
	kfree(ccw);
}

static struct virtio_config_ops virtio_ccw_config_ops = {
	.get_features = virtio_ccw_get_features,
	.finalize_features = virtio_ccw_finalize_features,
	.get = virtio_ccw_get_config,
	.set = virtio_ccw_set_config,
	.get_status = virtio_ccw_get_status,
	.set_status = virtio_ccw_set_status,
	.reset = virtio_ccw_reset,
	.find_vqs = virtio_ccw_find_vqs,
	.del_vqs = virtio_ccw_del_vqs,
};

/*
 * ccw bus driver related functions
 */

static void virtio_ccw_release_dev(struct device *_d)
{
	struct virtio_device *dev = container_of(_d, struct virtio_device,
						 dev);
	struct virtio_ccw_device *vcdev = to_vc_device(dev);

	kfree(vcdev->status);
	kfree(vcdev->config_block);
	kfree(vcdev);
}

static int irb_is_error(struct irb *irb)
{
	if (scsw_cstat(&irb->scsw) != 0)
		return 1;
	if (scsw_dstat(&irb->scsw) & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END))
		return 1;
	if (scsw_cc(&irb->scsw) != 0)
		return 1;
	return 0;
}

static struct virtqueue *virtio_ccw_vq_by_ind(struct virtio_ccw_device *vcdev,
					      int index)
{
	struct virtio_ccw_vq_info *info;
	unsigned long flags;
	struct virtqueue *vq;

	vq = NULL;
	spin_lock_irqsave(&vcdev->lock, flags);
	list_for_each_entry(info, &vcdev->virtqueues, node) {
		if (virtqueue_get_queue_index(info->vq) == index) {
			vq = info->vq;
			break;
		}
	}
	spin_unlock_irqrestore(&vcdev->lock, flags);
	return vq;
}

static void virtio_ccw_int_handler(struct ccw_device *cdev,
				   unsigned long intparm,
				   struct irb *irb)
{
	__u32 activity = intparm & VIRTIO_CCW_INTPARM_MASK;
	struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
	int i;
	struct virtqueue *vq;
	struct virtio_driver *drv;

	/* Check if it's a notification from the host. */
	if ((intparm == 0) &&
	    (scsw_stctl(&irb->scsw) ==
	     (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))) {
		/* OK */
	}
	if (irb_is_error(irb))
		vcdev->err = -EIO; /* XXX - use real error */
	if (vcdev->curr_io & activity) {
		switch (activity) {
		case VIRTIO_CCW_DOING_READ_FEAT:
		case VIRTIO_CCW_DOING_WRITE_FEAT:
		case VIRTIO_CCW_DOING_READ_CONFIG:
		case VIRTIO_CCW_DOING_WRITE_CONFIG:
		case VIRTIO_CCW_DOING_WRITE_STATUS:
		case VIRTIO_CCW_DOING_SET_VQ:
		case VIRTIO_CCW_DOING_SET_IND:
		case VIRTIO_CCW_DOING_SET_CONF_IND:
		case VIRTIO_CCW_DOING_RESET:
		case VIRTIO_CCW_DOING_READ_VQ_CONF:
			vcdev->curr_io &= ~activity;
			wake_up(&vcdev->wait_q);
			break;
		default:
			/* don't know what to do... */
			dev_warn(&cdev->dev, "Suspicious activity '%08x'\n",
				 activity);
			WARN_ON(1);
			break;
		}
	}
	for_each_set_bit(i, &vcdev->indicators,
			 sizeof(vcdev->indicators) * BITS_PER_BYTE) {
		/* The bit clear must happen before the vring kick. */
		clear_bit(i, &vcdev->indicators);
		barrier();
		vq = virtio_ccw_vq_by_ind(vcdev, i);
		vring_interrupt(0, vq);
	}
	if (test_bit(0, &vcdev->indicators2)) {
		drv = container_of(vcdev->vdev.dev.driver,
				   struct virtio_driver, driver);

		if (drv && drv->config_changed)
			drv->config_changed(&vcdev->vdev);
		clear_bit(0, &vcdev->indicators2);
	}
}

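The ordering in the indicator loop above is deliberate: the bit is cleared before the virtqueue is processed, so a host notification arriving mid-processing re-sets the bit and forces another pass instead of being lost, and barrier() stops the compiler from reordering the two steps. In outline:

	/* Safe order, as in the handler above: */
	clear_bit(i, &vcdev->indicators);	/* a late host set_bit survives */
	barrier();				/* no compiler reordering */
	vring_interrupt(0, vq);			/* then drain the queue */
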
/*
 * We usually want to autoonline all devices, but give the admin
 * a way to exempt devices from this.
 */
#define __DEV_WORDS ((__MAX_SUBCHANNEL + (8*sizeof(long) - 1)) / \
		     (8*sizeof(long)))
static unsigned long devs_no_auto[__MAX_SSID + 1][__DEV_WORDS];

static char *no_auto = "";

module_param(no_auto, charp, 0444);
MODULE_PARM_DESC(no_auto, "list of ccw bus id ranges not to be auto-onlined");

static int virtio_ccw_check_autoonline(struct ccw_device *cdev)
{
	struct ccw_dev_id id;

	ccw_device_get_id(cdev, &id);
	if (test_bit(id.devno, devs_no_auto[id.ssid]))
		return 0;
	return 1;
}

static void virtio_ccw_auto_online(void *data, async_cookie_t cookie)
{
	struct ccw_device *cdev = data;
	int ret;

	ret = ccw_device_set_online(cdev);
	if (ret)
		dev_warn(&cdev->dev, "Failed to set online: %d\n", ret);
}

static int virtio_ccw_probe(struct ccw_device *cdev)
{
	cdev->handler = virtio_ccw_int_handler;

	if (virtio_ccw_check_autoonline(cdev))
		async_schedule(virtio_ccw_auto_online, cdev);
	return 0;
}

static void virtio_ccw_remove(struct ccw_device *cdev)
{
	struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);

	if (cdev->online) {
		unregister_virtio_device(&vcdev->vdev);
		dev_set_drvdata(&cdev->dev, NULL);
	}
	cdev->handler = NULL;
}

static int virtio_ccw_offline(struct ccw_device *cdev)
{
	struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);

	unregister_virtio_device(&vcdev->vdev);
	dev_set_drvdata(&cdev->dev, NULL);
	return 0;
}


static int virtio_ccw_online(struct ccw_device *cdev)
{
	int ret;
	struct virtio_ccw_device *vcdev;

	vcdev = kzalloc(sizeof(*vcdev), GFP_KERNEL);
	if (!vcdev) {
		dev_warn(&cdev->dev, "Could not get memory for virtio\n");
		ret = -ENOMEM;
		goto out_free;
	}
	vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
				      GFP_DMA | GFP_KERNEL);
	if (!vcdev->config_block) {
		ret = -ENOMEM;
		goto out_free;
	}
	vcdev->status = kzalloc(sizeof(*vcdev->status), GFP_DMA | GFP_KERNEL);
	if (!vcdev->status) {
		ret = -ENOMEM;
		goto out_free;
	}

	vcdev->vdev.dev.parent = &cdev->dev;
	vcdev->vdev.dev.release = virtio_ccw_release_dev;
	vcdev->vdev.config = &virtio_ccw_config_ops;
	vcdev->cdev = cdev;
	init_waitqueue_head(&vcdev->wait_q);
	INIT_LIST_HEAD(&vcdev->virtqueues);
	spin_lock_init(&vcdev->lock);

	dev_set_drvdata(&cdev->dev, vcdev);
	vcdev->vdev.id.vendor = cdev->id.cu_type;
	vcdev->vdev.id.device = cdev->id.cu_model;
	ret = register_virtio_device(&vcdev->vdev);
	if (ret) {
		dev_warn(&cdev->dev, "Failed to register virtio device: %d\n",
			 ret);
		goto out_put;
	}
	return 0;
out_put:
	dev_set_drvdata(&cdev->dev, NULL);
	put_device(&vcdev->vdev.dev);
	return ret;
out_free:
	if (vcdev) {
		kfree(vcdev->status);
		kfree(vcdev->config_block);
	}
	kfree(vcdev);
	return ret;
}

static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
{
	/* TODO: Check whether we need special handling here. */
	return 0;
}

static struct ccw_device_id virtio_ids[] = {
	{ CCW_DEVICE(0x3832, 0) },
	{},
};
MODULE_DEVICE_TABLE(ccw, virtio_ids);

static struct ccw_driver virtio_ccw_driver = {
	.driver = {
		.owner = THIS_MODULE,
		.name = "virtio_ccw",
	},
	.ids = virtio_ids,
	.probe = virtio_ccw_probe,
	.remove = virtio_ccw_remove,
	.set_offline = virtio_ccw_offline,
	.set_online = virtio_ccw_online,
	.notify = virtio_ccw_cio_notify,
	.int_class = IRQIO_VIR,
};

static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
			   int max_digit, int max_val)
{
	int diff;

	diff = 0;
	*val = 0;

	while (diff <= max_digit) {
		int value = hex_to_bin(**cp);

		if (value < 0)
			break;
		*val = *val * 16 + value;
		(*cp)++;
		diff++;
	}

	if ((diff < min_digit) || (diff > max_digit) || (*val > max_val))
		return 1;

	return 0;
}

static int __init parse_busid(char *str, unsigned int *cssid,
			      unsigned int *ssid, unsigned int *devno)
{
	char *str_work;
	int rc, ret;

	rc = 1;

	if (*str == '\0')
		goto out;

	str_work = str;
	ret = pure_hex(&str_work, cssid, 1, 2, __MAX_CSSID);
	if (ret || (str_work[0] != '.'))
		goto out;
	str_work++;
	ret = pure_hex(&str_work, ssid, 1, 1, __MAX_SSID);
	if (ret || (str_work[0] != '.'))
		goto out;
	str_work++;
	ret = pure_hex(&str_work, devno, 4, 4, __MAX_SUBCHANNEL);
	if (ret || (str_work[0] != '\0'))
		goto out;

	rc = 0;
out:
	return rc;
}

static void __init no_auto_parse(void)
{
	unsigned int from_cssid, to_cssid, from_ssid, to_ssid, from, to;
	char *parm, *str;
	int rc;

	str = no_auto;
	while ((parm = strsep(&str, ","))) {
		rc = parse_busid(strsep(&parm, "-"), &from_cssid,
				 &from_ssid, &from);
		if (rc)
			continue;
		if (parm != NULL) {
			rc = parse_busid(parm, &to_cssid,
					 &to_ssid, &to);
			if ((from_ssid > to_ssid) ||
			    ((from_ssid == to_ssid) && (from > to)))
				rc = -EINVAL;
		} else {
			to_cssid = from_cssid;
			to_ssid = from_ssid;
			to = from;
		}
		if (rc)
			continue;
		while ((from_ssid < to_ssid) ||
		       ((from_ssid == to_ssid) && (from <= to))) {
			set_bit(from, devs_no_auto[from_ssid]);
			from++;
			if (from > __MAX_SUBCHANNEL) {
				from_ssid++;
				from = 0;
			}
		}
	}
}

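Putting parse_busid() and no_auto_parse() together: bus ids parse as cssid.ssid.devno (one to two, exactly one, and exactly four hex digits), ranges are joined with '-', and entries are separated by ','. As a hypothetical example (the device numbers are made up), either of the following would keep one device and one range from being auto-onlined:

	virtio_ccw.no_auto=0.0.0200,0.0.1000-0.0.10ff        (kernel command line)
	modprobe virtio_ccw no_auto=0.0.0200,0.0.1000-0.0.10ff
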
static int __init virtio_ccw_init(void)
{
	/* parse no_auto string before we do anything further */
	no_auto_parse();
	return ccw_driver_register(&virtio_ccw_driver);
}
module_init(virtio_ccw_init);

static void __exit virtio_ccw_exit(void)
{
	ccw_driver_unregister(&virtio_ccw_driver);
}
module_exit(virtio_ccw_exit);

@@ -123,6 +123,8 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_WATCHDOG          18
#define KVM_REQ_MASTERCLOCK_UPDATE 19
#define KVM_REQ_MCLOCK_INPROGRESS 20
#define KVM_REQ_EPR_EXIT          21
#define KVM_REQ_EOIBITMAP         22

#define KVM_USERSPACE_IRQ_SOURCE_ID		0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -267,12 +269,11 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
struct kvm_memory_slot {
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long flags;
	unsigned long *dirty_bitmap;
	struct kvm_arch_memory_slot arch;
	unsigned long userspace_addr;
	int user_alloc;
	int id;
	u32 flags;
	short id;
};

static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
@@ -314,8 +315,12 @@ struct kvm_irq_routing_table {};

#endif

#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif

#ifndef KVM_MEM_SLOTS_NUM
#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#endif

/*
@@ -327,7 +332,7 @@ struct kvm_memslots {
	u64 generation;
	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
	/* The mapping table from slot id to the index in memslots[]. */
	int id_to_index[KVM_MEM_SLOTS_NUM];
	short id_to_index[KVM_MEM_SLOTS_NUM];
};

struct kvm {
@@ -425,7 +430,8 @@ void kvm_exit(void);

void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new);
void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
		     u64 last_generation);

static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
@@ -448,10 +454,10 @@ id_to_memslot(struct kvm_memslots *slots, int id)

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc);
			  bool user_alloc);
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc);
			    bool user_alloc);
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
@@ -459,11 +465,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_memory_slot old,
				   struct kvm_userspace_memory_region *mem,
				   int user_alloc);
				   bool user_alloc);
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot old,
				   int user_alloc);
				   bool user_alloc);
bool kvm_largepages_enabled(void);
void kvm_disable_largepages(void);
/* flush all memory translations */
@@ -533,6 +539,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_update_eoibitmap_request(struct kvm *kvm);

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
@@ -550,7 +557,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc);
				   bool user_alloc);
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg);
@@ -686,6 +693,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
		int irq_source_id, int level);
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian);

@@ -14,7 +14,7 @@
	ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \
	ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
	ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \
	ERSN(S390_UCONTROL)
	ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH)

TRACE_EVENT(kvm_userspace_exit,
	    TP_PROTO(__u32 reason, int errno),

@@ -169,6 +169,8 @@ struct kvm_pit_config {
#define KVM_EXIT_PAPR_HCALL	  19
#define KVM_EXIT_S390_UCONTROL	  20
#define KVM_EXIT_WATCHDOG         21
#define KVM_EXIT_S390_TSCH        22
#define KVM_EXIT_EPR              23

/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -286,6 +288,19 @@ struct kvm_run {
			__u64 ret;
			__u64 args[9];
		} papr_hcall;
		/* KVM_EXIT_S390_TSCH */
		struct {
			__u16 subchannel_id;
			__u16 subchannel_nr;
			__u32 io_int_parm;
			__u32 io_int_word;
			__u32 ipb;
			__u8 dequeued;
		} s390_tsch;
		/* KVM_EXIT_EPR */
		struct {
			__u32 epr;
		} epr;
		/* Fix the size of the union. */
		char padding[256];
	};
@@ -398,10 +413,20 @@ struct kvm_s390_psw {
#define KVM_S390_PROGRAM_INT		0xfffe0001u
#define KVM_S390_SIGP_SET_PREFIX	0xfffe0002u
#define KVM_S390_RESTART		0xfffe0003u
#define KVM_S390_MCHK			0xfffe1000u
#define KVM_S390_INT_VIRTIO		0xffff2603u
#define KVM_S390_INT_SERVICE		0xffff2401u
#define KVM_S390_INT_EMERGENCY		0xffff1201u
#define KVM_S390_INT_EXTERNAL_CALL	0xffff1202u
/* Anything below 0xfffe0000u is taken by INT_IO */
#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
	(((schid)) |			       \
	 ((ssid) << 16) |		       \
	 ((cssid) << 18) |		       \
	 ((ai) << 26))
#define KVM_S390_INT_IO_MIN		0x00000000u
#define KVM_S390_INT_IO_MAX		0xfffdffffu

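A worked example of the packing above (the subchannel values are hypothetical): an I/O interrupt with the adapter-interrupt bit set, cssid 0, ssid 1, subchannel id 0x2000 encodes as

	/*
	 * KVM_S390_INT_IO(1, 0, 1, 0x2000)
	 *   = 0x2000 | (1 << 16) | (0 << 18) | (1 << 26)
	 *   = 0x04012000
	 */

which lies inside [KVM_S390_INT_IO_MIN, KVM_S390_INT_IO_MAX], below the 0xfffe0000u floor reserved for the named interrupt types.
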
struct kvm_s390_interrupt {
	__u32 type;
@@ -636,6 +661,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_IRQFD_RESAMPLE 82
#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
#define KVM_CAP_PPC_HTAB_FD 84
#define KVM_CAP_S390_CSS_SUPPORT 85
#define KVM_CAP_PPC_EPR 86
#define KVM_CAP_ARM_PSCI 87
#define KVM_CAP_ARM_SET_DEVICE_ADDR 88

@@ -4374,7 +4374,10 @@ EXPORT_SYMBOL(yield);
 * It's the caller's job to ensure that the target task struct
 * can't go away on us before we can do any checks.
 *
 * Returns true if we indeed boosted the target task.
 * Returns:
 *	true (>0) if we indeed boosted the target task.
 *	false (0) if we failed to boost the target.
 *	-ESRCH if there's no task to yield to.
 */
bool __sched yield_to(struct task_struct *p, bool preempt)
{
@@ -4388,6 +4391,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt)

again:
	p_rq = task_rq(p);
	/*
	 * If we're the only runnable task on the rq and target rq also
	 * has only one task, there's absolutely no point in yielding.
	 */
	if (rq->nr_running == 1 && p_rq->nr_running == 1) {
		yielded = -ESRCH;
		goto out_irq;
	}

	double_rq_lock(rq, p_rq);
	while (task_rq(p) != p_rq) {
		double_rq_unlock(rq, p_rq);
@@ -4395,13 +4407,13 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
	}

	if (!curr->sched_class->yield_to_task)
		goto out;
		goto out_unlock;

	if (curr->sched_class != p->sched_class)
		goto out;
		goto out_unlock;

	if (task_running(p_rq, p) || p->state)
		goto out;
		goto out_unlock;

	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
	if (yielded) {
@@ -4414,11 +4426,12 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
		resched_task(p_rq->curr);
	}

out:
out_unlock:
	double_rq_unlock(rq, p_rq);
out_irq:
	local_irq_restore(flags);

	if (yielded)
	if (yielded > 0)
		schedule();

	return yielded;

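Although yield_to() is still declared bool, after this change it effectively carries three outcomes, and callers are expected to compare against zero rather than test truth. A hedged caller-side sketch (back_off() and boosted() are placeholders, not kernel functions; kvm_vcpu_on_spin() further down is the real consumer):

	int yielded = yield_to(task, 1);

	if (yielded > 0)
		boosted();	/* target was boosted; stop searching */
	else if (yielded < 0)
		back_off();	/* -ESRCH: nothing runnable to yield to */
	/* 0: boost failed, try the next candidate */
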
@@ -35,6 +35,7 @@
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -115,6 +116,42 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
	smp_wmb();
}

void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
					u64 *eoi_exit_bitmap)
{
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
	union kvm_ioapic_redirect_entry *e;
	struct kvm_lapic_irq irqe;
	int index;

	spin_lock(&ioapic->lock);
	/* traverse ioapic entry to set eoi exit bitmap*/
	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
		e = &ioapic->redirtbl[index];
		if (!e->fields.mask &&
			(e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
			 kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
				 index))) {
			irqe.dest_id = e->fields.dest_id;
			irqe.vector = e->fields.vector;
			irqe.dest_mode = e->fields.dest_mode;
			irqe.delivery_mode = e->fields.delivery_mode << 8;
			kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap);
		}
	}
	spin_unlock(&ioapic->lock);
}
EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap);

void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;

	if (!kvm_apic_vid_enabled(kvm) || !ioapic)
		return;
	kvm_make_update_eoibitmap_request(kvm);
}

static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
	unsigned index;
@@ -156,6 +193,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
		    && ioapic->irr & (1 << index))
			ioapic_service(ioapic, index);
		kvm_ioapic_make_eoibitmap_request(ioapic->kvm);
		break;
	}
}
@@ -179,15 +217,6 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
	irqe.level = 1;
	irqe.shorthand = 0;

#ifdef CONFIG_X86
	/* Always delivery PIT interrupt to vcpu 0 */
	if (irq == 0) {
		irqe.dest_mode = 0; /* Physical mode. */
		/* need to read apic_id from apic regiest since
		 * it can be rewritten */
		irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
	}
#endif
	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
}

@@ -464,6 +493,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
	spin_lock(&ioapic->lock);
	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
	update_handled_vectors(ioapic);
	kvm_ioapic_make_eoibitmap_request(kvm);
	spin_unlock(&ioapic->lock);
	return 0;
}

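This is part of the initial APICv support mentioned in the merge description: with hardware-virtualized EOI, the CPU only exits on EOI for vectors marked in a per-vcpu exit bitmap, so level-triggered pins and pins with ack notifiers must set their vector's bit. Assuming kvm_calculate_eoi_exitmap() ultimately marks the delivered vector in a 256-bit map (an assumption about its internals, not quoted code), the bit arithmetic looks roughly like:

	/* Sketch: one bit per vector, 256 vectors in four 64-bit words. */
	static void example_mark_vector(u64 *eoi_exit_bitmap, u8 vector)
	{
		eoi_exit_bitmap[vector / 64] |= 1ULL << (vector % 64);
	}
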
@@ -82,5 +82,9 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm);
void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
				      u64 *eoi_exit_bitmap);

#endif

@@ -76,7 +76,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
	gfn     = slot->base_gfn;
	end_gfn = gfn + slot->npages;

	flags = IOMMU_READ | IOMMU_WRITE;
	flags = IOMMU_READ;
	if (!(slot->flags & KVM_MEM_READONLY))
		flags |= IOMMU_WRITE;
	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
		flags |= IOMMU_CACHE;

|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#include <asm/msidef.h>
|
||||
@ -237,6 +238,28 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
{
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
struct hlist_node *n;
|
||||
int gsi;
|
||||
|
||||
rcu_read_lock();
|
||||
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
if (kian->gsi == gsi) {
|
||||
rcu_read_unlock();
|
||||
return true;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
|
||||
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
{
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
@ -261,6 +284,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
kvm_ioapic_make_eoibitmap_request(kvm);
|
||||
}
|
||||
|
||||
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
|
||||
@ -270,6 +294,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
|
||||
hlist_del_init_rcu(&kian->link);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
synchronize_rcu();
|
||||
kvm_ioapic_make_eoibitmap_request(kvm);
|
||||
}
|
||||
|
||||
int kvm_request_irq_source_id(struct kvm *kvm)
|
||||
|
@@ -217,6 +217,11 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm)
	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}

void kvm_make_update_eoibitmap_request(struct kvm *kvm)
{
	make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);
}

int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	struct page *page;
@@ -474,6 +479,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif

	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);

	r = -ENOMEM;
	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
	if (!kvm->memslots)
@@ -670,7 +677,8 @@ static void sort_memslots(struct kvm_memslots *slots)
		slots->id_to_index[slots->memslots[i].id] = i;
}

void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
		     u64 last_generation)
{
	if (new) {
		int id = new->id;
@@ -682,7 +690,7 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
		sort_memslots(slots);
	}

	slots->generation++;
	slots->generation = last_generation + 1;
}

static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -699,6 +707,35 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
	return 0;
}

static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
		struct kvm_memslots *slots, struct kvm_memory_slot *new)
{
	struct kvm_memslots *old_memslots = kvm->memslots;

	update_memslots(slots, new, kvm->memslots->generation);
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);
	return old_memslots;
}

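install_new_memslots() factors out KVM's publish sequence for the memslot array: derive the next generation from the currently installed copy, publish the new array with rcu_assign_pointer(), and wait out all SRCU readers before the caller may free or recycle the old one. The reader side it pairs with looks roughly like this (a sketch of the pattern, not a quote of KVM code):

	int idx = srcu_read_lock(&kvm->srcu);
	struct kvm_memslots *slots = srcu_dereference(kvm->memslots,
						      &kvm->srcu);

	/* ... gfn lookups here; the array cannot be freed under us ... */
	srcu_read_unlock(&kvm->srcu, idx);
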
/*
 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 * - create a new memory slot
 * - delete an existing memory slot
 * - modify an existing memory slot
 *   -- move it in the guest physical memory space
 *   -- just change its flags
 *
 * Since flags can be changed by some of these operations, the following
 * differentiation is the best we can do for __kvm_set_memory_region():
 */
enum kvm_mr_change {
	KVM_MR_CREATE,
	KVM_MR_DELETE,
	KVM_MR_MOVE,
	KVM_MR_FLAGS_ONLY,
};

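The enum reduces every KVM_SET_USER_MEMORY_REGION call to exactly one operation; the decision tree implemented in __kvm_set_memory_region() below boils down to:

	/*
	 * Condensed view of the classification below (old is the current
	 * slot, npages/base_gfn come from the requested region):
	 *
	 *   npages != 0, old.npages == 0            -> KVM_MR_CREATE
	 *   npages != 0, base_gfn != old.base_gfn   -> KVM_MR_MOVE
	 *   npages != 0, only the flags differ      -> KVM_MR_FLAGS_ONLY
	 *   npages == 0, old.npages != 0            -> KVM_MR_DELETE
	 *   anything else (resize, addr change)     -> error
	 */
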
/*
 * Allocate some memory and give it an address in the guest physical address
 * space.
@@ -709,14 +746,15 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 */
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
			    bool user_alloc)
{
	int r;
	gfn_t base_gfn;
	unsigned long npages;
	struct kvm_memory_slot *memslot, *slot;
	struct kvm_memory_slot *slot;
	struct kvm_memory_slot old, new;
	struct kvm_memslots *slots, *old_memslots;
	struct kvm_memslots *slots = NULL, *old_memslots;
	enum kvm_mr_change change;

	r = check_memory_region_flags(mem);
	if (r)
@@ -740,7 +778,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
		goto out;

	memslot = id_to_memslot(kvm->memslots, mem->slot);
	slot = id_to_memslot(kvm->memslots, mem->slot);
	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
	npages = mem->memory_size >> PAGE_SHIFT;

@@ -751,26 +789,48 @@ int __kvm_set_memory_region(struct kvm *kvm,
	if (!npages)
		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

	new = old = *memslot;
	new = old = *slot;

	new.id = mem->slot;
	new.base_gfn = base_gfn;
	new.npages = npages;
	new.flags = mem->flags;

	/* Disallow changing a memory slot's size. */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;
	if (npages) {
		if (!old.npages)
			change = KVM_MR_CREATE;
		else { /* Modify an existing slot. */
			if ((mem->userspace_addr != old.userspace_addr) ||
			    (npages != old.npages) ||
			    ((new.flags ^ old.flags) & KVM_MEM_READONLY))
				goto out;

	/* Check for overlaps */
	r = -EEXIST;
	kvm_for_each_memslot(slot, kvm->memslots) {
		if (slot->id >= KVM_MEMORY_SLOTS || slot == memslot)
			continue;
		if (!((base_gfn + npages <= slot->base_gfn) ||
		      (base_gfn >= slot->base_gfn + slot->npages)))
			goto out_free;
			if (base_gfn != old.base_gfn)
				change = KVM_MR_MOVE;
			else if (new.flags != old.flags)
				change = KVM_MR_FLAGS_ONLY;
			else { /* Nothing to change. */
				r = 0;
				goto out;
			}
		}
	} else if (old.npages) {
		change = KVM_MR_DELETE;
	} else /* Modify a non-existent slot: disallowed. */
		goto out;

	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
		/* Check for overlaps */
		r = -EEXIST;
		kvm_for_each_memslot(slot, kvm->memslots) {
			if ((slot->id >= KVM_USER_MEM_SLOTS) ||
			    (slot->id == mem->slot))
				continue;
			if (!((base_gfn + npages <= slot->base_gfn) ||
			      (base_gfn >= slot->base_gfn + slot->npages)))
				goto out;
		}
	}

	/* Free page dirty bitmap if unneeded */
@@ -778,10 +838,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
	if (npages && !old.npages) {
		new.user_alloc = user_alloc;
	if (change == KVM_MR_CREATE) {
		new.userspace_addr = mem->userspace_addr;

		if (kvm_arch_create_memslot(&new, npages))
@@ -792,12 +849,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		if (kvm_create_dirty_bitmap(&new) < 0)
			goto out_free;
		/* destroy any largepage mappings for dirty tracking */
	}

	if (!npages || base_gfn != old.base_gfn) {
		struct kvm_memory_slot *slot;

	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
		r = -ENOMEM;
		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
				GFP_KERNEL);
@@ -806,11 +860,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
		slot = id_to_memslot(slots, mem->slot);
		slot->flags |= KVM_MEMSLOT_INVALID;

		update_memslots(slots, NULL);
		old_memslots = install_new_memslots(kvm, slots, NULL);

		old_memslots = kvm->memslots;
		rcu_assign_pointer(kvm->memslots, slots);
		synchronize_srcu_expedited(&kvm->srcu);
		/* slot was deleted or moved, clear iommu mapping */
		kvm_iommu_unmap_pages(kvm, &old);
		/* From this point no new shadow pages pointing to a deleted,
		 * or moved, memslot will be created.
		 *
@@ -819,37 +872,48 @@ int __kvm_set_memory_region(struct kvm *kvm,
		 *	- kvm_is_visible_gfn (mmu_check_roots)
		 */
		kvm_arch_flush_shadow_memslot(kvm, slot);
		kfree(old_memslots);
		slots = old_memslots;
	}

	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
	if (r)
		goto out_free;

	/* map/unmap the pages in iommu page table */
	if (npages) {
		r = kvm_iommu_map_pages(kvm, &new);
		if (r)
			goto out_free;
	} else
		kvm_iommu_unmap_pages(kvm, &old);
		goto out_slots;

	r = -ENOMEM;
	slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
			GFP_KERNEL);
	if (!slots)
		goto out_free;
	/*
	 * We can re-use the old_memslots from above, the only difference
	 * from the currently installed memslots is the invalid flag.  This
	 * will get overwritten by update_memslots anyway.
	 */
	if (!slots) {
		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
				GFP_KERNEL);
		if (!slots)
			goto out_free;
	}

	/*
	 * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
	 * un-mapped and re-mapped if their base changes.  Since base change
	 * unmapping is handled above with slot deletion, mapping alone is
	 * needed here.  Anything else the iommu might care about for existing
	 * slots (size changes, userspace addr changes and read-only flag
	 * changes) is disallowed above, so any other attribute changes getting
	 * here can be skipped.
	 */
	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
		r = kvm_iommu_map_pages(kvm, &new);
		if (r)
			goto out_slots;
	}

	/* actual memory is freed via old in kvm_free_physmem_slot below */
	if (!npages) {
	if (change == KVM_MR_DELETE) {
		new.dirty_bitmap = NULL;
		memset(&new.arch, 0, sizeof(new.arch));
	}

	update_memslots(slots, &new);
	old_memslots = kvm->memslots;
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);
	old_memslots = install_new_memslots(kvm, slots, &new);

	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);

@@ -858,17 +922,18 @@ int __kvm_set_memory_region(struct kvm *kvm,

	return 0;

out_slots:
	kfree(slots);
out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;

}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
			  bool user_alloc)
{
	int r;

@@ -882,9 +947,9 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region);
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc)
				   bool user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
	if (mem->slot >= KVM_USER_MEM_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}
@@ -898,7 +963,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
	unsigned long any = 0;

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -944,7 +1009,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);

	if (!memslot || memslot->id >= KVM_MEMORY_SLOTS ||
	if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
	    memslot->flags & KVM_MEMSLOT_INVALID)
		return 0;

@@ -1641,6 +1706,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
{
	struct pid *pid;
	struct task_struct *task = NULL;
	bool ret = false;

	rcu_read_lock();
	pid = rcu_dereference(target->pid);
@@ -1648,17 +1714,15 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
		task = get_pid_task(target->pid, PIDTYPE_PID);
	rcu_read_unlock();
	if (!task)
		return false;
		return ret;
	if (task->flags & PF_VCPU) {
		put_task_struct(task);
		return false;
	}
	if (yield_to(task, 1)) {
		put_task_struct(task);
		return true;
		return ret;
	}
	ret = yield_to(task, 1);
	put_task_struct(task);
	return false;

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);

@@ -1699,12 +1763,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
	return eligible;
}
#endif

void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
	struct kvm *kvm = me->kvm;
	struct kvm_vcpu *vcpu;
	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
	int yielded = 0;
	int try = 3;
	int pass;
	int i;

@@ -1716,7 +1782,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
	 * VCPU is holding the lock that we need and will release it.
	 * We approximate round-robin by starting at the last boosted VCPU.
	 */
	for (pass = 0; pass < 2 && !yielded; pass++) {
	for (pass = 0; pass < 2 && !yielded && try; pass++) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (!pass && i <= last_boosted_vcpu) {
				i = last_boosted_vcpu;
@@ -1729,10 +1795,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
				continue;
			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
				continue;
			if (kvm_vcpu_yield_to(vcpu)) {

			yielded = kvm_vcpu_yield_to(vcpu);
			if (yielded > 0) {
				kvm->last_boosted_vcpu = i;
				yielded = 1;
				break;
			} else if (yielded < 0) {
				try--;
				if (!try)
					break;
			}
		}
	}
@@ -2127,7 +2198,7 @@ static long kvm_vm_ioctl(struct file *filp,
					   sizeof kvm_userspace_mem))
			goto out;

		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true);
		break;
	}
	case KVM_GET_DIRTY_LOG: {