mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 22:23:18 +00:00
Merge branch 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (75 commits) KVM: SVM: Keep intercepting task switching with NPT enabled KVM: s390: implement sigp external call KVM: s390: fix register setting KVM: s390: fix return value of kvm_arch_init_vm KVM: s390: check cpu_id prior to using it KVM: emulate lapic tsc deadline timer for guest x86: TSC deadline definitions KVM: Fix simultaneous NMIs KVM: x86 emulator: convert push %sreg/pop %sreg to direct decode KVM: x86 emulator: switch lds/les/lss/lfs/lgs to direct decode KVM: x86 emulator: streamline decode of segment registers KVM: x86 emulator: simplify OpMem64 decode KVM: x86 emulator: switch src decode to decode_operand() KVM: x86 emulator: qualify OpReg inhibit_byte_regs hack KVM: x86 emulator: switch OpImmUByte decode to decode_imm() KVM: x86 emulator: free up some flag bits near src, dst KVM: x86 emulator: switch src2 to generic decode_operand() KVM: x86 emulator: expand decode flags to 64 bits KVM: x86 emulator: split dst decode to a generic decode_operand() KVM: x86 emulator: move memop, memopp into emulation context ...
This commit is contained in:
commit
1bc87b0055
@ -1201,6 +1201,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
||||
[KVM,Intel] Disable FlexPriority feature (TPR shadow).
|
||||
Default is 1 (enabled)
|
||||
|
||||
kvm-intel.nested=
|
||||
[KVM,Intel] Enable VMX nesting (nVMX).
|
||||
Default is 0 (disabled)
|
||||
|
||||
kvm-intel.unrestricted_guest=
|
||||
[KVM,Intel] Disable unrestricted guest feature
|
||||
(virtualized real and unpaged mode) on capable
|
||||
|
@ -175,10 +175,30 @@ Parameters: vcpu id (apic id on x86)
|
||||
Returns: vcpu fd on success, -1 on error
|
||||
|
||||
This API adds a vcpu to a virtual machine. The vcpu id is a small integer
|
||||
in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the
|
||||
KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
|
||||
in the range [0, max_vcpus).
|
||||
|
||||
The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS capability of
|
||||
the KVM_CHECK_EXTENSION ioctl() at run-time.
|
||||
The maximum possible value for max_vcpus can be retrieved using the
|
||||
KVM_CAP_MAX_VCPUS capability of the KVM_CHECK_EXTENSION ioctl() at run-time.
|
||||
|
||||
If the KVM_CAP_NR_VCPUS capability does not exist, you should assume that
|
||||
max_vcpus is at most 4.
|
||||
If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
|
||||
the same as the value returned from KVM_CAP_NR_VCPUS.
|
||||
|
||||
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
|
||||
threads in one or more virtual CPU cores. (This is because the
|
||||
hardware requires all the hardware threads in a CPU core to be in the
|
||||
same partition.) The KVM_CAP_PPC_SMT capability indicates the number
|
||||
of vcpus per virtual core (vcore). The vcore id is obtained by
|
||||
dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
|
||||
given vcore will always be in the same physical core as each other
|
||||
(though that might be a different physical core from time to time).
|
||||
Userspace can control the threading (SMT) mode of the guest by its
|
||||
allocation of vcpu ids. For example, if userspace wants
|
||||
single-threaded guest vcpus, it should make all vcpu ids be a multiple
|
||||
of the number of vcpus per vcore.
|
||||
|
||||
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
|
||||
threads in one or more virtual CPU cores. (This is because the
|
||||
@ -1633,3 +1653,50 @@ developer registration required to access it).
|
||||
char padding[256];
|
||||
};
|
||||
};
|
||||
|
||||
6. Capabilities that can be enabled
|
||||
|
||||
There are certain capabilities that change the behavior of the virtual CPU when
|
||||
enabled. To enable them, please see section 4.37. Below you can find a list of
|
||||
capabilities and what their effect on the vCPU is when enabling them.
|
||||
|
||||
The following information is provided along with the description:
|
||||
|
||||
Architectures: which instruction set architectures provide this capability.
|
||||
x86 includes both i386 and x86_64.
|
||||
|
||||
Parameters: what parameters are accepted by the capability.
|
||||
|
||||
Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
|
||||
are not detailed, but errors with specific meanings are.
|
||||
|
||||
6.1 KVM_CAP_PPC_OSI
|
||||
|
||||
Architectures: ppc
|
||||
Parameters: none
|
||||
Returns: 0 on success; -1 on error
|
||||
|
||||
This capability enables interception of OSI hypercalls that otherwise would
|
||||
be treated as normal system calls to be injected into the guest. OSI hypercalls
|
||||
were invented by Mac-on-Linux to have a standardized communication mechanism
|
||||
between the guest and the host.
|
||||
|
||||
When this capability is enabled, KVM_EXIT_OSI can occur.
|
||||
|
||||
6.2 KVM_CAP_PPC_PAPR
|
||||
|
||||
Architectures: ppc
|
||||
Parameters: none
|
||||
Returns: 0 on success; -1 on error
|
||||
|
||||
This capability enables interception of PAPR hypercalls. PAPR hypercalls are
|
||||
done using the hypercall instruction "sc 1".
|
||||
|
||||
It also sets the guest privilege level to "supervisor" mode. Usually the guest
|
||||
runs in "hypervisor" privilege mode with a few missing features.
|
||||
|
||||
In addition to the above, it changes the semantics of SDR1. In this mode, the
|
||||
HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
|
||||
HTAB invisible to the guest.
|
||||
|
||||
When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
|
||||
|
@ -148,6 +148,12 @@ struct kvm_regs {
|
||||
#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
|
||||
#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
|
||||
|
||||
/*
|
||||
* Book3S special bits to indicate contents in the struct while maintaining
|
||||
* backwards compatibility with older structs. If adding a new field,
|
||||
* please make sure to add a flag for that new field */
|
||||
#define KVM_SREGS_S_HIOR (1 << 0)
|
||||
|
||||
/*
|
||||
* In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
|
||||
* previous KVM_GET_SREGS.
|
||||
@ -173,6 +179,8 @@ struct kvm_sregs {
|
||||
__u64 ibat[8];
|
||||
__u64 dbat[8];
|
||||
} ppc32;
|
||||
__u64 flags; /* KVM_SREGS_S_ */
|
||||
__u64 hior;
|
||||
} s;
|
||||
struct {
|
||||
union {
|
||||
@ -276,6 +284,11 @@ struct kvm_guest_debug_arch {
|
||||
#define KVM_INTERRUPT_UNSET -2U
|
||||
#define KVM_INTERRUPT_SET_LEVEL -3U
|
||||
|
||||
#define KVM_CPU_440 1
|
||||
#define KVM_CPU_E500V2 2
|
||||
#define KVM_CPU_3S_32 3
|
||||
#define KVM_CPU_3S_64 4
|
||||
|
||||
/* for KVM_CAP_SPAPR_TCE */
|
||||
struct kvm_create_spapr_tce {
|
||||
__u64 liobn;
|
||||
|
@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
|
||||
#endif
|
||||
int context_id[SID_CONTEXTS];
|
||||
|
||||
bool hior_sregs; /* HIOR is set by SREGS, not PVR */
|
||||
|
||||
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
|
||||
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
|
||||
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
|
||||
@ -139,15 +141,14 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
|
||||
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
extern void kvmppc_handler_lowmem_trampoline(void);
|
||||
extern void kvmppc_handler_trampoline_enter(void);
|
||||
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
|
||||
extern void kvmppc_entry_trampoline(void);
|
||||
extern void kvmppc_hv_entry_trampoline(void);
|
||||
extern void kvmppc_load_up_fpu(void);
|
||||
extern void kvmppc_load_up_altivec(void);
|
||||
extern void kvmppc_load_up_vsx(void);
|
||||
extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
|
||||
extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
|
||||
extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
|
||||
|
||||
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -382,6 +383,39 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
unsigned long rb, va_low;
|
||||
|
||||
rb = (v & ~0x7fUL) << 16; /* AVA field */
|
||||
va_low = pte_index >> 3;
|
||||
if (v & HPTE_V_SECONDARY)
|
||||
va_low = ~va_low;
|
||||
/* xor vsid from AVA */
|
||||
if (!(v & HPTE_V_1TB_SEG))
|
||||
va_low ^= v >> 12;
|
||||
else
|
||||
va_low ^= v >> 24;
|
||||
va_low &= 0x7ff;
|
||||
if (v & HPTE_V_LARGE) {
|
||||
rb |= 1; /* L field */
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
|
||||
(r & 0xff000)) {
|
||||
/* non-16MB large page, must be 64k */
|
||||
/* (masks depend on page size) */
|
||||
rb |= 0x1000; /* page encoding in LP field */
|
||||
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
|
||||
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
|
||||
}
|
||||
} else {
|
||||
/* 4kB page */
|
||||
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
|
||||
}
|
||||
rb |= (v >> 54) & 0x300; /* B field */
|
||||
return rb;
|
||||
}
|
||||
|
||||
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
|
||||
* instruction for the OSI hypercalls */
|
||||
#define OSI_SC_MAGIC_R3 0x113724FA
|
||||
|
@ -75,6 +75,8 @@ struct kvmppc_host_state {
|
||||
ulong scratch0;
|
||||
ulong scratch1;
|
||||
u8 in_guest;
|
||||
u8 restore_hid5;
|
||||
u8 napping;
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
struct kvm_vcpu *kvm_vcpu;
|
||||
|
@ -198,21 +198,29 @@ struct kvm_arch {
|
||||
*/
|
||||
struct kvmppc_vcore {
|
||||
int n_runnable;
|
||||
int n_blocked;
|
||||
int n_busy;
|
||||
int num_threads;
|
||||
int entry_exit_count;
|
||||
int n_woken;
|
||||
int nap_count;
|
||||
int napping_threads;
|
||||
u16 pcpu;
|
||||
u8 vcore_running;
|
||||
u8 vcore_state;
|
||||
u8 in_guest;
|
||||
struct list_head runnable_threads;
|
||||
spinlock_t lock;
|
||||
wait_queue_head_t wq;
|
||||
};
|
||||
|
||||
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
|
||||
#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
|
||||
|
||||
/* Values for vcore_state */
|
||||
#define VCORE_INACTIVE 0
|
||||
#define VCORE_RUNNING 1
|
||||
#define VCORE_EXITING 2
|
||||
#define VCORE_SLEEPING 3
|
||||
|
||||
struct kvmppc_pte {
|
||||
ulong eaddr;
|
||||
u64 vpage;
|
||||
@ -258,14 +266,6 @@ struct kvm_vcpu_arch {
|
||||
ulong host_stack;
|
||||
u32 host_pid;
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
ulong host_msr;
|
||||
ulong host_r2;
|
||||
void *host_retip;
|
||||
ulong trampoline_lowmem;
|
||||
ulong trampoline_enter;
|
||||
ulong highmem_handler;
|
||||
ulong rmcall;
|
||||
ulong host_paca_phys;
|
||||
struct kvmppc_slb slb[64];
|
||||
int slb_max; /* 1 + index of last valid entry in slb[] */
|
||||
int slb_nr; /* total number of entries in SLB */
|
||||
@ -389,6 +389,9 @@ struct kvm_vcpu_arch {
|
||||
u8 dcr_is_write;
|
||||
u8 osi_needed;
|
||||
u8 osi_enabled;
|
||||
u8 papr_enabled;
|
||||
u8 sane;
|
||||
u8 cpu_type;
|
||||
u8 hcall_needed;
|
||||
|
||||
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
|
||||
@ -408,11 +411,13 @@ struct kvm_vcpu_arch {
|
||||
struct dtl *dtl;
|
||||
struct dtl *dtl_end;
|
||||
|
||||
wait_queue_head_t *wqp;
|
||||
struct kvmppc_vcore *vcore;
|
||||
int ret;
|
||||
int trap;
|
||||
int state;
|
||||
int ptid;
|
||||
bool timer_running;
|
||||
wait_queue_head_t cpu_run;
|
||||
|
||||
struct kvm_vcpu_arch_shared *shared;
|
||||
@ -428,8 +433,9 @@ struct kvm_vcpu_arch {
|
||||
#endif
|
||||
};
|
||||
|
||||
#define KVMPPC_VCPU_BUSY_IN_HOST 0
|
||||
#define KVMPPC_VCPU_BLOCKED 1
|
||||
/* Values for vcpu->arch.state */
|
||||
#define KVMPPC_VCPU_STOPPED 0
|
||||
#define KVMPPC_VCPU_BUSY_IN_HOST 1
|
||||
#define KVMPPC_VCPU_RUNNABLE 2
|
||||
|
||||
#endif /* __POWERPC_KVM_HOST_H__ */
|
||||
|
@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
|
||||
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
|
||||
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
|
||||
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* Core-specific hooks */
|
||||
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <asm/compat.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/hvcall.h>
|
||||
#include <asm/xics.h>
|
||||
#endif
|
||||
#ifdef CONFIG_PPC_ISERIES
|
||||
#include <asm/iseries/alpaca.h>
|
||||
@ -449,8 +450,6 @@ int main(void)
|
||||
#ifdef CONFIG_PPC_BOOK3S
|
||||
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
|
||||
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
|
||||
DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
|
||||
DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
|
||||
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
|
||||
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
|
||||
DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
|
||||
@ -458,14 +457,12 @@ int main(void)
|
||||
DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
|
||||
DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
|
||||
DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
|
||||
DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
|
||||
DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
|
||||
DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
|
||||
DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
|
||||
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
|
||||
DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
|
||||
DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
|
||||
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
|
||||
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
|
||||
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
|
||||
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
|
||||
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
|
||||
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
|
||||
@ -481,6 +478,7 @@ int main(void)
|
||||
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
|
||||
DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
|
||||
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
|
||||
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
|
||||
DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
|
||||
offsetof(struct kvmppc_vcpu_book3s, vcpu));
|
||||
DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
|
||||
@ -537,6 +535,8 @@ int main(void)
|
||||
HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
|
||||
HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
|
||||
HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
|
||||
HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
|
||||
HSTATE_FIELD(HSTATE_NAPPING, napping);
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
|
||||
@ -549,6 +549,7 @@ int main(void)
|
||||
HSTATE_FIELD(HSTATE_DSCR, host_dscr);
|
||||
HSTATE_FIELD(HSTATE_DABR, dabr);
|
||||
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
|
||||
DEFINE(IPI_PRIORITY, IPI_PRIORITY);
|
||||
#endif /* CONFIG_KVM_BOOK3S_64_HV */
|
||||
|
||||
#else /* CONFIG_PPC_BOOK3S */
|
||||
|
@ -427,16 +427,6 @@ slb_miss_user_pseries:
|
||||
b . /* prevent spec. execution */
|
||||
#endif /* __DISABLED__ */
|
||||
|
||||
/* KVM's trampoline code needs to be close to the interrupt handlers */
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
||||
#ifdef CONFIG_KVM_BOOK3S_PR
|
||||
#include "../kvm/book3s_rmhandlers.S"
|
||||
#else
|
||||
#include "../kvm/book3s_hv_rmhandlers.S"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
.align 7
|
||||
.globl __end_interrupts
|
||||
__end_interrupts:
|
||||
|
@ -78,6 +78,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
|
||||
vcpu_44x->shadow_refs[i].gtlb_index = -1;
|
||||
|
||||
vcpu->arch.cpu_type = KVM_CPU_440;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -43,18 +43,22 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
|
||||
fpu.o \
|
||||
book3s_paired_singles.o \
|
||||
book3s_pr.o \
|
||||
book3s_pr_papr.o \
|
||||
book3s_emulate.o \
|
||||
book3s_interrupts.o \
|
||||
book3s_mmu_hpte.o \
|
||||
book3s_64_mmu_host.o \
|
||||
book3s_64_mmu.o \
|
||||
book3s_32_mmu.o
|
||||
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
|
||||
book3s_rmhandlers.o
|
||||
|
||||
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
|
||||
book3s_hv.o \
|
||||
book3s_hv_interrupts.o \
|
||||
book3s_64_mmu_hv.o
|
||||
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
|
||||
book3s_hv_rmhandlers.o \
|
||||
book3s_hv_rm_mmu.o \
|
||||
book3s_64_vio_hv.o \
|
||||
book3s_hv_builtin.o
|
||||
|
@ -31,7 +31,7 @@
|
||||
* R1 = host R1
|
||||
* R2 = host R2
|
||||
* R3 = shadow vcpu
|
||||
* all other volatile GPRS = free
|
||||
* all other volatile GPRS = free except R4, R6
|
||||
* SVCPU[CR] = guest CR
|
||||
* SVCPU[XER] = guest XER
|
||||
* SVCPU[CTR] = guest CTR
|
||||
|
@ -128,7 +128,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
|
||||
dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
|
||||
page, vcpu_book3s->sdr1, pteg, slbe->vsid);
|
||||
|
||||
r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
|
||||
/* When running a PAPR guest, SDR1 contains a HVA address instead
|
||||
of a GPA */
|
||||
if (vcpu_book3s->vcpu.arch.papr_enabled)
|
||||
r = pteg;
|
||||
else
|
||||
r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
|
||||
|
||||
if (kvm_is_error_hva(r))
|
||||
return r;
|
||||
return r | (pteg & ~PAGE_MASK);
|
||||
|
@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
|
||||
* R1 = host R1
|
||||
* R2 = host R2
|
||||
* R3 = shadow vcpu
|
||||
* all other volatile GPRS = free
|
||||
* all other volatile GPRS = free except R4, R6
|
||||
* SVCPU[CR] = guest CR
|
||||
* SVCPU[XER] = guest XER
|
||||
* SVCPU[CTR] = guest CTR
|
||||
|
@ -63,6 +63,25 @@
|
||||
* function pointers, so let's just disable the define. */
|
||||
#undef mfsrin
|
||||
|
||||
enum priv_level {
|
||||
PRIV_PROBLEM = 0,
|
||||
PRIV_SUPER = 1,
|
||||
PRIV_HYPER = 2,
|
||||
};
|
||||
|
||||
static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
|
||||
{
|
||||
/* PAPR VMs only access supervisor SPRs */
|
||||
if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
|
||||
return false;
|
||||
|
||||
/* Limit user space to its own small SPR set */
|
||||
if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned int inst, int *advance)
|
||||
{
|
||||
@ -296,6 +315,8 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
|
||||
switch (sprn) {
|
||||
case SPRN_SDR1:
|
||||
if (!spr_allowed(vcpu, PRIV_HYPER))
|
||||
goto unprivileged;
|
||||
to_book3s(vcpu)->sdr1 = spr_val;
|
||||
break;
|
||||
case SPRN_DSISR:
|
||||
@ -390,6 +411,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
|
||||
case SPRN_PMC4_GEKKO:
|
||||
case SPRN_WPAR_GEKKO:
|
||||
break;
|
||||
unprivileged:
|
||||
default:
|
||||
printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
|
||||
#ifndef DEBUG_SPR
|
||||
@ -421,6 +443,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
break;
|
||||
}
|
||||
case SPRN_SDR1:
|
||||
if (!spr_allowed(vcpu, PRIV_HYPER))
|
||||
goto unprivileged;
|
||||
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
|
||||
break;
|
||||
case SPRN_DSISR:
|
||||
@ -449,6 +473,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
case SPRN_HID5:
|
||||
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
|
||||
break;
|
||||
case SPRN_CFAR:
|
||||
case SPRN_PURR:
|
||||
kvmppc_set_gpr(vcpu, rt, 0);
|
||||
break;
|
||||
case SPRN_GQR0:
|
||||
case SPRN_GQR1:
|
||||
case SPRN_GQR2:
|
||||
@ -476,6 +504,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
|
||||
kvmppc_set_gpr(vcpu, rt, 0);
|
||||
break;
|
||||
default:
|
||||
unprivileged:
|
||||
printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
|
||||
#ifndef DEBUG_SPR
|
||||
emulated = EMULATE_FAIL;
|
||||
|
@ -23,9 +23,7 @@
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
|
||||
#else
|
||||
EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
|
||||
EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
|
||||
EXPORT_SYMBOL_GPL(kvmppc_rmcall);
|
||||
EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
|
||||
EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
|
||||
|
@ -62,6 +62,8 @@
|
||||
/* #define EXIT_DEBUG_SIMPLE */
|
||||
/* #define EXIT_DEBUG_INT */
|
||||
|
||||
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
local_paca->kvm_hstate.kvm_vcpu = vcpu;
|
||||
@ -72,40 +74,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
|
||||
static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
|
||||
static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 now;
|
||||
unsigned long dec_nsec;
|
||||
|
||||
now = get_tb();
|
||||
if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
if (vcpu->arch.pending_exceptions)
|
||||
return;
|
||||
if (vcpu->arch.dec_expires != ~(u64)0) {
|
||||
dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
|
||||
tb_ticks_per_sec;
|
||||
hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
|
||||
HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
kvmppc_vcpu_blocked(vcpu);
|
||||
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
|
||||
if (vcpu->arch.dec_expires != ~(u64)0)
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
|
||||
kvmppc_vcpu_unblocked(vcpu);
|
||||
}
|
||||
|
||||
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
|
||||
{
|
||||
vcpu->arch.shregs.msr = msr;
|
||||
kvmppc_end_cede(vcpu);
|
||||
}
|
||||
|
||||
void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
|
||||
@ -257,15 +229,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
|
||||
|
||||
switch (req) {
|
||||
case H_CEDE:
|
||||
vcpu->arch.shregs.msr |= MSR_EE;
|
||||
vcpu->arch.ceded = 1;
|
||||
smp_mb();
|
||||
if (!vcpu->arch.prodded)
|
||||
kvmppc_vcpu_block(vcpu);
|
||||
else
|
||||
vcpu->arch.prodded = 0;
|
||||
smp_mb();
|
||||
vcpu->arch.ceded = 0;
|
||||
break;
|
||||
case H_PROD:
|
||||
target = kvmppc_get_gpr(vcpu, 4);
|
||||
@ -388,20 +351,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (!(r & RESUME_HOST)) {
|
||||
/* To avoid clobbering exit_reason, only check for signals if
|
||||
* we aren't already exiting to userspace for some other
|
||||
* reason. */
|
||||
if (signal_pending(tsk)) {
|
||||
vcpu->stat.signal_exits++;
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
r = -EINTR;
|
||||
} else {
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -479,13 +428,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
kvmppc_mmu_book3s_hv_init(vcpu);
|
||||
|
||||
/*
|
||||
* Some vcpus may start out in stopped state. If we initialize
|
||||
* them to busy-in-host state they will stop other vcpus in the
|
||||
* vcore from running. Instead we initialize them to blocked
|
||||
* state, effectively considering them to be stopped until we
|
||||
* see the first run ioctl for them.
|
||||
* We consider the vcpu stopped until we see the first run ioctl for it.
|
||||
*/
|
||||
vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
|
||||
vcpu->arch.state = KVMPPC_VCPU_STOPPED;
|
||||
|
||||
init_waitqueue_head(&vcpu->arch.cpu_run);
|
||||
|
||||
@ -496,6 +441,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
if (vcore) {
|
||||
INIT_LIST_HEAD(&vcore->runnable_threads);
|
||||
spin_lock_init(&vcore->lock);
|
||||
init_waitqueue_head(&vcore->wq);
|
||||
}
|
||||
kvm->arch.vcores[core] = vcore;
|
||||
}
|
||||
@ -506,10 +452,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
|
||||
spin_lock(&vcore->lock);
|
||||
++vcore->num_threads;
|
||||
++vcore->n_blocked;
|
||||
spin_unlock(&vcore->lock);
|
||||
vcpu->arch.vcore = vcore;
|
||||
|
||||
vcpu->arch.cpu_type = KVM_CPU_3S_64;
|
||||
kvmppc_sanity_check(vcpu);
|
||||
|
||||
return vcpu;
|
||||
|
||||
free_vcpu:
|
||||
@ -524,30 +472,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
|
||||
kfree(vcpu);
|
||||
}
|
||||
|
||||
static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
|
||||
static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||
unsigned long dec_nsec, now;
|
||||
|
||||
spin_lock(&vc->lock);
|
||||
vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
|
||||
++vc->n_blocked;
|
||||
if (vc->n_runnable > 0 &&
|
||||
vc->n_runnable + vc->n_blocked == vc->num_threads) {
|
||||
vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
|
||||
arch.run_list);
|
||||
wake_up(&vcpu->arch.cpu_run);
|
||||
now = get_tb();
|
||||
if (now > vcpu->arch.dec_expires) {
|
||||
/* decrementer has already gone negative */
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
kvmppc_core_deliver_interrupts(vcpu);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&vc->lock);
|
||||
dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
|
||||
/ tb_ticks_per_sec;
|
||||
hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
|
||||
HRTIMER_MODE_REL);
|
||||
vcpu->arch.timer_running = 1;
|
||||
}
|
||||
|
||||
static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
|
||||
static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||
|
||||
spin_lock(&vc->lock);
|
||||
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
|
||||
--vc->n_blocked;
|
||||
spin_unlock(&vc->lock);
|
||||
vcpu->arch.ceded = 0;
|
||||
if (vcpu->arch.timer_running) {
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
vcpu->arch.timer_running = 0;
|
||||
}
|
||||
}
|
||||
|
||||
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
|
||||
@ -562,6 +511,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
|
||||
return;
|
||||
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
|
||||
--vc->n_runnable;
|
||||
++vc->n_busy;
|
||||
/* decrement the physical thread id of each following vcpu */
|
||||
v = vcpu;
|
||||
list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
|
||||
@ -575,15 +525,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
|
||||
struct paca_struct *tpaca;
|
||||
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||
|
||||
if (vcpu->arch.timer_running) {
|
||||
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
|
||||
vcpu->arch.timer_running = 0;
|
||||
}
|
||||
cpu = vc->pcpu + vcpu->arch.ptid;
|
||||
tpaca = &paca[cpu];
|
||||
tpaca->kvm_hstate.kvm_vcpu = vcpu;
|
||||
tpaca->kvm_hstate.kvm_vcore = vc;
|
||||
tpaca->kvm_hstate.napping = 0;
|
||||
vcpu->cpu = vc->pcpu;
|
||||
smp_wmb();
|
||||
#ifdef CONFIG_PPC_ICP_NATIVE
|
||||
if (vcpu->arch.ptid) {
|
||||
tpaca->cpu_start = 0x80;
|
||||
tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
|
||||
wmb();
|
||||
xics_wake_cpu(cpu);
|
||||
++vc->n_woken;
|
||||
@ -631,9 +586,10 @@ static int on_primary_thread(void)
|
||||
*/
|
||||
static int kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
{
|
||||
struct kvm_vcpu *vcpu, *vnext;
|
||||
struct kvm_vcpu *vcpu, *vcpu0, *vnext;
|
||||
long ret;
|
||||
u64 now;
|
||||
int ptid;
|
||||
|
||||
/* don't start if any threads have a signal pending */
|
||||
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
|
||||
@ -652,29 +608,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Assign physical thread IDs, first to non-ceded vcpus
|
||||
* and then to ceded ones.
|
||||
*/
|
||||
ptid = 0;
|
||||
vcpu0 = NULL;
|
||||
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
|
||||
if (!vcpu->arch.ceded) {
|
||||
if (!ptid)
|
||||
vcpu0 = vcpu;
|
||||
vcpu->arch.ptid = ptid++;
|
||||
}
|
||||
}
|
||||
if (!vcpu0)
|
||||
return 0; /* nothing to run */
|
||||
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
|
||||
if (vcpu->arch.ceded)
|
||||
vcpu->arch.ptid = ptid++;
|
||||
|
||||
vc->n_woken = 0;
|
||||
vc->nap_count = 0;
|
||||
vc->entry_exit_count = 0;
|
||||
vc->vcore_running = 1;
|
||||
vc->vcore_state = VCORE_RUNNING;
|
||||
vc->in_guest = 0;
|
||||
vc->pcpu = smp_processor_id();
|
||||
vc->napping_threads = 0;
|
||||
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
|
||||
kvmppc_start_thread(vcpu);
|
||||
vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
|
||||
arch.run_list);
|
||||
|
||||
spin_unlock(&vc->lock);
|
||||
|
||||
preempt_disable();
|
||||
kvm_guest_enter();
|
||||
__kvmppc_vcore_entry(NULL, vcpu);
|
||||
spin_unlock(&vc->lock);
|
||||
|
||||
kvm_guest_enter();
|
||||
__kvmppc_vcore_entry(NULL, vcpu0);
|
||||
|
||||
/* wait for secondary threads to finish writing their state to memory */
|
||||
spin_lock(&vc->lock);
|
||||
/* disable sending of IPIs on virtual external irqs */
|
||||
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
|
||||
vcpu->cpu = -1;
|
||||
/* wait for secondary threads to finish writing their state to memory */
|
||||
if (vc->nap_count < vc->n_woken)
|
||||
kvmppc_wait_for_nap(vc);
|
||||
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
|
||||
vc->vcore_running = 2;
|
||||
vc->vcore_state = VCORE_EXITING;
|
||||
spin_unlock(&vc->lock);
|
||||
|
||||
/* make sure updates to secondary vcpu structs are visible now */
|
||||
@ -690,22 +667,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
if (now < vcpu->arch.dec_expires &&
|
||||
kvmppc_core_pending_dec(vcpu))
|
||||
kvmppc_core_dequeue_dec(vcpu);
|
||||
if (!vcpu->arch.trap) {
|
||||
if (signal_pending(vcpu->arch.run_task)) {
|
||||
vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
vcpu->arch.ret = -EINTR;
|
||||
}
|
||||
continue; /* didn't get to run */
|
||||
}
|
||||
ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
|
||||
vcpu->arch.run_task);
|
||||
|
||||
ret = RESUME_GUEST;
|
||||
if (vcpu->arch.trap)
|
||||
ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
|
||||
vcpu->arch.run_task);
|
||||
|
||||
vcpu->arch.ret = ret;
|
||||
vcpu->arch.trap = 0;
|
||||
|
||||
if (vcpu->arch.ceded) {
|
||||
if (ret != RESUME_GUEST)
|
||||
kvmppc_end_cede(vcpu);
|
||||
else
|
||||
kvmppc_set_timer(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&vc->lock);
|
||||
out:
|
||||
vc->vcore_running = 0;
|
||||
vc->vcore_state = VCORE_INACTIVE;
|
||||
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
|
||||
arch.run_list) {
|
||||
if (vcpu->arch.ret != RESUME_GUEST) {
|
||||
@ -717,16 +698,145 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
/*
|
||||
* Wait for some other vcpu thread to execute us, and
|
||||
* wake us up when we need to handle something in the host.
|
||||
*/
|
||||
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
|
||||
{
|
||||
int ptid;
|
||||
int wait_state;
|
||||
struct kvmppc_vcore *vc;
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
/* No need to go into the guest when all we do is going out */
|
||||
prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
|
||||
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
|
||||
schedule();
|
||||
finish_wait(&vcpu->arch.cpu_run, &wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* All the vcpus in this vcore are idle, so wait for a decrementer
|
||||
* or external interrupt to one of the vcpus. vc->lock is held.
|
||||
*/
|
||||
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
struct kvm_vcpu *v;
|
||||
int all_idle = 1;
|
||||
|
||||
prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
|
||||
vc->vcore_state = VCORE_SLEEPING;
|
||||
spin_unlock(&vc->lock);
|
||||
list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
|
||||
if (!v->arch.ceded || v->arch.pending_exceptions) {
|
||||
all_idle = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (all_idle)
|
||||
schedule();
|
||||
finish_wait(&vc->wq, &wait);
|
||||
spin_lock(&vc->lock);
|
||||
vc->vcore_state = VCORE_INACTIVE;
|
||||
}
|
||||
|
||||
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int n_ceded;
|
||||
int prev_state;
|
||||
struct kvmppc_vcore *vc;
|
||||
struct kvm_vcpu *v, *vn;
|
||||
|
||||
kvm_run->exit_reason = 0;
|
||||
vcpu->arch.ret = RESUME_GUEST;
|
||||
vcpu->arch.trap = 0;
|
||||
|
||||
/*
|
||||
* Synchronize with other threads in this virtual core
|
||||
*/
|
||||
vc = vcpu->arch.vcore;
|
||||
spin_lock(&vc->lock);
|
||||
vcpu->arch.ceded = 0;
|
||||
vcpu->arch.run_task = current;
|
||||
vcpu->arch.kvm_run = kvm_run;
|
||||
prev_state = vcpu->arch.state;
|
||||
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
|
||||
list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
|
||||
++vc->n_runnable;
|
||||
|
||||
/*
|
||||
* This happens the first time this is called for a vcpu.
|
||||
* If the vcore is already running, we may be able to start
|
||||
* this thread straight away and have it join in.
|
||||
*/
|
||||
if (prev_state == KVMPPC_VCPU_STOPPED) {
|
||||
if (vc->vcore_state == VCORE_RUNNING &&
|
||||
VCORE_EXIT_COUNT(vc) == 0) {
|
||||
vcpu->arch.ptid = vc->n_runnable - 1;
|
||||
kvmppc_start_thread(vcpu);
|
||||
}
|
||||
|
||||
} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
|
||||
--vc->n_busy;
|
||||
|
||||
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
|
||||
!signal_pending(current)) {
|
||||
if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
|
||||
spin_unlock(&vc->lock);
|
||||
kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
|
||||
spin_lock(&vc->lock);
|
||||
continue;
|
||||
}
|
||||
n_ceded = 0;
|
||||
list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
|
||||
n_ceded += v->arch.ceded;
|
||||
if (n_ceded == vc->n_runnable)
|
||||
kvmppc_vcore_blocked(vc);
|
||||
else
|
||||
kvmppc_run_core(vc);
|
||||
|
||||
list_for_each_entry_safe(v, vn, &vc->runnable_threads,
|
||||
arch.run_list) {
|
||||
kvmppc_core_deliver_interrupts(v);
|
||||
if (signal_pending(v->arch.run_task)) {
|
||||
kvmppc_remove_runnable(vc, v);
|
||||
v->stat.signal_exits++;
|
||||
v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
v->arch.ret = -EINTR;
|
||||
wake_up(&v->arch.cpu_run);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (signal_pending(current)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
if (vc->vcore_state == VCORE_RUNNING ||
|
||||
vc->vcore_state == VCORE_EXITING) {
|
||||
spin_unlock(&vc->lock);
|
||||
kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
|
||||
spin_lock(&vc->lock);
|
||||
}
|
||||
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
|
||||
kvmppc_remove_runnable(vc, vcpu);
|
||||
vcpu->stat.signal_exits++;
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
vcpu->arch.ret = -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&vc->lock);
|
||||
return vcpu->arch.ret;
|
||||
}
|
||||
|
||||
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (!vcpu->arch.sane) {
|
||||
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* No need to go into the guest when all we'll do is come back out */
|
||||
if (signal_pending(current)) {
|
||||
run->exit_reason = KVM_EXIT_INTR;
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
@ -734,71 +844,10 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
|
||||
return -EPERM;
|
||||
|
||||
kvm_run->exit_reason = 0;
|
||||
vcpu->arch.ret = RESUME_GUEST;
|
||||
vcpu->arch.trap = 0;
|
||||
|
||||
flush_fp_to_thread(current);
|
||||
flush_altivec_to_thread(current);
|
||||
flush_vsx_to_thread(current);
|
||||
|
||||
/*
|
||||
* Synchronize with other threads in this virtual core
|
||||
*/
|
||||
vc = vcpu->arch.vcore;
|
||||
spin_lock(&vc->lock);
|
||||
/* This happens the first time this is called for a vcpu */
|
||||
if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
|
||||
--vc->n_blocked;
|
||||
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
|
||||
ptid = vc->n_runnable;
|
||||
vcpu->arch.run_task = current;
|
||||
vcpu->arch.kvm_run = kvm_run;
|
||||
vcpu->arch.ptid = ptid;
|
||||
list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
|
||||
++vc->n_runnable;
|
||||
|
||||
wait_state = TASK_INTERRUPTIBLE;
|
||||
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
|
||||
if (signal_pending(current)) {
|
||||
if (!vc->vcore_running) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
vcpu->arch.ret = -EINTR;
|
||||
break;
|
||||
}
|
||||
/* have to wait for vcore to stop executing guest */
|
||||
wait_state = TASK_UNINTERRUPTIBLE;
|
||||
smp_send_reschedule(vc->pcpu);
|
||||
}
|
||||
|
||||
if (!vc->vcore_running &&
|
||||
vc->n_runnable + vc->n_blocked == vc->num_threads) {
|
||||
/* we can run now */
|
||||
if (kvmppc_run_core(vc))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
|
||||
kvmppc_start_thread(vcpu);
|
||||
|
||||
/* wait for other threads to come in, or wait for vcore */
|
||||
prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
|
||||
spin_unlock(&vc->lock);
|
||||
schedule();
|
||||
finish_wait(&vcpu->arch.cpu_run, &wait);
|
||||
spin_lock(&vc->lock);
|
||||
}
|
||||
|
||||
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
|
||||
kvmppc_remove_runnable(vc, vcpu);
|
||||
spin_unlock(&vc->lock);
|
||||
|
||||
return vcpu->arch.ret;
|
||||
}
|
||||
|
||||
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
|
||||
|
||||
do {
|
||||
r = kvmppc_run_vcpu(run, vcpu);
|
||||
|
@ -110,39 +110,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
unsigned long rb, va_low;
|
||||
|
||||
rb = (v & ~0x7fUL) << 16; /* AVA field */
|
||||
va_low = pte_index >> 3;
|
||||
if (v & HPTE_V_SECONDARY)
|
||||
va_low = ~va_low;
|
||||
/* xor vsid from AVA */
|
||||
if (!(v & HPTE_V_1TB_SEG))
|
||||
va_low ^= v >> 12;
|
||||
else
|
||||
va_low ^= v >> 24;
|
||||
va_low &= 0x7ff;
|
||||
if (v & HPTE_V_LARGE) {
|
||||
rb |= 1; /* L field */
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
|
||||
(r & 0xff000)) {
|
||||
/* non-16MB large page, must be 64k */
|
||||
/* (masks depend on page size) */
|
||||
rb |= 0x1000; /* page encoding in LP field */
|
||||
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
|
||||
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
|
||||
}
|
||||
} else {
|
||||
/* 4kB page */
|
||||
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
|
||||
}
|
||||
rb |= (v >> 54) & 0x300; /* B field */
|
||||
return rb;
|
||||
}
|
||||
|
||||
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
|
||||
|
||||
static inline int try_lock_tlbie(unsigned int *lock)
|
||||
|
@ -20,7 +20,10 @@
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/reg.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/hvcall.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/exception-64s.h>
|
||||
|
||||
@ -49,7 +52,7 @@ kvmppc_skip_Hinterrupt:
|
||||
b .
|
||||
|
||||
/*
|
||||
* Call kvmppc_handler_trampoline_enter in real mode.
|
||||
* Call kvmppc_hv_entry in real mode.
|
||||
* Must be called with interrupts hard-disabled.
|
||||
*
|
||||
* Input Registers:
|
||||
@ -89,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
|
||||
kvm_start_guest:
|
||||
ld r1,PACAEMERGSP(r13)
|
||||
subi r1,r1,STACK_FRAME_OVERHEAD
|
||||
ld r2,PACATOC(r13)
|
||||
|
||||
/* were we napping due to cede? */
|
||||
lbz r0,HSTATE_NAPPING(r13)
|
||||
cmpwi r0,0
|
||||
bne kvm_end_cede
|
||||
|
||||
/* get vcpu pointer */
|
||||
ld r4, HSTATE_KVM_VCPU(r13)
|
||||
@ -276,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
|
||||
cmpwi r0,0
|
||||
beq 20b
|
||||
|
||||
/* Set LPCR. Set the MER bit if there is a pending external irq. */
|
||||
/* Set LPCR and RMOR. */
|
||||
10: ld r8,KVM_LPCR(r9)
|
||||
ld r0,VCPU_PENDING_EXC(r4)
|
||||
li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
|
||||
oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
|
||||
and. r0,r0,r7
|
||||
beq 11f
|
||||
ori r8,r8,LPCR_MER
|
||||
11: mtspr SPRN_LPCR,r8
|
||||
mtspr SPRN_LPCR,r8
|
||||
ld r8,KVM_RMOR(r9)
|
||||
mtspr SPRN_RMOR,r8
|
||||
isync
|
||||
@ -448,19 +451,50 @@ toc_tlbie_lock:
|
||||
mtctr r6
|
||||
mtxer r7
|
||||
|
||||
/* Move SRR0 and SRR1 into the respective regs */
|
||||
kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
|
||||
ld r6, VCPU_SRR0(r4)
|
||||
ld r7, VCPU_SRR1(r4)
|
||||
mtspr SPRN_SRR0, r6
|
||||
mtspr SPRN_SRR1, r7
|
||||
|
||||
ld r10, VCPU_PC(r4)
|
||||
ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */
|
||||
|
||||
ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */
|
||||
rldicl r11, r11, 63 - MSR_HV_LG, 1
|
||||
rotldi r11, r11, 1 + MSR_HV_LG
|
||||
ori r11, r11, MSR_ME
|
||||
|
||||
/* Check if we can deliver an external or decrementer interrupt now */
|
||||
ld r0,VCPU_PENDING_EXC(r4)
|
||||
li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
|
||||
oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
|
||||
and r0,r0,r8
|
||||
cmpdi cr1,r0,0
|
||||
andi. r0,r11,MSR_EE
|
||||
beq cr1,11f
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r8,SPRN_LPCR
|
||||
ori r8,r8,LPCR_MER
|
||||
mtspr SPRN_LPCR,r8
|
||||
isync
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
beq 5f
|
||||
li r0,BOOK3S_INTERRUPT_EXTERNAL
|
||||
12: mr r6,r10
|
||||
mr r10,r0
|
||||
mr r7,r11
|
||||
li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
|
||||
rotldi r11,r11,63
|
||||
b 5f
|
||||
11: beq 5f
|
||||
mfspr r0,SPRN_DEC
|
||||
cmpwi r0,0
|
||||
li r0,BOOK3S_INTERRUPT_DECREMENTER
|
||||
blt 12b
|
||||
|
||||
/* Move SRR0 and SRR1 into the respective regs */
|
||||
5: mtspr SPRN_SRR0, r6
|
||||
mtspr SPRN_SRR1, r7
|
||||
li r0,0
|
||||
stb r0,VCPU_CEDED(r4) /* cancel cede */
|
||||
|
||||
fast_guest_return:
|
||||
mtspr SPRN_HSRR0,r10
|
||||
mtspr SPRN_HSRR1,r11
|
||||
@ -574,21 +608,20 @@ kvmppc_interrupt:
|
||||
/* See if this is something we can handle in real mode */
|
||||
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
|
||||
beq hcall_try_real_mode
|
||||
hcall_real_cont:
|
||||
|
||||
/* Check for mediated interrupts (could be done earlier really ...) */
|
||||
BEGIN_FTR_SECTION
|
||||
cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL
|
||||
bne+ 1f
|
||||
ld r5,VCPU_KVM(r9)
|
||||
ld r5,KVM_LPCR(r5)
|
||||
andi. r0,r11,MSR_EE
|
||||
beq 1f
|
||||
mfspr r5,SPRN_LPCR
|
||||
andi. r0,r5,LPCR_MER
|
||||
bne bounce_ext_interrupt
|
||||
1:
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
|
||||
|
||||
hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
|
||||
/* Save DEC */
|
||||
mfspr r5,SPRN_DEC
|
||||
mftb r6
|
||||
@ -682,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
|
||||
slbia
|
||||
ptesync
|
||||
|
||||
hdec_soon:
|
||||
hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */
|
||||
BEGIN_FTR_SECTION
|
||||
b 32f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
|
||||
@ -700,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
|
||||
addi r0,r3,0x100
|
||||
stwcx. r0,0,r6
|
||||
bne 41b
|
||||
lwsync
|
||||
|
||||
/*
|
||||
* At this point we have an interrupt that we have to pass
|
||||
@ -713,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
|
||||
* interrupt, since the other threads will already be on their
|
||||
* way here in that case.
|
||||
*/
|
||||
cmpwi r3,0x100 /* Are we the first here? */
|
||||
bge 43f
|
||||
cmpwi r3,1 /* Are any other threads in the guest? */
|
||||
ble 43f
|
||||
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
|
||||
beq 40f
|
||||
cmpwi r3,0x100 /* Are we the first here? */
|
||||
bge 40f
|
||||
cmpwi r3,1
|
||||
ble 40f
|
||||
li r0,0
|
||||
mtspr SPRN_HDEC,r0
|
||||
40:
	/*
	 * Send an IPI to any napping threads, since an HDEC interrupt
	 * doesn't wake CPUs up from nap.
	 *
	 * NOTE(review): r5 is presumably the vcore pointer and r9 the vcpu
	 * at this point - confirm against the code preceding this hunk.
	 */
	lwz	r3,VCORE_NAPPING_THREADS(r5)
	lwz	r4,VCPU_PTID(r9)
	li	r0,1
	/*
	 * Shift by the *register* r4 (our thread id).  The immediate form,
	 * sldi, would treat "r4" as the constant 4 and always build 0x10,
	 * so our own bit would never be cleared from the mask.
	 */
	sld	r0,r0,r4
	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
	beq	43f
	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */
	subf	r6,r4,r13
	/* Walk the mask one bit per thread, stepping r6 to the next paca */
42:	andi.	r0,r3,1
	beq	44f
	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
	li	r0,IPI_PRIORITY
	li	r7,XICS_QIRR
	stbcix	r0,r7,r8		/* trigger the IPI */
44:	srdi.	r3,r3,1
	addi	r6,r6,PACA_SIZE
	bne	42b
|
||||
|
||||
/* Secondary threads wait for primary to do partition switch */
|
||||
ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
|
||||
43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */
|
||||
ld r5,HSTATE_KVM_VCORE(r13)
|
||||
lwz r3,VCPU_PTID(r9)
|
||||
cmpwi r3,0
|
||||
@ -1077,7 +1132,6 @@ hcall_try_real_mode:
|
||||
hcall_real_fallback:
|
||||
li r12,BOOK3S_INTERRUPT_SYSCALL
|
||||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
ld r11, VCPU_MSR(r9)
|
||||
|
||||
b hcall_real_cont
|
||||
|
||||
@ -1139,7 +1193,7 @@ hcall_real_table:
|
||||
.long 0 /* 0xd4 */
|
||||
.long 0 /* 0xd8 */
|
||||
.long 0 /* 0xdc */
|
||||
.long 0 /* 0xe0 */
|
||||
.long .kvmppc_h_cede - hcall_real_table
|
||||
.long 0 /* 0xe4 */
|
||||
.long 0 /* 0xe8 */
|
||||
.long 0 /* 0xec */
|
||||
@ -1168,7 +1222,8 @@ bounce_ext_interrupt:
|
||||
mtspr SPRN_SRR0,r10
|
||||
mtspr SPRN_SRR1,r11
|
||||
li r10,BOOK3S_INTERRUPT_EXTERNAL
|
||||
LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
|
||||
li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
|
||||
rotldi r11,r11,63
|
||||
b fast_guest_return
|
||||
|
||||
_GLOBAL(kvmppc_h_set_dabr)
|
||||
@ -1177,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr)
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
_GLOBAL(kvmppc_h_cede)
|
||||
ori r11,r11,MSR_EE
|
||||
std r11,VCPU_MSR(r3)
|
||||
li r0,1
|
||||
stb r0,VCPU_CEDED(r3)
|
||||
sync /* order setting ceded vs. testing prodded */
|
||||
lbz r5,VCPU_PRODDED(r3)
|
||||
cmpwi r5,0
|
||||
bne 1f
|
||||
li r0,0 /* set trap to 0 to say hcall is handled */
|
||||
stw r0,VCPU_TRAP(r3)
|
||||
li r0,H_SUCCESS
|
||||
std r0,VCPU_GPR(r3)(r3)
|
||||
BEGIN_FTR_SECTION
|
||||
b 2f /* just send it up to host on 970 */
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
|
||||
|
||||
/*
|
||||
* Set our bit in the bitmask of napping threads unless all the
|
||||
* other threads are already napping, in which case we send this
|
||||
* up to the host.
|
||||
*/
|
||||
ld r5,HSTATE_KVM_VCORE(r13)
|
||||
lwz r6,VCPU_PTID(r3)
|
||||
lwz r8,VCORE_ENTRY_EXIT(r5)
|
||||
clrldi r8,r8,56
|
||||
li r0,1
|
||||
sld r0,r0,r6
|
||||
addi r6,r5,VCORE_NAPPING_THREADS
|
||||
31: lwarx r4,0,r6
|
||||
or r4,r4,r0
|
||||
popcntw r7,r4
|
||||
cmpw r7,r8
|
||||
bge 2f
|
||||
stwcx. r4,0,r6
|
||||
bne 31b
|
||||
li r0,1
|
||||
stb r0,HSTATE_NAPPING(r13)
|
||||
/* order napping_threads update vs testing entry_exit_count */
|
||||
lwsync
|
||||
mr r4,r3
|
||||
lwz r7,VCORE_ENTRY_EXIT(r5)
|
||||
cmpwi r7,0x100
|
||||
bge 33f /* another thread already exiting */
|
||||
|
||||
/*
|
||||
* Although not specifically required by the architecture, POWER7
|
||||
* preserves the following registers in nap mode, even if an SMT mode
|
||||
* switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
|
||||
* DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
|
||||
*/
|
||||
/* Save non-volatile GPRs */
|
||||
std r14, VCPU_GPR(r14)(r3)
|
||||
std r15, VCPU_GPR(r15)(r3)
|
||||
std r16, VCPU_GPR(r16)(r3)
|
||||
std r17, VCPU_GPR(r17)(r3)
|
||||
std r18, VCPU_GPR(r18)(r3)
|
||||
std r19, VCPU_GPR(r19)(r3)
|
||||
std r20, VCPU_GPR(r20)(r3)
|
||||
std r21, VCPU_GPR(r21)(r3)
|
||||
std r22, VCPU_GPR(r22)(r3)
|
||||
std r23, VCPU_GPR(r23)(r3)
|
||||
std r24, VCPU_GPR(r24)(r3)
|
||||
std r25, VCPU_GPR(r25)(r3)
|
||||
std r26, VCPU_GPR(r26)(r3)
|
||||
std r27, VCPU_GPR(r27)(r3)
|
||||
std r28, VCPU_GPR(r28)(r3)
|
||||
std r29, VCPU_GPR(r29)(r3)
|
||||
std r30, VCPU_GPR(r30)(r3)
|
||||
std r31, VCPU_GPR(r31)(r3)
|
||||
|
||||
/* save FP state */
|
||||
bl .kvmppc_save_fp
|
||||
|
||||
/*
|
||||
* Take a nap until a decrementer or external interrupt occurs,
|
||||
* with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
|
||||
*/
|
||||
li r0,0x80
|
||||
stb r0,PACAPROCSTART(r13)
|
||||
mfspr r5,SPRN_LPCR
|
||||
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
|
||||
mtspr SPRN_LPCR,r5
|
||||
isync
|
||||
li r0, 0
|
||||
std r0, HSTATE_SCRATCH0(r13)
|
||||
ptesync
|
||||
ld r0, HSTATE_SCRATCH0(r13)
|
||||
1: cmpd r0, r0
|
||||
bne 1b
|
||||
nap
|
||||
b .
|
||||
|
||||
kvm_end_cede:
|
||||
/* Woken by external or decrementer interrupt */
|
||||
ld r1, HSTATE_HOST_R1(r13)
|
||||
ld r2, PACATOC(r13)
|
||||
|
||||
/* If we're a secondary thread and we got here by an IPI, ack it */
|
||||
ld r4,HSTATE_KVM_VCPU(r13)
|
||||
lwz r3,VCPU_PTID(r4)
|
||||
cmpwi r3,0
|
||||
beq 27f
|
||||
mfspr r3,SPRN_SRR1
|
||||
rlwinm r3,r3,44-31,0x7 /* extract wake reason field */
|
||||
cmpwi r3,4 /* was it an external interrupt? */
|
||||
bne 27f
|
||||
ld r5, HSTATE_XICS_PHYS(r13)
|
||||
li r0,0xff
|
||||
li r6,XICS_QIRR
|
||||
li r7,XICS_XIRR
|
||||
lwzcix r8,r5,r7 /* ack the interrupt */
|
||||
sync
|
||||
stbcix r0,r5,r6 /* clear it */
|
||||
stwcix r8,r5,r7 /* EOI it */
|
||||
27:
|
||||
/* load up FP state */
|
||||
bl kvmppc_load_fp
|
||||
|
||||
/* Load NV GPRS */
|
||||
ld r14, VCPU_GPR(r14)(r4)
|
||||
ld r15, VCPU_GPR(r15)(r4)
|
||||
ld r16, VCPU_GPR(r16)(r4)
|
||||
ld r17, VCPU_GPR(r17)(r4)
|
||||
ld r18, VCPU_GPR(r18)(r4)
|
||||
ld r19, VCPU_GPR(r19)(r4)
|
||||
ld r20, VCPU_GPR(r20)(r4)
|
||||
ld r21, VCPU_GPR(r21)(r4)
|
||||
ld r22, VCPU_GPR(r22)(r4)
|
||||
ld r23, VCPU_GPR(r23)(r4)
|
||||
ld r24, VCPU_GPR(r24)(r4)
|
||||
ld r25, VCPU_GPR(r25)(r4)
|
||||
ld r26, VCPU_GPR(r26)(r4)
|
||||
ld r27, VCPU_GPR(r27)(r4)
|
||||
ld r28, VCPU_GPR(r28)(r4)
|
||||
ld r29, VCPU_GPR(r29)(r4)
|
||||
ld r30, VCPU_GPR(r30)(r4)
|
||||
ld r31, VCPU_GPR(r31)(r4)
|
||||
|
||||
/* clear our bit in vcore->napping_threads */
|
||||
33: ld r5,HSTATE_KVM_VCORE(r13)
|
||||
lwz r3,VCPU_PTID(r4)
|
||||
li r0,1
|
||||
sld r0,r0,r3
|
||||
addi r6,r5,VCORE_NAPPING_THREADS
|
||||
32: lwarx r7,0,r6
|
||||
andc r7,r7,r0
|
||||
stwcx. r7,0,r6
|
||||
bne 32b
|
||||
li r0,0
|
||||
stb r0,HSTATE_NAPPING(r13)
|
||||
|
||||
/* see if any other thread is already exiting */
|
||||
lwz r0,VCORE_ENTRY_EXIT(r5)
|
||||
cmpwi r0,0x100
|
||||
blt kvmppc_cede_reentry /* if not go back to guest */
|
||||
|
||||
/* some threads are exiting, so go to the guest exit path */
|
||||
b hcall_real_fallback
|
||||
|
||||
/* cede when already previously prodded case */
|
||||
1: li r0,0
|
||||
stb r0,VCPU_PRODDED(r3)
|
||||
sync /* order testing prodded vs. clearing ceded */
|
||||
stb r0,VCPU_CEDED(r3)
|
||||
li r3,H_SUCCESS
|
||||
blr
|
||||
|
||||
/* we've ceded but we want to give control to the host */
|
||||
2: li r3,H_TOO_HARD
|
||||
blr
|
||||
|
||||
secondary_too_late:
|
||||
ld r5,HSTATE_KVM_VCORE(r13)
|
||||
HMT_LOW
|
||||
@ -1194,14 +1421,20 @@ secondary_too_late:
|
||||
slbmte r6,r5
|
||||
1: addi r11,r11,16
|
||||
.endr
|
||||
b 50f
|
||||
|
||||
secondary_nap:
|
||||
/* Clear any pending IPI */
|
||||
50: ld r5, HSTATE_XICS_PHYS(r13)
|
||||
/* Clear any pending IPI - assume we're a secondary thread */
|
||||
ld r5, HSTATE_XICS_PHYS(r13)
|
||||
li r7, XICS_XIRR
|
||||
lwzcix r3, r5, r7 /* ack any pending interrupt */
|
||||
rlwinm. r0, r3, 0, 0xffffff /* any pending? */
|
||||
beq 37f
|
||||
sync
|
||||
li r0, 0xff
|
||||
li r6, XICS_QIRR
|
||||
stbcix r0, r5, r6
|
||||
stbcix r0, r5, r6 /* clear the IPI */
|
||||
stwcix r3, r5, r7 /* EOI it */
|
||||
37: sync
|
||||
|
||||
/* increment the nap count and then go to nap mode */
|
||||
ld r4, HSTATE_KVM_VCORE(r13)
|
||||
@ -1211,13 +1444,12 @@ secondary_nap:
|
||||
addi r3, r3, 1
|
||||
stwcx. r3, 0, r4
|
||||
bne 51b
|
||||
isync
|
||||
|
||||
li r3, LPCR_PECE0
|
||||
mfspr r4, SPRN_LPCR
|
||||
li r0, LPCR_PECE
|
||||
andc r4, r4, r0
|
||||
ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */
|
||||
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
|
||||
mtspr SPRN_LPCR, r4
|
||||
isync
|
||||
li r0, 0
|
||||
std r0, HSTATE_SCRATCH0(r13)
|
||||
ptesync
|
||||
|
@ -29,27 +29,11 @@
|
||||
#define ULONG_SIZE 8
|
||||
#define FUNC(name) GLUE(.,name)
|
||||
|
||||
#define GET_SHADOW_VCPU_R13
|
||||
|
||||
#define DISABLE_INTERRUPTS \
|
||||
mfmsr r0; \
|
||||
rldicl r0,r0,48,1; \
|
||||
rotldi r0,r0,16; \
|
||||
mtmsrd r0,1; \
|
||||
|
||||
#elif defined(CONFIG_PPC_BOOK3S_32)
|
||||
|
||||
#define ULONG_SIZE 4
|
||||
#define FUNC(name) name
|
||||
|
||||
#define GET_SHADOW_VCPU_R13 \
|
||||
lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2)
|
||||
|
||||
#define DISABLE_INTERRUPTS \
|
||||
mfmsr r0; \
|
||||
rlwinm r0,r0,0,17,15; \
|
||||
mtmsr r0; \
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S_XX */
|
||||
|
||||
|
||||
@ -108,44 +92,17 @@ kvm_start_entry:
|
||||
|
||||
kvm_start_lightweight:
|
||||
|
||||
GET_SHADOW_VCPU_R13
|
||||
PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
|
||||
PPC_STL r3, HSTATE_VMHANDLER(r13)
|
||||
|
||||
PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
|
||||
|
||||
DISABLE_INTERRUPTS
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/* Some guests may need to have dcbz set to 32 byte length.
|
||||
*
|
||||
* Usually we ensure that by patching the guest's instructions
|
||||
* to trap on dcbz and emulate it in the hypervisor.
|
||||
*
|
||||
* If we can, we should tell the CPU to use 32 byte dcbz though,
|
||||
* because that's a lot faster.
|
||||
*/
|
||||
|
||||
PPC_LL r3, VCPU_HFLAGS(r4)
|
||||
rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */
|
||||
beq no_dcbz32_on
|
||||
|
||||
mfspr r3,SPRN_HID5
|
||||
ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */
|
||||
mtspr SPRN_HID5,r3
|
||||
|
||||
no_dcbz32_on:
|
||||
|
||||
rldicl r3, r3, 0, 63 /* r3 &= 1 */
|
||||
stb r3, HSTATE_RESTORE_HID5(r13)
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
PPC_LL r6, VCPU_RMCALL(r4)
|
||||
mtctr r6
|
||||
|
||||
PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4)
|
||||
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
|
||||
PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
|
||||
|
||||
/* Jump to segment patching handler and into our guest */
|
||||
bctr
|
||||
bl FUNC(kvmppc_entry_trampoline)
|
||||
nop
|
||||
|
||||
/*
|
||||
* This is the handler in module memory. It gets jumped at from the
|
||||
@ -170,21 +127,6 @@ kvmppc_handler_highmem:
|
||||
/* R7 = vcpu */
|
||||
PPC_LL r7, GPR4(r1)
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
|
||||
PPC_LL r5, VCPU_HFLAGS(r7)
|
||||
rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
|
||||
beq no_dcbz32_off
|
||||
|
||||
li r4, 0
|
||||
mfspr r5,SPRN_HID5
|
||||
rldimi r5,r4,6,56
|
||||
mtspr SPRN_HID5,r5
|
||||
|
||||
no_dcbz32_off:
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
PPC_STL r14, VCPU_GPR(r14)(r7)
|
||||
PPC_STL r15, VCPU_GPR(r15)(r7)
|
||||
PPC_STL r16, VCPU_GPR(r16)(r7)
|
||||
@ -204,67 +146,6 @@ no_dcbz32_off:
|
||||
PPC_STL r30, VCPU_GPR(r30)(r7)
|
||||
PPC_STL r31, VCPU_GPR(r31)(r7)
|
||||
|
||||
/* Restore host msr -> SRR1 */
|
||||
PPC_LL r6, VCPU_HOST_MSR(r7)
|
||||
|
||||
/*
|
||||
* For some interrupts, we need to call the real Linux
|
||||
* handler, so it can do work for us. This has to happen
|
||||
* as if the interrupt arrived from the kernel though,
|
||||
* so let's fake it here where most state is restored.
|
||||
*
|
||||
* Call Linux for hardware interrupts/decrementer
|
||||
* r3 = address of interrupt handler (exit reason)
|
||||
*/
|
||||
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
beq call_linux_handler
|
||||
cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
|
||||
beq call_linux_handler
|
||||
cmpwi r12, BOOK3S_INTERRUPT_PERFMON
|
||||
beq call_linux_handler
|
||||
|
||||
/* Back to EE=1 */
|
||||
mtmsr r6
|
||||
sync
|
||||
b kvm_return_point
|
||||
|
||||
call_linux_handler:
|
||||
|
||||
/*
|
||||
* If we land here we need to jump back to the handler we
|
||||
* came from.
|
||||
*
|
||||
* We have a page that we can access from real mode, so let's
|
||||
* jump back to that and use it as a trampoline to get back into the
|
||||
* interrupt handler!
|
||||
*
|
||||
* R3 still contains the exit code,
|
||||
* R5 VCPU_HOST_RETIP and
|
||||
* R6 VCPU_HOST_MSR
|
||||
*/
|
||||
|
||||
/* Restore host IP -> SRR0 */
|
||||
PPC_LL r5, VCPU_HOST_RETIP(r7)
|
||||
|
||||
/* XXX Better move to a safe function?
|
||||
* What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
|
||||
|
||||
mtlr r12
|
||||
|
||||
PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7)
|
||||
mtsrr0 r4
|
||||
LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
|
||||
mtsrr1 r3
|
||||
|
||||
RFI
|
||||
|
||||
.global kvm_return_point
|
||||
kvm_return_point:
|
||||
|
||||
/* Jump back to lightweight entry if we're supposed to */
|
||||
/* go back into the guest */
|
||||
|
||||
/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
|
||||
mr r5, r12
|
||||
|
||||
|
@ -150,16 +150,22 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
|
||||
kvmppc_mmu_book3s_64_init(vcpu);
|
||||
to_book3s(vcpu)->hior = 0xfff00000;
|
||||
if (!to_book3s(vcpu)->hior_sregs)
|
||||
to_book3s(vcpu)->hior = 0xfff00000;
|
||||
to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
|
||||
vcpu->arch.cpu_type = KVM_CPU_3S_64;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
kvmppc_mmu_book3s_32_init(vcpu);
|
||||
to_book3s(vcpu)->hior = 0;
|
||||
if (!to_book3s(vcpu)->hior_sregs)
|
||||
to_book3s(vcpu)->hior = 0;
|
||||
to_book3s(vcpu)->msr_mask = 0xffffffffULL;
|
||||
vcpu->arch.cpu_type = KVM_CPU_3S_32;
|
||||
}
|
||||
|
||||
kvmppc_sanity_check(vcpu);
|
||||
|
||||
/* If we are in hypervisor level on 970, we can tell the CPU to
|
||||
* treat DCBZ as 32 bytes store */
|
||||
vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
|
||||
@ -646,7 +652,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
}
|
||||
case BOOK3S_INTERRUPT_SYSCALL:
|
||||
if (vcpu->arch.osi_enabled &&
|
||||
if (vcpu->arch.papr_enabled &&
|
||||
(kvmppc_get_last_inst(vcpu) == 0x44000022) &&
|
||||
!(vcpu->arch.shared->msr & MSR_PR)) {
|
||||
/* SC 1 papr hypercalls */
|
||||
ulong cmd = kvmppc_get_gpr(vcpu, 3);
|
||||
int i;
|
||||
|
||||
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
}
|
||||
|
||||
run->papr_hcall.nr = cmd;
|
||||
for (i = 0; i < 9; ++i) {
|
||||
ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
|
||||
run->papr_hcall.args[i] = gpr;
|
||||
}
|
||||
run->exit_reason = KVM_EXIT_PAPR_HCALL;
|
||||
vcpu->arch.hcall_needed = 1;
|
||||
r = RESUME_HOST;
|
||||
} else if (vcpu->arch.osi_enabled &&
|
||||
(((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
|
||||
(((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
|
||||
/* MOL hypercalls */
|
||||
@ -770,6 +796,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
if (sregs->u.s.flags & KVM_SREGS_S_HIOR)
|
||||
sregs->u.s.hior = to_book3s(vcpu)->hior;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -806,6 +835,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
/* Flush the MMU after messing with the segments */
|
||||
kvmppc_mmu_pte_flush(vcpu, 0, 0);
|
||||
|
||||
if (sregs->u.s.flags & KVM_SREGS_S_HIOR) {
|
||||
to_book3s(vcpu)->hior_sregs = true;
|
||||
to_book3s(vcpu)->hior = sregs->u.s.hior;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -841,8 +875,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
if (!p)
|
||||
goto uninit_vcpu;
|
||||
|
||||
vcpu->arch.host_retip = kvm_return_point;
|
||||
vcpu->arch.host_msr = mfmsr();
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/* default to book3s_64 (970fx) */
|
||||
vcpu->arch.pvr = 0x3C0301;
|
||||
@ -853,16 +885,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
|
||||
vcpu->arch.slb_nr = 64;
|
||||
|
||||
/* remember where some real-mode handlers are */
|
||||
vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
|
||||
vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
|
||||
vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
|
||||
#else
|
||||
vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
|
||||
#endif
|
||||
|
||||
vcpu->arch.shadow_msr = MSR_USER64;
|
||||
|
||||
err = kvmppc_mmu_init(vcpu);
|
||||
@ -908,6 +930,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
ulong ext_msr;
|
||||
|
||||
/* Check if we can run the vcpu at all */
|
||||
if (!vcpu->arch.sane) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* No need to go into the guest when all we do is going out */
|
||||
if (signal_pending(current)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
|
158
arch/powerpc/kvm/book3s_pr_papr.c
Normal file
158
arch/powerpc/kvm/book3s_pr_papr.c
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (C) 2011. Freescale Inc. All rights reserved.
|
||||
*
|
||||
* Authors:
|
||||
* Alexander Graf <agraf@suse.de>
|
||||
* Paul Mackerras <paulus@samba.org>
|
||||
*
|
||||
* Description:
|
||||
*
|
||||
* Hypercall handling for running PAPR guests in PR KVM on Book 3S
|
||||
* processors.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License, version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/kvm_ppc.h>
|
||||
#include <asm/kvm_book3s.h>
|
||||
|
||||
static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
|
||||
{
|
||||
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
|
||||
unsigned long pteg_addr;
|
||||
|
||||
pte_index <<= 4;
|
||||
pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
|
||||
pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
|
||||
pteg_addr |= pte_index;
|
||||
|
||||
return pteg_addr;
|
||||
}
|
||||
|
||||
static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
long flags = kvmppc_get_gpr(vcpu, 4);
|
||||
long pte_index = kvmppc_get_gpr(vcpu, 5);
|
||||
unsigned long pteg[2 * 8];
|
||||
unsigned long pteg_addr, i, *hpte;
|
||||
|
||||
pte_index &= ~7UL;
|
||||
pteg_addr = get_pteg_addr(vcpu, pte_index);
|
||||
|
||||
copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
|
||||
hpte = pteg;
|
||||
|
||||
if (likely((flags & H_EXACT) == 0)) {
|
||||
pte_index &= ~7UL;
|
||||
for (i = 0; ; ++i) {
|
||||
if (i == 8)
|
||||
return H_PTEG_FULL;
|
||||
if ((*hpte & HPTE_V_VALID) == 0)
|
||||
break;
|
||||
hpte += 2;
|
||||
}
|
||||
} else {
|
||||
i = kvmppc_get_gpr(vcpu, 5) & 7UL;
|
||||
hpte += i * 2;
|
||||
}
|
||||
|
||||
hpte[0] = kvmppc_get_gpr(vcpu, 6);
|
||||
hpte[1] = kvmppc_get_gpr(vcpu, 7);
|
||||
copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
|
||||
kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
|
||||
kvmppc_set_gpr(vcpu, 4, pte_index | i);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long flags= kvmppc_get_gpr(vcpu, 4);
|
||||
unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
|
||||
unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
|
||||
unsigned long v = 0, pteg, rb;
|
||||
unsigned long pte[2];
|
||||
|
||||
pteg = get_pteg_addr(vcpu, pte_index);
|
||||
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
|
||||
|
||||
if ((pte[0] & HPTE_V_VALID) == 0 ||
|
||||
((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
|
||||
((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
|
||||
kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
copy_to_user((void __user *)pteg, &v, sizeof(v));
|
||||
|
||||
rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
|
||||
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
|
||||
|
||||
kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
|
||||
kvmppc_set_gpr(vcpu, 4, pte[0]);
|
||||
kvmppc_set_gpr(vcpu, 5, pte[1]);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long flags = kvmppc_get_gpr(vcpu, 4);
|
||||
unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
|
||||
unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
|
||||
unsigned long rb, pteg, r, v;
|
||||
unsigned long pte[2];
|
||||
|
||||
pteg = get_pteg_addr(vcpu, pte_index);
|
||||
copy_from_user(pte, (void __user *)pteg, sizeof(pte));
|
||||
|
||||
if ((pte[0] & HPTE_V_VALID) == 0 ||
|
||||
((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
|
||||
kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
v = pte[0];
|
||||
r = pte[1];
|
||||
r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
|
||||
HPTE_R_KEY_LO);
|
||||
r |= (flags << 55) & HPTE_R_PP0;
|
||||
r |= (flags << 48) & HPTE_R_KEY_HI;
|
||||
r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
|
||||
|
||||
pte[1] = r;
|
||||
|
||||
rb = compute_tlbie_rb(v, r, pte_index);
|
||||
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
|
||||
copy_to_user((void __user *)pteg, pte, sizeof(pte));
|
||||
|
||||
kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
|
||||
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
case H_ENTER:
|
||||
return kvmppc_h_pr_enter(vcpu);
|
||||
case H_REMOVE:
|
||||
return kvmppc_h_pr_remove(vcpu);
|
||||
case H_PROTECT:
|
||||
return kvmppc_h_pr_protect(vcpu);
|
||||
case H_BULK_REMOVE:
|
||||
/* We just flush all PTEs, so user space can
|
||||
handle the HPT modifications */
|
||||
kvmppc_mmu_pte_flush(vcpu, 0, 0);
|
||||
break;
|
||||
case H_CEDE:
|
||||
kvm_vcpu_block(vcpu);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
return EMULATE_DONE;
|
||||
}
|
||||
|
||||
return EMULATE_FAIL;
|
||||
}
|
@ -20,6 +20,7 @@
|
||||
#include <asm/ppc_asm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/reg.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
@ -35,10 +36,10 @@
|
||||
|
||||
#if defined(CONFIG_PPC_BOOK3S_64)
|
||||
|
||||
#define LOAD_SHADOW_VCPU(reg) GET_PACA(reg)
|
||||
#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
|
||||
#define FUNC(name) GLUE(.,name)
|
||||
#define MTMSR_EERI(reg) mtmsrd (reg),1
|
||||
|
||||
.globl kvmppc_skip_interrupt
|
||||
kvmppc_skip_interrupt:
|
||||
/*
|
||||
* Here all GPRs are unchanged from when the interrupt happened
|
||||
@ -51,6 +52,7 @@ kvmppc_skip_interrupt:
|
||||
rfid
|
||||
b .
|
||||
|
||||
.globl kvmppc_skip_Hinterrupt
|
||||
kvmppc_skip_Hinterrupt:
|
||||
/*
|
||||
* Here all GPRs are unchanged from when the interrupt happened
|
||||
@ -65,8 +67,8 @@ kvmppc_skip_Hinterrupt:
|
||||
|
||||
#elif defined(CONFIG_PPC_BOOK3S_32)
|
||||
|
||||
#define MSR_NOIRQ MSR_KERNEL
|
||||
#define FUNC(name) name
|
||||
#define MTMSR_EERI(reg) mtmsr (reg)
|
||||
|
||||
.macro INTERRUPT_TRAMPOLINE intno
|
||||
|
||||
@ -167,40 +169,24 @@ kvmppc_handler_skip_ins:
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This trampoline brings us back to a real mode handler
|
||||
*
|
||||
* Input Registers:
|
||||
*
|
||||
* R5 = SRR0
|
||||
* R6 = SRR1
|
||||
* LR = real-mode IP
|
||||
* Call kvmppc_handler_trampoline_enter in real mode
|
||||
*
|
||||
* On entry, r4 contains the guest shadow MSR
|
||||
*/
|
||||
.global kvmppc_handler_lowmem_trampoline
|
||||
kvmppc_handler_lowmem_trampoline:
|
||||
_GLOBAL(kvmppc_entry_trampoline)
|
||||
mfmsr r5
|
||||
LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
|
||||
toreal(r7)
|
||||
|
||||
mtsrr0 r5
|
||||
li r9, MSR_RI
|
||||
ori r9, r9, MSR_EE
|
||||
andc r9, r5, r9 /* Clear EE and RI in MSR value */
|
||||
li r6, MSR_IR | MSR_DR
|
||||
ori r6, r6, MSR_EE
|
||||
andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
|
||||
MTMSR_EERI(r9) /* Clear EE and RI in MSR */
|
||||
mtsrr0 r7 /* before we set srr0/1 */
|
||||
mtsrr1 r6
|
||||
blr
|
||||
kvmppc_handler_lowmem_trampoline_end:
|
||||
|
||||
/*
|
||||
* Call a function in real mode
|
||||
*
|
||||
* Input Registers:
|
||||
*
|
||||
* R3 = function
|
||||
* R4 = MSR
|
||||
* R5 = scratch register
|
||||
*
|
||||
*/
|
||||
_GLOBAL(kvmppc_rmcall)
|
||||
LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
|
||||
mtmsr r5 /* Disable relocation and interrupts, so mtsrr
|
||||
doesn't get interrupted */
|
||||
sync
|
||||
mtsrr0 r3
|
||||
mtsrr1 r4
|
||||
RFI
|
||||
|
||||
#if defined(CONFIG_PPC_BOOK3S_32)
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#define GET_SHADOW_VCPU(reg) \
|
||||
mr reg, r13
|
||||
#define MTMSR_EERI(reg) mtmsrd (reg),1
|
||||
|
||||
#elif defined(CONFIG_PPC_BOOK3S_32)
|
||||
|
||||
@ -30,6 +31,7 @@
|
||||
tophys(reg, r2); \
|
||||
lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \
|
||||
tophys(reg, reg)
|
||||
#define MTMSR_EERI(reg) mtmsr (reg)
|
||||
|
||||
#endif
|
||||
|
||||
@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter:
|
||||
/* Required state:
|
||||
*
|
||||
* MSR = ~IR|DR
|
||||
* R13 = PACA
|
||||
* R1 = host R1
|
||||
* R2 = host R2
|
||||
* R10 = guest MSR
|
||||
* R4 = guest shadow MSR
|
||||
* R5 = normal host MSR
|
||||
* R6 = current host MSR (EE, IR, DR off)
|
||||
* LR = highmem guest exit code
|
||||
* all other volatile GPRS = free
|
||||
* SVCPU[CR] = guest CR
|
||||
* SVCPU[XER] = guest XER
|
||||
@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter:
|
||||
/* r3 = shadow vcpu */
|
||||
GET_SHADOW_VCPU(r3)
|
||||
|
||||
/* Save guest exit handler address and MSR */
|
||||
mflr r0
|
||||
PPC_STL r0, HSTATE_VMHANDLER(r3)
|
||||
PPC_STL r5, HSTATE_HOST_MSR(r3)
|
||||
|
||||
/* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
|
||||
PPC_STL r1, HSTATE_HOST_R1(r3)
|
||||
PPC_STL r2, HSTATE_HOST_R2(r3)
|
||||
|
||||
/* Move SRR0 and SRR1 into the respective regs */
|
||||
PPC_LL r9, SVCPU_PC(r3)
|
||||
mtsrr0 r9
|
||||
mtsrr1 r10
|
||||
|
||||
/* Activate guest mode, so faults get handled by KVM */
|
||||
li r11, KVM_GUEST_MODE_GUEST
|
||||
stb r11, HSTATE_IN_GUEST(r3)
|
||||
@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter:
|
||||
/* Switch to guest segment. This is subarch specific. */
|
||||
LOAD_GUEST_SEGMENTS
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/* Some guests may need to have dcbz set to 32 byte length.
|
||||
*
|
||||
* Usually we ensure that by patching the guest's instructions
|
||||
* to trap on dcbz and emulate it in the hypervisor.
|
||||
*
|
||||
* If we can, we should tell the CPU to use 32 byte dcbz though,
|
||||
* because that's a lot faster.
|
||||
*/
|
||||
lbz r0, HSTATE_RESTORE_HID5(r3)
|
||||
cmpwi r0, 0
|
||||
beq no_dcbz32_on
|
||||
|
||||
mfspr r0,SPRN_HID5
|
||||
ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */
|
||||
mtspr SPRN_HID5,r0
|
||||
no_dcbz32_on:
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
/* Enter guest */
|
||||
|
||||
PPC_LL r4, SVCPU_CTR(r3)
|
||||
PPC_LL r5, SVCPU_LR(r3)
|
||||
lwz r6, SVCPU_CR(r3)
|
||||
lwz r7, SVCPU_XER(r3)
|
||||
PPC_LL r8, SVCPU_CTR(r3)
|
||||
PPC_LL r9, SVCPU_LR(r3)
|
||||
lwz r10, SVCPU_CR(r3)
|
||||
lwz r11, SVCPU_XER(r3)
|
||||
|
||||
mtctr r4
|
||||
mtlr r5
|
||||
mtcr r6
|
||||
mtxer r7
|
||||
mtctr r8
|
||||
mtlr r9
|
||||
mtcr r10
|
||||
mtxer r11
|
||||
|
||||
/* Move SRR0 and SRR1 into the respective regs */
|
||||
PPC_LL r9, SVCPU_PC(r3)
|
||||
/* First clear RI in our current MSR value */
|
||||
li r0, MSR_RI
|
||||
andc r6, r6, r0
|
||||
MTMSR_EERI(r6)
|
||||
mtsrr0 r9
|
||||
mtsrr1 r4
|
||||
|
||||
PPC_LL r0, SVCPU_R0(r3)
|
||||
PPC_LL r1, SVCPU_R1(r3)
|
||||
@ -213,11 +246,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
|
||||
beq ld_last_inst
|
||||
cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
|
||||
beq ld_last_inst
|
||||
cmpwi r12, BOOK3S_INTERRUPT_SYSCALL
|
||||
beq ld_last_prev_inst
|
||||
cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
|
||||
beq- ld_last_inst
|
||||
|
||||
b no_ld_last_inst
|
||||
|
||||
ld_last_prev_inst:
|
||||
addi r3, r3, -4
|
||||
|
||||
ld_last_inst:
|
||||
/* Save off the guest instruction we're at */
|
||||
|
||||
@ -254,6 +292,43 @@ no_ld_last_inst:
|
||||
/* Switch back to host MMU */
|
||||
LOAD_HOST_SEGMENTS
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
|
||||
lbz r5, HSTATE_RESTORE_HID5(r13)
|
||||
cmpwi r5, 0
|
||||
beq no_dcbz32_off
|
||||
|
||||
li r4, 0
|
||||
mfspr r5,SPRN_HID5
|
||||
rldimi r5,r4,6,56
|
||||
mtspr SPRN_HID5,r5
|
||||
|
||||
no_dcbz32_off:
|
||||
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
/*
|
||||
* For some interrupts, we need to call the real Linux
|
||||
* handler, so it can do work for us. This has to happen
|
||||
* as if the interrupt arrived from the kernel though,
|
||||
* so let's fake it here where most state is restored.
|
||||
*
|
||||
* Having set up SRR0/1 with the address where we want
|
||||
* to continue with relocation on (potentially in module
|
||||
* space), we either just go straight there with rfi[d],
|
||||
* or we jump to an interrupt handler with bctr if there
|
||||
* is an interrupt to be handled first. In the latter
|
||||
* case, the rfi[d] at the end of the interrupt handler
|
||||
* will get us back to where we want to continue.
|
||||
*/
|
||||
|
||||
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
|
||||
beq 1f
|
||||
cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
|
||||
beq 1f
|
||||
cmpwi r12, BOOK3S_INTERRUPT_PERFMON
|
||||
1: mtctr r12
|
||||
|
||||
/* Register usage at this point:
|
||||
*
|
||||
* R1 = host R1
|
||||
@ -264,13 +339,15 @@ no_ld_last_inst:
|
||||
*
|
||||
*/
|
||||
|
||||
/* RFI into the highmem handler */
|
||||
mfmsr r7
|
||||
ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
|
||||
mtsrr1 r7
|
||||
/* Load highmem handler address */
|
||||
PPC_LL r6, HSTATE_HOST_MSR(r13)
|
||||
PPC_LL r8, HSTATE_VMHANDLER(r13)
|
||||
|
||||
/* Restore host msr -> SRR1 */
|
||||
mtsrr1 r6
|
||||
/* Load highmem handler address */
|
||||
mtsrr0 r8
|
||||
|
||||
/* RFI into the highmem handler, or jump to interrupt handler */
|
||||
beqctr
|
||||
RFI
|
||||
kvmppc_handler_trampoline_exit_end:
|
||||
|
@ -316,6 +316,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!vcpu->arch.sane) {
|
||||
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
kvm_guest_enter();
|
||||
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
|
||||
@ -618,6 +623,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
int r;
|
||||
|
||||
vcpu->arch.pc = 0;
|
||||
vcpu->arch.shared->msr = 0;
|
||||
@ -634,7 +640,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvmppc_init_timing_stats(vcpu);
|
||||
|
||||
return kvmppc_core_vcpu_setup(vcpu);
|
||||
r = kvmppc_core_vcpu_setup(vcpu);
|
||||
kvmppc_sanity_check(vcpu);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
|
@ -73,6 +73,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
|
||||
vcpu->vcpu_id = 0;
|
||||
|
||||
vcpu->arch.cpu_type = KVM_CPU_E500V2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -39,12 +39,8 @@
|
||||
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
||||
{
|
||||
#ifndef CONFIG_KVM_BOOK3S_64_HV
|
||||
return !(v->arch.shared->msr & MSR_WE) ||
|
||||
!!(v->arch.pending_exceptions);
|
||||
#else
|
||||
return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
|
||||
#endif
|
||||
}
|
||||
|
||||
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
|
||||
@ -95,6 +91,31 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r = false;
|
||||
|
||||
/* We have to know what CPU to virtualize */
|
||||
if (!vcpu->arch.pvr)
|
||||
goto out;
|
||||
|
||||
/* PAPR only works with book3s_64 */
|
||||
if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
|
||||
goto out;
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_HV
|
||||
/* HV KVM can only do PAPR mode for now */
|
||||
if (!vcpu->arch.papr_enabled)
|
||||
goto out;
|
||||
#endif
|
||||
|
||||
r = true;
|
||||
|
||||
out:
|
||||
vcpu->arch.sane = r;
|
||||
return r ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
enum emulation_result er;
|
||||
@ -188,6 +209,8 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_PPC_BOOKE_SREGS:
|
||||
#else
|
||||
case KVM_CAP_PPC_SEGSTATE:
|
||||
case KVM_CAP_PPC_HIOR:
|
||||
case KVM_CAP_PPC_PAPR:
|
||||
#endif
|
||||
case KVM_CAP_PPC_UNSET_IRQ:
|
||||
case KVM_CAP_PPC_IRQ_LEVEL:
|
||||
@ -258,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
vcpu = kvmppc_core_vcpu_create(kvm, id);
|
||||
vcpu->arch.wqp = &vcpu->wq;
|
||||
if (!IS_ERR(vcpu))
|
||||
kvmppc_create_vcpu_debugfs(vcpu, id);
|
||||
return vcpu;
|
||||
@ -289,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data)
|
||||
|
||||
kvmppc_core_queue_dec(vcpu);
|
||||
|
||||
if (waitqueue_active(&vcpu->wq)) {
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
if (waitqueue_active(vcpu->arch.wqp)) {
|
||||
wake_up_interruptible(vcpu->arch.wqp);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
}
|
||||
}
|
||||
@ -543,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
|
||||
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
|
||||
{
|
||||
if (irq->irq == KVM_INTERRUPT_UNSET)
|
||||
if (irq->irq == KVM_INTERRUPT_UNSET) {
|
||||
kvmppc_core_dequeue_external(vcpu, irq);
|
||||
else
|
||||
kvmppc_core_queue_external(vcpu, irq);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (waitqueue_active(&vcpu->wq)) {
|
||||
wake_up_interruptible(&vcpu->wq);
|
||||
kvmppc_core_queue_external(vcpu, irq);
|
||||
|
||||
if (waitqueue_active(vcpu->arch.wqp)) {
|
||||
wake_up_interruptible(vcpu->arch.wqp);
|
||||
vcpu->stat.halt_wakeup++;
|
||||
} else if (vcpu->cpu != -1) {
|
||||
smp_send_reschedule(vcpu->cpu);
|
||||
@ -571,11 +597,18 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
||||
r = 0;
|
||||
vcpu->arch.osi_enabled = true;
|
||||
break;
|
||||
case KVM_CAP_PPC_PAPR:
|
||||
r = 0;
|
||||
vcpu->arch.papr_enabled = true;
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!r)
|
||||
r = kvmppc_sanity_check(vcpu);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -119,6 +119,7 @@ struct kvm_vcpu_stat {
|
||||
u32 instruction_lctlg;
|
||||
u32 exit_program_interruption;
|
||||
u32 exit_instr_and_program;
|
||||
u32 deliver_external_call;
|
||||
u32 deliver_emergency_signal;
|
||||
u32 deliver_service_signal;
|
||||
u32 deliver_virtio_interrupt;
|
||||
@ -138,6 +139,7 @@ struct kvm_vcpu_stat {
|
||||
u32 instruction_stfl;
|
||||
u32 instruction_tprot;
|
||||
u32 instruction_sigp_sense;
|
||||
u32 instruction_sigp_external_call;
|
||||
u32 instruction_sigp_emergency;
|
||||
u32 instruction_sigp_stop;
|
||||
u32 instruction_sigp_arch;
|
||||
@ -174,6 +176,10 @@ struct kvm_s390_prefix_info {
|
||||
__u32 address;
|
||||
};
|
||||
|
||||
struct kvm_s390_extcall_info {
|
||||
__u16 code;
|
||||
};
|
||||
|
||||
struct kvm_s390_emerg_info {
|
||||
__u16 code;
|
||||
};
|
||||
@ -186,6 +192,7 @@ struct kvm_s390_interrupt_info {
|
||||
struct kvm_s390_ext_info ext;
|
||||
struct kvm_s390_pgm_info pgm;
|
||||
struct kvm_s390_emerg_info emerg;
|
||||
struct kvm_s390_extcall_info extcall;
|
||||
struct kvm_s390_prefix_info prefix;
|
||||
};
|
||||
};
|
||||
|
@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
|
||||
struct kvm_s390_interrupt_info *inti)
|
||||
{
|
||||
switch (inti->type) {
|
||||
case KVM_S390_INT_EXTERNAL_CALL:
|
||||
if (psw_extint_disabled(vcpu))
|
||||
return 0;
|
||||
if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
|
||||
return 1;
|
||||
case KVM_S390_INT_EMERGENCY:
|
||||
if (psw_extint_disabled(vcpu))
|
||||
return 0;
|
||||
@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
|
||||
struct kvm_s390_interrupt_info *inti)
|
||||
{
|
||||
switch (inti->type) {
|
||||
case KVM_S390_INT_EXTERNAL_CALL:
|
||||
case KVM_S390_INT_EMERGENCY:
|
||||
case KVM_S390_INT_SERVICE:
|
||||
case KVM_S390_INT_VIRTIO:
|
||||
@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
|
||||
exception = 1;
|
||||
break;
|
||||
|
||||
case KVM_S390_INT_EXTERNAL_CALL:
|
||||
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
|
||||
vcpu->stat.deliver_external_call++;
|
||||
rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
|
||||
if (rc == -EFAULT)
|
||||
exception = 1;
|
||||
|
||||
rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code);
|
||||
if (rc == -EFAULT)
|
||||
exception = 1;
|
||||
|
||||
rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
|
||||
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
|
||||
if (rc == -EFAULT)
|
||||
exception = 1;
|
||||
|
||||
rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
|
||||
__LC_EXT_NEW_PSW, sizeof(psw_t));
|
||||
if (rc == -EFAULT)
|
||||
exception = 1;
|
||||
break;
|
||||
|
||||
case KVM_S390_INT_SERVICE:
|
||||
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
|
||||
inti->ext.ext_params);
|
||||
@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
|
||||
break;
|
||||
case KVM_S390_PROGRAM_INT:
|
||||
case KVM_S390_SIGP_STOP:
|
||||
case KVM_S390_INT_EXTERNAL_CALL:
|
||||
case KVM_S390_INT_EMERGENCY:
|
||||
default:
|
||||
kfree(inti);
|
||||
@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
|
||||
break;
|
||||
case KVM_S390_SIGP_STOP:
|
||||
case KVM_S390_RESTART:
|
||||
case KVM_S390_INT_EXTERNAL_CALL:
|
||||
case KVM_S390_INT_EMERGENCY:
|
||||
VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
|
||||
inti->type = s390int->type;
|
||||
|
@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
|
||||
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
|
||||
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
|
||||
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
|
||||
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
|
||||
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
|
||||
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
|
||||
@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
|
||||
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
|
||||
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
|
||||
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
|
||||
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
|
||||
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
|
||||
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
|
||||
@ -175,6 +177,8 @@ int kvm_arch_init_vm(struct kvm *kvm)
|
||||
if (rc)
|
||||
goto out_err;
|
||||
|
||||
rc = -ENOMEM;
|
||||
|
||||
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
|
||||
if (!kvm->arch.sca)
|
||||
goto out_err;
|
||||
@ -312,11 +316,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
unsigned int id)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
|
||||
int rc = -ENOMEM;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int rc = -EINVAL;
|
||||
|
||||
if (id >= KVM_MAX_VCPUS)
|
||||
goto out;
|
||||
|
||||
rc = -ENOMEM;
|
||||
|
||||
vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
|
||||
if (!vcpu)
|
||||
goto out_nomem;
|
||||
goto out;
|
||||
|
||||
vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
|
||||
get_zeroed_page(GFP_KERNEL);
|
||||
@ -352,7 +362,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
free_page((unsigned long)(vcpu->arch.sie_block));
|
||||
out_free_cpu:
|
||||
kfree(vcpu);
|
||||
out_nomem:
|
||||
out:
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
@ -386,6 +396,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
|
||||
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
|
||||
restore_access_regs(vcpu->arch.guest_acrs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -401,6 +412,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
{
|
||||
memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
|
||||
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
|
||||
restore_fp_regs(&vcpu->arch.guest_fpregs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
|
||||
return -ENOMEM;
|
||||
|
||||
inti->type = KVM_S390_INT_EMERGENCY;
|
||||
inti->emerg.code = vcpu->vcpu_id;
|
||||
|
||||
spin_lock(&fi->lock);
|
||||
li = fi->local_int[cpu_addr];
|
||||
@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
|
||||
wake_up_interruptible(&li->wq);
|
||||
spin_unlock_bh(&li->lock);
|
||||
rc = 0; /* order accepted */
|
||||
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
|
||||
unlock:
|
||||
spin_unlock(&fi->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Queue an external-call interrupt for the vcpu addressed by cpu_addr.
 *
 * The interrupt carries the sending vcpu's id as its code. On success the
 * allocated interrupt is appended to the target's local interrupt list and
 * the target is woken if it is waiting.
 *
 * Returns 0 when the order was accepted, 3 when the target is not
 * operational (address out of range or no local interrupt state), and
 * -ENOMEM when the interrupt descriptor cannot be allocated.
 */
static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
	struct kvm_s390_local_interrupt *li;
	struct kvm_s390_interrupt_info *inti;
	int rc;

	if (cpu_addr >= KVM_MAX_VCPUS)
		return 3; /* not operational */

	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
	if (!inti)
		return -ENOMEM;

	inti->type = KVM_S390_INT_EXTERNAL_CALL;
	inti->extcall.code = vcpu->vcpu_id;

	spin_lock(&fi->lock);
	li = fi->local_int[cpu_addr];
	if (li == NULL) {
		rc = 3; /* not operational */
		kfree(inti);
		goto unlock;
	}
	spin_lock_bh(&li->lock);
	list_add_tail(&inti->list, &li->list);
	atomic_set(&li->active, 1);
	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
	if (waitqueue_active(&li->wq))
		wake_up_interruptible(&li->wq);
	spin_unlock_bh(&li->lock);
	rc = 0; /* order accepted */
	/* Trace only the accepted order; nothing is sent on the error path. */
	VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
unlock:
	spin_unlock(&fi->lock);
	return rc;
}
|
||||
|
||||
@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
rc = __sigp_sense(vcpu, cpu_addr,
|
||||
&vcpu->arch.guest_gprs[r1]);
|
||||
break;
|
||||
case SIGP_EXTERNAL_CALL:
|
||||
vcpu->stat.instruction_sigp_external_call++;
|
||||
rc = __sigp_external_call(vcpu, cpu_addr);
|
||||
break;
|
||||
case SIGP_EMERGENCY:
|
||||
vcpu->stat.instruction_sigp_emergency++;
|
||||
rc = __sigp_emergency(vcpu, cpu_addr);
|
||||
|
@ -100,7 +100,9 @@
|
||||
#define APIC_TIMER_BASE_CLKIN 0x0
|
||||
#define APIC_TIMER_BASE_TMBASE 0x1
|
||||
#define APIC_TIMER_BASE_DIV 0x2
|
||||
#define APIC_LVT_TIMER_ONESHOT (0 << 17)
|
||||
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
|
||||
#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
|
||||
#define APIC_LVT_MASKED (1 << 16)
|
||||
#define APIC_LVT_LEVEL_TRIGGER (1 << 15)
|
||||
#define APIC_LVT_REMOTE_IRR (1 << 14)
|
||||
|
@ -121,6 +121,7 @@
|
||||
#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
|
||||
#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
|
||||
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
|
||||
#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */
|
||||
#define X86_FEATURE_AES (4*32+25) /* AES instructions */
|
||||
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
|
||||
#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
|
||||
|
@ -262,7 +262,7 @@ struct x86_emulate_ctxt {
|
||||
struct operand dst;
|
||||
bool has_seg_override;
|
||||
u8 seg_override;
|
||||
unsigned int d;
|
||||
u64 d;
|
||||
int (*execute)(struct x86_emulate_ctxt *ctxt);
|
||||
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
|
||||
/* modrm */
|
||||
@ -275,6 +275,8 @@ struct x86_emulate_ctxt {
|
||||
unsigned long _eip;
|
||||
/* Fields above regs are cleared together. */
|
||||
unsigned long regs[NR_VCPU_REGS];
|
||||
struct operand memop;
|
||||
struct operand *memopp;
|
||||
struct fetch_cache fetch;
|
||||
struct read_cache io_read;
|
||||
struct read_cache mem_read;
|
||||
|
@ -26,7 +26,8 @@
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
#define KVM_MAX_VCPUS 64
|
||||
#define KVM_MAX_VCPUS 254
|
||||
#define KVM_SOFT_MAX_VCPUS 64
|
||||
#define KVM_MEMORY_SLOTS 32
|
||||
/* memory slots that are not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 4
|
||||
@ -264,6 +265,7 @@ struct kvm_mmu {
|
||||
void (*new_cr3)(struct kvm_vcpu *vcpu);
|
||||
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
|
||||
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
|
||||
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
|
||||
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
|
||||
bool prefault);
|
||||
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
|
||||
@ -411,8 +413,9 @@ struct kvm_vcpu_arch {
|
||||
u32 tsc_catchup_mult;
|
||||
s8 tsc_catchup_shift;
|
||||
|
||||
bool nmi_pending;
|
||||
bool nmi_injected;
|
||||
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
|
||||
unsigned nmi_pending; /* NMI queued after currently running handler */
|
||||
bool nmi_injected; /* Trying to inject an NMI this entry */
|
||||
|
||||
struct mtrr_state_type mtrr_state;
|
||||
u32 pat;
|
||||
@ -628,14 +631,13 @@ struct kvm_x86_ops {
|
||||
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
|
||||
|
||||
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
|
||||
u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
|
||||
|
||||
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
|
||||
|
||||
int (*check_intercept)(struct kvm_vcpu *vcpu,
|
||||
struct x86_instruction_info *info,
|
||||
enum x86_intercept_stage stage);
|
||||
|
||||
const struct trace_print_flags *exit_reasons_str;
|
||||
};
|
||||
|
||||
struct kvm_arch_async_pf {
|
||||
@ -672,6 +674,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
|
||||
|
||||
extern bool tdp_enabled;
|
||||
|
||||
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* control of guest tsc rate supported? */
|
||||
extern bool kvm_has_tsc_control;
|
||||
/* minimum supported tsc_khz for guests */
|
||||
|
@ -229,6 +229,8 @@
|
||||
#define MSR_IA32_APICBASE_ENABLE (1<<11)
|
||||
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
|
||||
|
||||
#define MSR_IA32_TSCDEADLINE 0x000006e0
|
||||
|
||||
#define MSR_IA32_UCODE_WRITE 0x00000079
|
||||
#define MSR_IA32_UCODE_REV 0x0000008b
|
||||
|
||||
|
@ -350,6 +350,18 @@ enum vmcs_field {
|
||||
#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
|
||||
|
||||
|
||||
/*
|
||||
* Exit Qualifications for APIC-Access
|
||||
*/
|
||||
#define APIC_ACCESS_OFFSET 0xfff /* 11:0, offset within the APIC page */
|
||||
#define APIC_ACCESS_TYPE 0xf000 /* 15:12, access type */
|
||||
#define TYPE_LINEAR_APIC_INST_READ (0 << 12)
|
||||
#define TYPE_LINEAR_APIC_INST_WRITE (1 << 12)
|
||||
#define TYPE_LINEAR_APIC_INST_FETCH (2 << 12)
|
||||
#define TYPE_LINEAR_APIC_EVENT (3 << 12)
|
||||
#define TYPE_PHYSICAL_APIC_EVENT (10 << 12)
|
||||
#define TYPE_PHYSICAL_APIC_INST (15 << 12)
|
||||
|
||||
/* segment AR */
|
||||
#define SEGMENT_AR_L_MASK (1 << 13)
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
|
||||
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
|
||||
|
||||
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
|
||||
KVM_PIT_MEM_LENGTH, &pit->dev);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
if (flags & KVM_PIT_SPEAKER_DUMMY) {
|
||||
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
|
||||
&pit->speaker_dev);
|
||||
KVM_SPEAKER_BASE_ADDRESS, 4,
|
||||
&pit->speaker_dev);
|
||||
if (ret < 0)
|
||||
goto fail_unregister;
|
||||
}
|
||||
|
@ -34,6 +34,9 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include "trace.h"
|
||||
|
||||
#define pr_pic_unimpl(fmt, ...) \
|
||||
pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__)
|
||||
|
||||
static void pic_irq_request(struct kvm *kvm, int level);
|
||||
|
||||
static void pic_lock(struct kvm_pic *s)
|
||||
@ -306,10 +309,10 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
|
||||
}
|
||||
s->init_state = 1;
|
||||
if (val & 0x02)
|
||||
printk(KERN_ERR "single mode not supported");
|
||||
pr_pic_unimpl("single mode not supported");
|
||||
if (val & 0x08)
|
||||
printk(KERN_ERR
|
||||
"level sensitive irq not supported");
|
||||
pr_pic_unimpl(
|
||||
"level sensitive irq not supported");
|
||||
} else if (val & 0x08) {
|
||||
if (val & 0x04)
|
||||
s->poll = 1;
|
||||
@ -459,22 +462,15 @@ static int picdev_in_range(gpa_t addr)
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
|
||||
{
|
||||
return container_of(dev, struct kvm_pic, dev);
|
||||
}
|
||||
|
||||
static int picdev_write(struct kvm_io_device *this,
|
||||
static int picdev_write(struct kvm_pic *s,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
struct kvm_pic *s = to_pic(this);
|
||||
unsigned char data = *(unsigned char *)val;
|
||||
if (!picdev_in_range(addr))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (len != 1) {
|
||||
if (printk_ratelimit())
|
||||
printk(KERN_ERR "PIC: non byte write\n");
|
||||
pr_pic_unimpl("non byte write\n");
|
||||
return 0;
|
||||
}
|
||||
pic_lock(s);
|
||||
@ -494,17 +490,15 @@ static int picdev_write(struct kvm_io_device *this,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int picdev_read(struct kvm_io_device *this,
|
||||
static int picdev_read(struct kvm_pic *s,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
struct kvm_pic *s = to_pic(this);
|
||||
unsigned char data = 0;
|
||||
if (!picdev_in_range(addr))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (len != 1) {
|
||||
if (printk_ratelimit())
|
||||
printk(KERN_ERR "PIC: non byte read\n");
|
||||
pr_pic_unimpl("non byte read\n");
|
||||
return 0;
|
||||
}
|
||||
pic_lock(s);
|
||||
@ -525,6 +519,48 @@ static int picdev_read(struct kvm_io_device *this,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int picdev_master_write(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
return picdev_write(container_of(dev, struct kvm_pic, dev_master),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
static int picdev_master_read(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
return picdev_read(container_of(dev, struct kvm_pic, dev_master),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
static int picdev_slave_write(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
static int picdev_slave_read(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
static int picdev_eclr_write(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, const void *val)
|
||||
{
|
||||
return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
static int picdev_eclr_read(struct kvm_io_device *dev,
|
||||
gpa_t addr, int len, void *val)
|
||||
{
|
||||
return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
|
||||
addr, len, val);
|
||||
}
|
||||
|
||||
/*
|
||||
* callback when PIC0 irq status changed
|
||||
*/
|
||||
@ -537,9 +573,19 @@ static void pic_irq_request(struct kvm *kvm, int level)
|
||||
s->output = level;
|
||||
}
|
||||
|
||||
static const struct kvm_io_device_ops picdev_ops = {
|
||||
.read = picdev_read,
|
||||
.write = picdev_write,
|
||||
static const struct kvm_io_device_ops picdev_master_ops = {
|
||||
.read = picdev_master_read,
|
||||
.write = picdev_master_write,
|
||||
};
|
||||
|
||||
static const struct kvm_io_device_ops picdev_slave_ops = {
|
||||
.read = picdev_slave_read,
|
||||
.write = picdev_slave_write,
|
||||
};
|
||||
|
||||
static const struct kvm_io_device_ops picdev_eclr_ops = {
|
||||
.read = picdev_eclr_read,
|
||||
.write = picdev_eclr_write,
|
||||
};
|
||||
|
||||
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
||||
@ -560,16 +606,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
|
||||
/*
|
||||
* Initialize PIO device
|
||||
*/
|
||||
kvm_iodevice_init(&s->dev, &picdev_ops);
|
||||
kvm_iodevice_init(&s->dev_master, &picdev_master_ops);
|
||||
kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops);
|
||||
kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops);
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,
|
||||
&s->dev_master);
|
||||
if (ret < 0)
|
||||
goto fail_unlock;
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
|
||||
if (ret < 0)
|
||||
goto fail_unreg_2;
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
|
||||
if (ret < 0)
|
||||
goto fail_unreg_1;
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
if (ret < 0) {
|
||||
kfree(s);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return s;
|
||||
|
||||
fail_unreg_1:
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
|
||||
|
||||
fail_unreg_2:
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
|
||||
|
||||
fail_unlock:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
kfree(s);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void kvm_destroy_pic(struct kvm *kvm)
|
||||
@ -577,7 +646,9 @@ void kvm_destroy_pic(struct kvm *kvm)
|
||||
struct kvm_pic *vpic = kvm->arch.vpic;
|
||||
|
||||
if (vpic) {
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
|
||||
kvm->arch.vpic = NULL;
|
||||
kfree(vpic);
|
||||
}
|
||||
|
@ -66,7 +66,9 @@ struct kvm_pic {
|
||||
struct kvm *kvm;
|
||||
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
|
||||
int output; /* intr from master PIC */
|
||||
struct kvm_io_device dev;
|
||||
struct kvm_io_device dev_master;
|
||||
struct kvm_io_device dev_slave;
|
||||
struct kvm_io_device dev_eclr;
|
||||
void (*ack_notifier)(void *opaque, int irq);
|
||||
unsigned long irq_states[16];
|
||||
};
|
||||
|
@ -45,13 +45,6 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
|
||||
return vcpu->arch.walk_mmu->pdptrs[index];
|
||||
}
|
||||
|
||||
static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index)
|
||||
{
|
||||
load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu));
|
||||
|
||||
return mmu->pdptrs[index];
|
||||
}
|
||||
|
||||
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
|
||||
{
|
||||
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
|
||||
|
@ -2,6 +2,8 @@
|
||||
struct kvm_timer {
|
||||
struct hrtimer timer;
|
||||
s64 period; /* unit: ns */
|
||||
u32 timer_mode_mask;
|
||||
u64 tscdeadline;
|
||||
atomic_t pending; /* accumulated triggered timers */
|
||||
bool reinject;
|
||||
struct kvm_timer_ops *t_ops;
|
||||
|
@ -68,6 +68,9 @@
|
||||
#define VEC_POS(v) ((v) & (32 - 1))
|
||||
#define REG_POS(v) (((v) >> 5) << 4)
|
||||
|
||||
/*
 * Smallest period, in microseconds, that start_apic_timer() accepts for a
 * periodic lapic timer; shorter requested periods are raised to this value.
 * Defaults to 500 and is exposed as a module parameter with
 * S_IRUGO | S_IWUSR permissions.
 */
static unsigned int min_timer_period_us = 500;
|
||||
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
|
||||
|
||||
static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
|
||||
{
|
||||
return *((u32 *) (apic->regs + reg_off));
|
||||
@ -135,9 +138,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
|
||||
return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
|
||||
}
|
||||
|
||||
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
|
||||
{
|
||||
return ((apic_get_reg(apic, APIC_LVTT) &
|
||||
apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
|
||||
}
|
||||
|
||||
static inline int apic_lvtt_period(struct kvm_lapic *apic)
|
||||
{
|
||||
return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
|
||||
return ((apic_get_reg(apic, APIC_LVTT) &
|
||||
apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
|
||||
}
|
||||
|
||||
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
|
||||
{
|
||||
return ((apic_get_reg(apic, APIC_LVTT) &
|
||||
apic->lapic_timer.timer_mode_mask) ==
|
||||
APIC_LVT_TIMER_TSCDEADLINE);
|
||||
}
|
||||
|
||||
static inline int apic_lvt_nmi_mode(u32 lvt_val)
|
||||
@ -166,7 +183,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
|
||||
}
|
||||
|
||||
static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
|
||||
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
|
||||
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
|
||||
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
|
||||
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
|
||||
LINT_MASK, LINT_MASK, /* LVT0-1 */
|
||||
@ -316,8 +333,8 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
|
||||
result = 1;
|
||||
break;
|
||||
default:
|
||||
printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
|
||||
apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
|
||||
apic_debug("Bad DFR vcpu %d: %08x\n",
|
||||
apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -354,8 +371,8 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
|
||||
result = (target != source);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_WARNING "Bad dest shorthand value %x\n",
|
||||
short_hand);
|
||||
apic_debug("kvm: apic: Bad dest shorthand value %x\n",
|
||||
short_hand);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -401,11 +418,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
break;
|
||||
|
||||
case APIC_DM_REMRD:
|
||||
printk(KERN_DEBUG "Ignoring delivery mode 3\n");
|
||||
apic_debug("Ignoring delivery mode 3\n");
|
||||
break;
|
||||
|
||||
case APIC_DM_SMI:
|
||||
printk(KERN_DEBUG "Ignoring guest SMI\n");
|
||||
apic_debug("Ignoring guest SMI\n");
|
||||
break;
|
||||
|
||||
case APIC_DM_NMI:
|
||||
@ -565,11 +582,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
|
||||
val = kvm_apic_id(apic) << 24;
|
||||
break;
|
||||
case APIC_ARBPRI:
|
||||
printk(KERN_WARNING "Access APIC ARBPRI register "
|
||||
"which is for P6\n");
|
||||
apic_debug("Access APIC ARBPRI register which is for P6\n");
|
||||
break;
|
||||
|
||||
case APIC_TMCCT: /* Timer CCR */
|
||||
if (apic_lvtt_tscdeadline(apic))
|
||||
return 0;
|
||||
|
||||
val = apic_get_tmcct(apic);
|
||||
break;
|
||||
|
||||
@ -664,29 +683,40 @@ static void update_divide_count(struct kvm_lapic *apic)
|
||||
|
||||
static void start_apic_timer(struct kvm_lapic *apic)
|
||||
{
|
||||
ktime_t now = apic->lapic_timer.timer.base->get_time();
|
||||
|
||||
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
|
||||
APIC_BUS_CYCLE_NS * apic->divide_count;
|
||||
ktime_t now;
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
|
||||
if (!apic->lapic_timer.period)
|
||||
return;
|
||||
/*
|
||||
* Do not allow the guest to program periodic timers with small
|
||||
* interval, since the hrtimers are not throttled by the host
|
||||
* scheduler.
|
||||
*/
|
||||
if (apic_lvtt_period(apic)) {
|
||||
if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
|
||||
apic->lapic_timer.period = NSEC_PER_MSEC/2;
|
||||
}
|
||||
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
|
||||
/* lapic timer in oneshot or periodic mode */
|
||||
now = apic->lapic_timer.timer.base->get_time();
|
||||
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
|
||||
* APIC_BUS_CYCLE_NS * apic->divide_count;
|
||||
|
||||
hrtimer_start(&apic->lapic_timer.timer,
|
||||
ktime_add_ns(now, apic->lapic_timer.period),
|
||||
HRTIMER_MODE_ABS);
|
||||
if (!apic->lapic_timer.period)
|
||||
return;
|
||||
/*
|
||||
* Do not allow the guest to program periodic timers with small
|
||||
* interval, since the hrtimers are not throttled by the host
|
||||
* scheduler.
|
||||
*/
|
||||
if (apic_lvtt_period(apic)) {
|
||||
s64 min_period = min_timer_period_us * 1000LL;
|
||||
|
||||
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
|
||||
if (apic->lapic_timer.period < min_period) {
|
||||
pr_info_ratelimited(
|
||||
"kvm: vcpu %i: requested %lld ns "
|
||||
"lapic timer period limited to %lld ns\n",
|
||||
apic->vcpu->vcpu_id,
|
||||
apic->lapic_timer.period, min_period);
|
||||
apic->lapic_timer.period = min_period;
|
||||
}
|
||||
}
|
||||
|
||||
hrtimer_start(&apic->lapic_timer.timer,
|
||||
ktime_add_ns(now, apic->lapic_timer.period),
|
||||
HRTIMER_MODE_ABS);
|
||||
|
||||
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
|
||||
PRIx64 ", "
|
||||
"timer initial count 0x%x, period %lldns, "
|
||||
"expire @ 0x%016" PRIx64 ".\n", __func__,
|
||||
@ -695,6 +725,30 @@ static void start_apic_timer(struct kvm_lapic *apic)
|
||||
apic->lapic_timer.period,
|
||||
ktime_to_ns(ktime_add_ns(now,
|
||||
apic->lapic_timer.period)));
|
||||
} else if (apic_lvtt_tscdeadline(apic)) {
|
||||
/* lapic timer in tsc deadline mode */
|
||||
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
|
||||
u64 ns = 0;
|
||||
struct kvm_vcpu *vcpu = apic->vcpu;
|
||||
unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu);
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(!tscdeadline || !this_tsc_khz))
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
now = apic->lapic_timer.timer.base->get_time();
|
||||
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
if (likely(tscdeadline > guest_tsc)) {
|
||||
ns = (tscdeadline - guest_tsc) * 1000000ULL;
|
||||
do_div(ns, this_tsc_khz);
|
||||
}
|
||||
hrtimer_start(&apic->lapic_timer.timer,
|
||||
ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
|
||||
@ -782,7 +836,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
||||
|
||||
case APIC_LVT0:
|
||||
apic_manage_nmi_watchdog(apic, val);
|
||||
case APIC_LVTT:
|
||||
case APIC_LVTTHMR:
|
||||
case APIC_LVTPC:
|
||||
case APIC_LVT1:
|
||||
@ -796,7 +849,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
||||
|
||||
break;
|
||||
|
||||
case APIC_LVTT:
|
||||
if ((apic_get_reg(apic, APIC_LVTT) &
|
||||
apic->lapic_timer.timer_mode_mask) !=
|
||||
(val & apic->lapic_timer.timer_mode_mask))
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
|
||||
if (!apic_sw_enabled(apic))
|
||||
val |= APIC_LVT_MASKED;
|
||||
val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
|
||||
apic_set_reg(apic, APIC_LVTT, val);
|
||||
break;
|
||||
|
||||
case APIC_TMICT:
|
||||
if (apic_lvtt_tscdeadline(apic))
|
||||
break;
|
||||
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
apic_set_reg(apic, APIC_TMICT, val);
|
||||
start_apic_timer(apic);
|
||||
@ -804,14 +872,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
||||
|
||||
case APIC_TDCR:
|
||||
if (val & 4)
|
||||
printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
|
||||
apic_debug("KVM_WRITE:TDCR %x\n", val);
|
||||
apic_set_reg(apic, APIC_TDCR, val);
|
||||
update_divide_count(apic);
|
||||
break;
|
||||
|
||||
case APIC_ESR:
|
||||
if (apic_x2apic_mode(apic) && val != 0) {
|
||||
printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
|
||||
apic_debug("KVM_WRITE:ESR not zero %x\n", val);
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
@ -864,6 +932,15 @@ static int apic_mmio_write(struct kvm_io_device *this,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
if (apic)
|
||||
apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
|
||||
|
||||
void kvm_free_lapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!vcpu->arch.apic)
|
||||
@ -883,6 +960,32 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
|
||||
*----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
if (!apic)
|
||||
return 0;
|
||||
|
||||
if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
|
||||
return 0;
|
||||
|
||||
return apic->lapic_timer.tscdeadline;
|
||||
}
|
||||
|
||||
/*
 * Store a new TSC deadline for @vcpu's lapic timer and re-arm it.
 * The write is ignored when there is no lapic, or when the timer is in
 * one-shot or periodic mode. Otherwise the pending hrtimer is cancelled,
 * @data is recorded and start_apic_timer() is invoked.
 */
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}
|
||||
|
||||
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
@ -26,6 +26,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
|
||||
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
|
||||
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
|
||||
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
|
||||
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
|
||||
@ -41,6 +42,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
|
||||
bool kvm_apic_present(struct kvm_vcpu *vcpu);
|
||||
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
|
||||
|
||||
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
|
||||
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
|
||||
|
||||
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
|
||||
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
|
||||
|
@ -2770,7 +2770,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
ASSERT(!VALID_PAGE(root));
|
||||
if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
|
||||
pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i);
|
||||
pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
|
||||
if (!is_present_gpte(pdptr)) {
|
||||
vcpu->arch.mmu.pae_root[i] = 0;
|
||||
continue;
|
||||
@ -3318,6 +3318,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->direct_map = true;
|
||||
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
|
||||
context->get_cr3 = get_cr3;
|
||||
context->get_pdptr = kvm_pdptr_read;
|
||||
context->inject_page_fault = kvm_inject_page_fault;
|
||||
context->nx = is_nx(vcpu);
|
||||
|
||||
@ -3376,6 +3377,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
|
||||
vcpu->arch.walk_mmu->get_cr3 = get_cr3;
|
||||
vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
|
||||
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
|
||||
|
||||
return r;
|
||||
@ -3386,6 +3388,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
||||
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
|
||||
|
||||
g_context->get_cr3 = get_cr3;
|
||||
g_context->get_pdptr = kvm_pdptr_read;
|
||||
g_context->inject_page_fault = kvm_inject_page_fault;
|
||||
|
||||
/*
|
||||
|
@ -121,16 +121,16 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
|
||||
|
||||
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
||||
{
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
|
||||
unsigned long *rmapp;
|
||||
struct kvm_mmu_page *rev_sp;
|
||||
gfn_t gfn;
|
||||
|
||||
|
||||
rev_sp = page_header(__pa(sptep));
|
||||
gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
|
||||
|
||||
if (!gfn_to_memslot(kvm, gfn)) {
|
||||
if (!printk_ratelimit())
|
||||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
|
||||
audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
|
||||
@ -141,7 +141,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
|
||||
|
||||
rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
|
||||
if (!*rmapp) {
|
||||
if (!printk_ratelimit())
|
||||
if (!__ratelimit(&ratelimit_state))
|
||||
return;
|
||||
audit_printk(kvm, "no rmap for writable spte %llx\n",
|
||||
*sptep);
|
||||
|
@ -147,7 +147,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
gfn_t table_gfn;
|
||||
unsigned index, pt_access, uninitialized_var(pte_access);
|
||||
gpa_t pte_gpa;
|
||||
bool eperm;
|
||||
bool eperm, last_gpte;
|
||||
int offset;
|
||||
const int write_fault = access & PFERR_WRITE_MASK;
|
||||
const int user_fault = access & PFERR_USER_MASK;
|
||||
@ -163,7 +163,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
|
||||
#if PTTYPE == 64
|
||||
if (walker->level == PT32E_ROOT_LEVEL) {
|
||||
pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
|
||||
pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
|
||||
trace_kvm_mmu_paging_element(pte, walker->level);
|
||||
if (!is_present_gpte(pte))
|
||||
goto error;
|
||||
@ -221,6 +221,17 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
eperm = true;
|
||||
#endif
|
||||
|
||||
last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
|
||||
if (last_gpte) {
|
||||
pte_access = pt_access &
|
||||
FNAME(gpte_access)(vcpu, pte, true);
|
||||
/* check if the kernel is fetching from user page */
|
||||
if (unlikely(pte_access & PT_USER_MASK) &&
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
|
||||
if (fetch_fault && !user_fault)
|
||||
eperm = true;
|
||||
}
|
||||
|
||||
if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
|
||||
int ret;
|
||||
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
|
||||
@ -238,18 +249,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
|
||||
walker->ptes[walker->level - 1] = pte;
|
||||
|
||||
if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) {
|
||||
if (last_gpte) {
|
||||
int lvl = walker->level;
|
||||
gpa_t real_gpa;
|
||||
gfn_t gfn;
|
||||
u32 ac;
|
||||
|
||||
/* check if the kernel is fetching from user page */
|
||||
if (unlikely(pte_access & PT_USER_MASK) &&
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
|
||||
if (fetch_fault && !user_fault)
|
||||
eperm = true;
|
||||
|
||||
gfn = gpte_to_gfn_lvl(pte, lvl);
|
||||
gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
|
||||
|
||||
@ -295,7 +300,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
walker->ptes[walker->level - 1] = pte;
|
||||
}
|
||||
|
||||
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true);
|
||||
walker->pt_access = pt_access;
|
||||
walker->pte_access = pte_access;
|
||||
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
|
||||
|
@ -1084,7 +1084,6 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
if (npt_enabled) {
|
||||
/* Setup VMCB for Nested Paging */
|
||||
control->nested_ctl = 1;
|
||||
clr_intercept(svm, INTERCEPT_TASK_SWITCH);
|
||||
clr_intercept(svm, INTERCEPT_INVLPG);
|
||||
clr_exception_intercept(svm, PF_VECTOR);
|
||||
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
|
||||
@ -1844,6 +1843,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
|
||||
return svm->nested.nested_cr3;
|
||||
}
|
||||
|
||||
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u64 cr3 = svm->nested.nested_cr3;
|
||||
u64 pdpte;
|
||||
int ret;
|
||||
|
||||
ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
|
||||
offset_in_page(cr3) + index * 8, 8);
|
||||
if (ret)
|
||||
return 0;
|
||||
return pdpte;
|
||||
}
|
||||
|
||||
static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
|
||||
unsigned long root)
|
||||
{
|
||||
@ -1875,6 +1888,7 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
|
||||
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
|
||||
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
|
||||
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
|
||||
vcpu->arch.mmu.shadow_root_level = get_npt_level();
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
|
||||
@ -2182,7 +2196,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
vmcb->control.exit_info_1,
|
||||
vmcb->control.exit_info_2,
|
||||
vmcb->control.exit_int_info,
|
||||
vmcb->control.exit_int_info_err);
|
||||
vmcb->control.exit_int_info_err,
|
||||
KVM_ISA_SVM);
|
||||
|
||||
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
|
||||
if (!nested_vmcb)
|
||||
@ -2894,15 +2909,20 @@ static int cr8_write_interception(struct vcpu_svm *svm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
|
||||
return vmcb->control.tsc_offset +
|
||||
svm_scale_tsc(vcpu, native_read_tsc());
|
||||
}
|
||||
|
||||
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
switch (ecx) {
|
||||
case MSR_IA32_TSC: {
|
||||
struct vmcb *vmcb = get_host_vmcb(svm);
|
||||
|
||||
*data = vmcb->control.tsc_offset +
|
||||
*data = svm->vmcb->control.tsc_offset +
|
||||
svm_scale_tsc(vcpu, native_read_tsc());
|
||||
|
||||
break;
|
||||
@ -3314,8 +3334,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
u32 exit_code = svm->vmcb->control.exit_code;
|
||||
|
||||
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
|
||||
|
||||
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
|
||||
vcpu->arch.cr0 = svm->vmcb->save.cr0;
|
||||
if (npt_enabled)
|
||||
@ -3335,7 +3353,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
|
||||
svm->vmcb->control.exit_info_1,
|
||||
svm->vmcb->control.exit_info_2,
|
||||
svm->vmcb->control.exit_int_info,
|
||||
svm->vmcb->control.exit_int_info_err);
|
||||
svm->vmcb->control.exit_int_info_err,
|
||||
KVM_ISA_SVM);
|
||||
|
||||
vmexit = nested_svm_exit_special(svm);
|
||||
|
||||
@ -3768,6 +3787,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
|
||||
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
|
||||
|
||||
trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
|
||||
|
||||
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
|
||||
kvm_before_handle_nmi(&svm->vcpu);
|
||||
|
||||
@ -3897,60 +3918,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
||||
}
|
||||
}
|
||||
|
||||
static const struct trace_print_flags svm_exit_reasons_str[] = {
|
||||
{ SVM_EXIT_READ_CR0, "read_cr0" },
|
||||
{ SVM_EXIT_READ_CR3, "read_cr3" },
|
||||
{ SVM_EXIT_READ_CR4, "read_cr4" },
|
||||
{ SVM_EXIT_READ_CR8, "read_cr8" },
|
||||
{ SVM_EXIT_WRITE_CR0, "write_cr0" },
|
||||
{ SVM_EXIT_WRITE_CR3, "write_cr3" },
|
||||
{ SVM_EXIT_WRITE_CR4, "write_cr4" },
|
||||
{ SVM_EXIT_WRITE_CR8, "write_cr8" },
|
||||
{ SVM_EXIT_READ_DR0, "read_dr0" },
|
||||
{ SVM_EXIT_READ_DR1, "read_dr1" },
|
||||
{ SVM_EXIT_READ_DR2, "read_dr2" },
|
||||
{ SVM_EXIT_READ_DR3, "read_dr3" },
|
||||
{ SVM_EXIT_WRITE_DR0, "write_dr0" },
|
||||
{ SVM_EXIT_WRITE_DR1, "write_dr1" },
|
||||
{ SVM_EXIT_WRITE_DR2, "write_dr2" },
|
||||
{ SVM_EXIT_WRITE_DR3, "write_dr3" },
|
||||
{ SVM_EXIT_WRITE_DR5, "write_dr5" },
|
||||
{ SVM_EXIT_WRITE_DR7, "write_dr7" },
|
||||
{ SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
|
||||
{ SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
|
||||
{ SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
|
||||
{ SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" },
|
||||
{ SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" },
|
||||
{ SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" },
|
||||
{ SVM_EXIT_INTR, "interrupt" },
|
||||
{ SVM_EXIT_NMI, "nmi" },
|
||||
{ SVM_EXIT_SMI, "smi" },
|
||||
{ SVM_EXIT_INIT, "init" },
|
||||
{ SVM_EXIT_VINTR, "vintr" },
|
||||
{ SVM_EXIT_CPUID, "cpuid" },
|
||||
{ SVM_EXIT_INVD, "invd" },
|
||||
{ SVM_EXIT_HLT, "hlt" },
|
||||
{ SVM_EXIT_INVLPG, "invlpg" },
|
||||
{ SVM_EXIT_INVLPGA, "invlpga" },
|
||||
{ SVM_EXIT_IOIO, "io" },
|
||||
{ SVM_EXIT_MSR, "msr" },
|
||||
{ SVM_EXIT_TASK_SWITCH, "task_switch" },
|
||||
{ SVM_EXIT_SHUTDOWN, "shutdown" },
|
||||
{ SVM_EXIT_VMRUN, "vmrun" },
|
||||
{ SVM_EXIT_VMMCALL, "hypercall" },
|
||||
{ SVM_EXIT_VMLOAD, "vmload" },
|
||||
{ SVM_EXIT_VMSAVE, "vmsave" },
|
||||
{ SVM_EXIT_STGI, "stgi" },
|
||||
{ SVM_EXIT_CLGI, "clgi" },
|
||||
{ SVM_EXIT_SKINIT, "skinit" },
|
||||
{ SVM_EXIT_WBINVD, "wbinvd" },
|
||||
{ SVM_EXIT_MONITOR, "monitor" },
|
||||
{ SVM_EXIT_MWAIT, "mwait" },
|
||||
{ SVM_EXIT_XSETBV, "xsetbv" },
|
||||
{ SVM_EXIT_NPF, "npf" },
|
||||
{ -1, NULL }
|
||||
};
|
||||
|
||||
static int svm_get_lpage_level(void)
|
||||
{
|
||||
return PT_PDPE_LEVEL;
|
||||
@ -4223,7 +4190,6 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.get_mt_mask = svm_get_mt_mask,
|
||||
|
||||
.get_exit_info = svm_get_exit_info,
|
||||
.exit_reasons_str = svm_exit_reasons_str,
|
||||
|
||||
.get_lpage_level = svm_get_lpage_level,
|
||||
|
||||
@ -4239,6 +4205,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.write_tsc_offset = svm_write_tsc_offset,
|
||||
.adjust_tsc_offset = svm_adjust_tsc_offset,
|
||||
.compute_tsc_offset = svm_compute_tsc_offset,
|
||||
.read_l1_tsc = svm_read_l1_tsc,
|
||||
|
||||
.set_tdp_cr3 = set_tdp_cr3,
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
#define _TRACE_KVM_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/svm.h>
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
@ -181,6 +183,95 @@ TRACE_EVENT(kvm_apic,
|
||||
#define KVM_ISA_VMX 1
|
||||
#define KVM_ISA_SVM 2
|
||||
|
||||
/*
 * Comma-separated list of { EXIT_REASON_*, "NAME" } pairs giving a printable
 * name for each VMX exit reason. The list has no trailing comma and no
 * terminator entry.
 * NOTE(review): presumably consumed by a tracepoint to print exit reasons
 * symbolically -- the user of this macro is outside this view; confirm.
 */
#define VMX_EXIT_REASONS \
|
||||
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
|
||||
{ EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
|
||||
{ EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
|
||||
{ EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
|
||||
{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
|
||||
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
|
||||
{ EXIT_REASON_CPUID, "CPUID" }, \
|
||||
{ EXIT_REASON_HLT, "HLT" }, \
|
||||
{ EXIT_REASON_INVLPG, "INVLPG" }, \
|
||||
{ EXIT_REASON_RDPMC, "RDPMC" }, \
|
||||
{ EXIT_REASON_RDTSC, "RDTSC" }, \
|
||||
{ EXIT_REASON_VMCALL, "VMCALL" }, \
|
||||
{ EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
|
||||
{ EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
|
||||
{ EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
|
||||
{ EXIT_REASON_VMPTRST, "VMPTRST" }, \
|
||||
{ EXIT_REASON_VMREAD, "VMREAD" }, \
|
||||
{ EXIT_REASON_VMRESUME, "VMRESUME" }, \
|
||||
{ EXIT_REASON_VMWRITE, "VMWRITE" }, \
|
||||
{ EXIT_REASON_VMOFF, "VMOFF" }, \
|
||||
{ EXIT_REASON_VMON, "VMON" }, \
|
||||
{ EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
|
||||
{ EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
|
||||
{ EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
|
||||
{ EXIT_REASON_MSR_READ, "MSR_READ" }, \
|
||||
{ EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
|
||||
{ EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
|
||||
{ EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
|
||||
{ EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
|
||||
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
|
||||
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
|
||||
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
|
||||
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
|
||||
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
|
||||
{ EXIT_REASON_WBINVD, "WBINVD" }
|
||||
|
||||
/*
 * { value, name } pairs for the SVM exit codes, in the initializer form
 * consumed by __print_symbolic() in the tracepoints of this header.
 */
#define SVM_EXIT_REASONS \
	{ SVM_EXIT_READ_CR0,			"read_cr0" }, \
	{ SVM_EXIT_READ_CR3,			"read_cr3" }, \
	{ SVM_EXIT_READ_CR4,			"read_cr4" }, \
	{ SVM_EXIT_READ_CR8,			"read_cr8" }, \
	{ SVM_EXIT_WRITE_CR0,			"write_cr0" }, \
	{ SVM_EXIT_WRITE_CR3,			"write_cr3" }, \
	{ SVM_EXIT_WRITE_CR4,			"write_cr4" }, \
	{ SVM_EXIT_WRITE_CR8,			"write_cr8" }, \
	{ SVM_EXIT_READ_DR0,			"read_dr0" }, \
	{ SVM_EXIT_READ_DR1,			"read_dr1" }, \
	{ SVM_EXIT_READ_DR2,			"read_dr2" }, \
	{ SVM_EXIT_READ_DR3,			"read_dr3" }, \
	{ SVM_EXIT_WRITE_DR0,			"write_dr0" }, \
	{ SVM_EXIT_WRITE_DR1,			"write_dr1" }, \
	{ SVM_EXIT_WRITE_DR2,			"write_dr2" }, \
	{ SVM_EXIT_WRITE_DR3,			"write_dr3" }, \
	{ SVM_EXIT_WRITE_DR5,			"write_dr5" }, \
	{ SVM_EXIT_WRITE_DR7,			"write_dr7" }, \
	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" }, \
	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" }, \
	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" }, \
	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" }, \
	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" }, \
	{ SVM_EXIT_INTR,			"interrupt" }, \
	{ SVM_EXIT_NMI,				"nmi" }, \
	{ SVM_EXIT_SMI,				"smi" }, \
	{ SVM_EXIT_INIT,			"init" }, \
	{ SVM_EXIT_VINTR,			"vintr" }, \
	{ SVM_EXIT_CPUID,			"cpuid" }, \
	{ SVM_EXIT_INVD,			"invd" }, \
	{ SVM_EXIT_HLT,				"hlt" }, \
	{ SVM_EXIT_INVLPG,			"invlpg" }, \
	{ SVM_EXIT_INVLPGA,			"invlpga" }, \
	{ SVM_EXIT_IOIO,			"io" }, \
	{ SVM_EXIT_MSR,				"msr" }, \
	{ SVM_EXIT_TASK_SWITCH,			"task_switch" }, \
	{ SVM_EXIT_SHUTDOWN,			"shutdown" }, \
	{ SVM_EXIT_VMRUN,			"vmrun" }, \
	{ SVM_EXIT_VMMCALL,			"hypercall" }, \
	{ SVM_EXIT_VMLOAD,			"vmload" }, \
	{ SVM_EXIT_VMSAVE,			"vmsave" }, \
	{ SVM_EXIT_STGI,			"stgi" }, \
	{ SVM_EXIT_CLGI,			"clgi" }, \
	{ SVM_EXIT_SKINIT,			"skinit" }, \
	{ SVM_EXIT_WBINVD,			"wbinvd" }, \
	{ SVM_EXIT_MONITOR,			"monitor" }, \
	{ SVM_EXIT_MWAIT,			"mwait" }, \
	{ SVM_EXIT_XSETBV,			"xsetbv" }, \
	{ SVM_EXIT_NPF,				"npf" }
|
||||
|
||||
/*
|
||||
* Tracepoint for kvm guest exit:
|
||||
*/
|
||||
@ -205,8 +296,9 @@ TRACE_EVENT(kvm_exit,
|
||||
),
|
||||
|
||||
TP_printk("reason %s rip 0x%lx info %llx %llx",
|
||||
ftrace_print_symbols_seq(p, __entry->exit_reason,
|
||||
kvm_x86_ops->exit_reasons_str),
|
||||
(__entry->isa == KVM_ISA_VMX) ?
|
||||
__print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) :
|
||||
__print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS),
|
||||
__entry->guest_rip, __entry->info1, __entry->info2)
|
||||
);
|
||||
|
||||
@ -486,9 +578,9 @@ TRACE_EVENT(kvm_nested_intercepts,
|
||||
TRACE_EVENT(kvm_nested_vmexit,
|
||||
TP_PROTO(__u64 rip, __u32 exit_code,
|
||||
__u64 exit_info1, __u64 exit_info2,
|
||||
__u32 exit_int_info, __u32 exit_int_info_err),
|
||||
__u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
|
||||
TP_ARGS(rip, exit_code, exit_info1, exit_info2,
|
||||
exit_int_info, exit_int_info_err),
|
||||
exit_int_info, exit_int_info_err, isa),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, rip )
|
||||
@ -497,6 +589,7 @@ TRACE_EVENT(kvm_nested_vmexit,
|
||||
__field( __u64, exit_info2 )
|
||||
__field( __u32, exit_int_info )
|
||||
__field( __u32, exit_int_info_err )
|
||||
__field( __u32, isa )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@ -506,12 +599,14 @@ TRACE_EVENT(kvm_nested_vmexit,
|
||||
__entry->exit_info2 = exit_info2;
|
||||
__entry->exit_int_info = exit_int_info;
|
||||
__entry->exit_int_info_err = exit_int_info_err;
|
||||
__entry->isa = isa;
|
||||
),
|
||||
TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
|
||||
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
|
||||
__entry->rip,
|
||||
ftrace_print_symbols_seq(p, __entry->exit_code,
|
||||
kvm_x86_ops->exit_reasons_str),
|
||||
(__entry->isa == KVM_ISA_VMX) ?
|
||||
__print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
|
||||
__print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
|
||||
__entry->exit_info1, __entry->exit_info2,
|
||||
__entry->exit_int_info, __entry->exit_int_info_err)
|
||||
);
|
||||
@ -522,9 +617,9 @@ TRACE_EVENT(kvm_nested_vmexit,
|
||||
TRACE_EVENT(kvm_nested_vmexit_inject,
|
||||
TP_PROTO(__u32 exit_code,
|
||||
__u64 exit_info1, __u64 exit_info2,
|
||||
__u32 exit_int_info, __u32 exit_int_info_err),
|
||||
__u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
|
||||
TP_ARGS(exit_code, exit_info1, exit_info2,
|
||||
exit_int_info, exit_int_info_err),
|
||||
exit_int_info, exit_int_info_err, isa),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u32, exit_code )
|
||||
@ -532,6 +627,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
|
||||
__field( __u64, exit_info2 )
|
||||
__field( __u32, exit_int_info )
|
||||
__field( __u32, exit_int_info_err )
|
||||
__field( __u32, isa )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@ -540,12 +636,14 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
|
||||
__entry->exit_info2 = exit_info2;
|
||||
__entry->exit_int_info = exit_int_info;
|
||||
__entry->exit_int_info_err = exit_int_info_err;
|
||||
__entry->isa = isa;
|
||||
),
|
||||
|
||||
TP_printk("reason: %s ext_inf1: 0x%016llx "
|
||||
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
|
||||
ftrace_print_symbols_seq(p, __entry->exit_code,
|
||||
kvm_x86_ops->exit_reasons_str),
|
||||
(__entry->isa == KVM_ISA_VMX) ?
|
||||
__print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
|
||||
__print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
|
||||
__entry->exit_info1, __entry->exit_info2,
|
||||
__entry->exit_int_info, __entry->exit_int_info_err)
|
||||
);
|
||||
|
@ -71,6 +71,9 @@ module_param(vmm_exclusive, bool, S_IRUGO);
|
||||
static int __read_mostly yield_on_hlt = 1;
|
||||
module_param(yield_on_hlt, bool, S_IRUGO);
|
||||
|
||||
static int __read_mostly fasteoi = 1;
|
||||
module_param(fasteoi, bool, S_IRUGO);
|
||||
|
||||
/*
|
||||
* If nested=1, nested virtualization is supported, i.e., guests may use
|
||||
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
|
||||
@ -1747,6 +1750,21 @@ static u64 guest_read_tsc(void)
|
||||
return host_tsc + tsc_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like guest_read_tsc, but always returns L1's notion of the timestamp
|
||||
* counter, even if a nested guest (L2) is currently running.
|
||||
*/
|
||||
u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 host_tsc, tsc_offset;
|
||||
|
||||
rdtscll(host_tsc);
|
||||
tsc_offset = is_guest_mode(vcpu) ?
|
||||
to_vmx(vcpu)->nested.vmcs01_tsc_offset :
|
||||
vmcs_read64(TSC_OFFSET);
|
||||
return host_tsc + tsc_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
|
||||
* ioctl. In this case the call-back should update internal vmx state to make
|
||||
@ -1762,15 +1780,23 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
|
||||
*/
|
||||
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
{
|
||||
vmcs_write64(TSC_OFFSET, offset);
|
||||
if (is_guest_mode(vcpu))
|
||||
if (is_guest_mode(vcpu)) {
|
||||
/*
|
||||
* We're here if L1 chose not to trap the TSC MSR. Since
|
||||
* prepare_vmcs12() does not copy tsc_offset, we need to also
|
||||
* set the vmcs12 field here.
|
||||
* We're here if L1 chose not to trap WRMSR to TSC. According
|
||||
* to the spec, this should set L1's TSC; The offset that L1
|
||||
* set for L2 remains unchanged, and still needs to be added
|
||||
* to the newly set TSC to get L2's TSC.
|
||||
*/
|
||||
get_vmcs12(vcpu)->tsc_offset = offset -
|
||||
to_vmx(vcpu)->nested.vmcs01_tsc_offset;
|
||||
struct vmcs12 *vmcs12;
|
||||
to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
|
||||
/* recalculate vmcs02.TSC_OFFSET: */
|
||||
vmcs12 = get_vmcs12(vcpu);
|
||||
vmcs_write64(TSC_OFFSET, offset +
|
||||
(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
|
||||
vmcs12->tsc_offset : 0));
|
||||
} else {
|
||||
vmcs_write64(TSC_OFFSET, offset);
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
|
||||
@ -2736,8 +2762,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
|
||||
|
||||
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
|
||||
if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
|
||||
printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
|
||||
__func__);
|
||||
pr_debug_ratelimited("%s: tss fixup for long mode. \n",
|
||||
__func__);
|
||||
vmcs_write32(GUEST_TR_AR_BYTES,
|
||||
(guest_tr_ar & ~AR_TYPE_MASK)
|
||||
| AR_TYPE_BUSY_64_TSS);
|
||||
@ -4115,8 +4141,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
|
||||
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
if (is_page_fault(intr_info)) {
|
||||
/* EPT won't cause page fault directly */
|
||||
if (enable_ept)
|
||||
BUG();
|
||||
BUG_ON(enable_ept);
|
||||
cr2 = vmcs_readl(EXIT_QUALIFICATION);
|
||||
trace_kvm_page_fault(cr2, error_code);
|
||||
|
||||
@ -4518,6 +4543,24 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int handle_apic_access(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (likely(fasteoi)) {
|
||||
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
||||
int access_type, offset;
|
||||
|
||||
access_type = exit_qualification & APIC_ACCESS_TYPE;
|
||||
offset = exit_qualification & APIC_ACCESS_OFFSET;
|
||||
/*
|
||||
* Sane guest uses MOV to write EOI, with written value
|
||||
* not cared. So make a short-circuit here by avoiding
|
||||
* heavy instruction emulation.
|
||||
*/
|
||||
if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
|
||||
(offset == APIC_EOI)) {
|
||||
kvm_lapic_set_eoi(vcpu);
|
||||
skip_emulated_instruction(vcpu);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
|
||||
}
|
||||
|
||||
@ -5591,8 +5634,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
|
||||
if (unlikely(vmx->fail)) {
|
||||
printk(KERN_INFO "%s failed vm entry %x\n",
|
||||
__func__, vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
pr_info_ratelimited("%s failed vm entry %x\n", __func__,
|
||||
vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -5696,8 +5739,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
||||
u32 exit_reason = vmx->exit_reason;
|
||||
u32 vectoring_info = vmx->idt_vectoring_info;
|
||||
|
||||
trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
|
||||
|
||||
/* If guest state is invalid, start emulating */
|
||||
if (vmx->emulation_required && emulate_invalid_guest_state)
|
||||
return handle_invalid_guest_state(vcpu);
|
||||
@ -6101,6 +6142,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
vmx->loaded_vmcs->launched = 1;
|
||||
|
||||
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
|
||||
trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
|
||||
|
||||
vmx_complete_atomic_exit(vmx);
|
||||
vmx_recover_nmi_blocking(vmx);
|
||||
@ -6241,49 +6283,6 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define _ER(x) { EXIT_REASON_##x, #x }
|
||||
|
||||
static const struct trace_print_flags vmx_exit_reasons_str[] = {
|
||||
_ER(EXCEPTION_NMI),
|
||||
_ER(EXTERNAL_INTERRUPT),
|
||||
_ER(TRIPLE_FAULT),
|
||||
_ER(PENDING_INTERRUPT),
|
||||
_ER(NMI_WINDOW),
|
||||
_ER(TASK_SWITCH),
|
||||
_ER(CPUID),
|
||||
_ER(HLT),
|
||||
_ER(INVLPG),
|
||||
_ER(RDPMC),
|
||||
_ER(RDTSC),
|
||||
_ER(VMCALL),
|
||||
_ER(VMCLEAR),
|
||||
_ER(VMLAUNCH),
|
||||
_ER(VMPTRLD),
|
||||
_ER(VMPTRST),
|
||||
_ER(VMREAD),
|
||||
_ER(VMRESUME),
|
||||
_ER(VMWRITE),
|
||||
_ER(VMOFF),
|
||||
_ER(VMON),
|
||||
_ER(CR_ACCESS),
|
||||
_ER(DR_ACCESS),
|
||||
_ER(IO_INSTRUCTION),
|
||||
_ER(MSR_READ),
|
||||
_ER(MSR_WRITE),
|
||||
_ER(MWAIT_INSTRUCTION),
|
||||
_ER(MONITOR_INSTRUCTION),
|
||||
_ER(PAUSE_INSTRUCTION),
|
||||
_ER(MCE_DURING_VMENTRY),
|
||||
_ER(TPR_BELOW_THRESHOLD),
|
||||
_ER(APIC_ACCESS),
|
||||
_ER(EPT_VIOLATION),
|
||||
_ER(EPT_MISCONFIG),
|
||||
_ER(WBINVD),
|
||||
{ -1, NULL }
|
||||
};
|
||||
|
||||
#undef _ER
|
||||
|
||||
static int vmx_get_lpage_level(void)
|
||||
{
|
||||
if (enable_ept && !cpu_has_vmx_ept_1g_page())
|
||||
@ -6514,8 +6513,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
|
||||
|
||||
set_cr4_guest_host_mask(vmx);
|
||||
|
||||
vmcs_write64(TSC_OFFSET,
|
||||
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
|
||||
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
|
||||
vmcs_write64(TSC_OFFSET,
|
||||
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
|
||||
else
|
||||
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
|
||||
|
||||
if (enable_vpid) {
|
||||
/*
|
||||
@ -6610,9 +6612,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
|
||||
if (vmcs12->vm_entry_msr_load_count > 0 ||
|
||||
vmcs12->vm_exit_msr_load_count > 0 ||
|
||||
vmcs12->vm_exit_msr_store_count > 0) {
|
||||
if (printk_ratelimit())
|
||||
printk(KERN_WARNING
|
||||
"%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
|
||||
pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
|
||||
__func__);
|
||||
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
||||
return 1;
|
||||
}
|
||||
@ -6922,7 +6923,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
|
||||
|
||||
load_vmcs12_host_state(vcpu, vmcs12);
|
||||
|
||||
/* Update TSC_OFFSET if vmx_adjust_tsc_offset() was used while L2 ran */
|
||||
/* Update TSC_OFFSET if TSC was changed while L2 ran */
|
||||
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
|
||||
|
||||
/* This is needed for same reason as it was needed in prepare_vmcs02 */
|
||||
@ -7039,7 +7040,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.get_mt_mask = vmx_get_mt_mask,
|
||||
|
||||
.get_exit_info = vmx_get_exit_info,
|
||||
.exit_reasons_str = vmx_exit_reasons_str,
|
||||
|
||||
.get_lpage_level = vmx_get_lpage_level,
|
||||
|
||||
@ -7055,6 +7055,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.write_tsc_offset = vmx_write_tsc_offset,
|
||||
.adjust_tsc_offset = vmx_adjust_tsc_offset,
|
||||
.compute_tsc_offset = vmx_compute_tsc_offset,
|
||||
.read_l1_tsc = vmx_read_l1_tsc,
|
||||
|
||||
.set_tdp_cr3 = vmx_set_cr3,
|
||||
|
||||
|
@ -83,6 +83,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
|
||||
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
|
||||
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
|
||||
struct kvm_cpuid_entry2 __user *entries);
|
||||
static void process_nmi(struct kvm_vcpu *vcpu);
|
||||
|
||||
struct kvm_x86_ops *kvm_x86_ops;
|
||||
EXPORT_SYMBOL_GPL(kvm_x86_ops);
|
||||
@ -359,8 +360,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
|
||||
|
||||
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
vcpu->arch.nmi_pending = 1;
|
||||
atomic_inc(&vcpu->arch.nmi_queued);
|
||||
kvm_make_request(KVM_REQ_NMI, vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_inject_nmi);
|
||||
|
||||
@ -599,6 +600,8 @@ static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
|
||||
static void update_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u32 timer_mode_mask;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
if (!best)
|
||||
@ -610,6 +613,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
|
||||
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
|
||||
best->ecx |= bit(X86_FEATURE_OSXSAVE);
|
||||
}
|
||||
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||
best->function == 0x1) {
|
||||
best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
|
||||
timer_mode_mask = 3 << 17;
|
||||
} else
|
||||
timer_mode_mask = 1 << 17;
|
||||
|
||||
if (apic)
|
||||
apic->lapic_timer.timer_mode_mask = timer_mode_mask;
|
||||
}
|
||||
|
||||
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
@ -825,6 +838,7 @@ static u32 msrs_to_save[] = {
|
||||
static unsigned num_msrs_to_save;
|
||||
|
||||
static u32 emulated_msrs[] = {
|
||||
MSR_IA32_TSCDEADLINE,
|
||||
MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MCG_STATUS,
|
||||
MSR_IA32_MCG_CTL,
|
||||
@ -1000,7 +1014,7 @@ static inline int kvm_tsc_changes_freq(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
|
||||
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.virtual_tsc_khz)
|
||||
return vcpu->arch.virtual_tsc_khz;
|
||||
@ -1098,7 +1112,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
||||
/* Keep irq disabled to prevent changes to the clock */
|
||||
local_irq_save(flags);
|
||||
kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
|
||||
kernel_ns = get_kernel_ns();
|
||||
this_tsc_khz = vcpu_tsc_khz(v);
|
||||
if (unlikely(this_tsc_khz == 0)) {
|
||||
@ -1564,6 +1578,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
break;
|
||||
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
|
||||
return kvm_x2apic_msr_write(vcpu, msr, data);
|
||||
case MSR_IA32_TSCDEADLINE:
|
||||
kvm_set_lapic_tscdeadline_msr(vcpu, data);
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
vcpu->arch.ia32_misc_enable_msr = data;
|
||||
break;
|
||||
@ -1825,6 +1842,9 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
|
||||
case HV_X64_MSR_TPR:
|
||||
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
|
||||
case HV_X64_MSR_APIC_ASSIST_PAGE:
|
||||
data = vcpu->arch.hv_vapic;
|
||||
break;
|
||||
default:
|
||||
pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
|
||||
return 1;
|
||||
@ -1839,7 +1859,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
|
||||
switch (msr) {
|
||||
case MSR_IA32_PLATFORM_ID:
|
||||
case MSR_IA32_UCODE_REV:
|
||||
case MSR_IA32_EBL_CR_POWERON:
|
||||
case MSR_IA32_DEBUGCTLMSR:
|
||||
case MSR_IA32_LASTBRANCHFROMIP:
|
||||
@ -1860,6 +1879,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
case MSR_FAM10H_MMIO_CONF_BASE:
|
||||
data = 0;
|
||||
break;
|
||||
case MSR_IA32_UCODE_REV:
|
||||
data = 0x100000000ULL;
|
||||
break;
|
||||
case MSR_MTRRcap:
|
||||
data = 0x500 | KVM_NR_VAR_MTRR;
|
||||
break;
|
||||
@ -1888,6 +1910,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
|
||||
return kvm_x2apic_msr_read(vcpu, msr, pdata);
|
||||
break;
|
||||
case MSR_IA32_TSCDEADLINE:
|
||||
data = kvm_get_lapic_tscdeadline_msr(vcpu);
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
data = vcpu->arch.ia32_misc_enable_msr;
|
||||
break;
|
||||
@ -2086,6 +2111,9 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
|
||||
break;
|
||||
case KVM_CAP_NR_VCPUS:
|
||||
r = KVM_SOFT_MAX_VCPUS;
|
||||
break;
|
||||
case KVM_CAP_MAX_VCPUS:
|
||||
r = KVM_MAX_VCPUS;
|
||||
break;
|
||||
case KVM_CAP_NR_MEMSLOTS:
|
||||
@ -2210,7 +2238,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
s64 tsc_delta;
|
||||
u64 tsc;
|
||||
|
||||
kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
|
||||
tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
|
||||
tsc - vcpu->arch.last_guest_tsc;
|
||||
|
||||
@ -2234,7 +2262,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_x86_ops->vcpu_put(vcpu);
|
||||
kvm_put_guest_fpu(vcpu);
|
||||
kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
|
||||
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
}
|
||||
|
||||
static int is_efer_nx(void)
|
||||
@ -2819,6 +2847,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
|
||||
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
process_nmi(vcpu);
|
||||
events->exception.injected =
|
||||
vcpu->arch.exception.pending &&
|
||||
!kvm_exception_is_soft(vcpu->arch.exception.nr);
|
||||
@ -2836,7 +2865,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
|
||||
|
||||
events->nmi.injected = vcpu->arch.nmi_injected;
|
||||
events->nmi.pending = vcpu->arch.nmi_pending;
|
||||
events->nmi.pending = vcpu->arch.nmi_pending != 0;
|
||||
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
|
||||
events->nmi.pad = 0;
|
||||
|
||||
@ -2856,6 +2885,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
| KVM_VCPUEVENT_VALID_SHADOW))
|
||||
return -EINVAL;
|
||||
|
||||
process_nmi(vcpu);
|
||||
vcpu->arch.exception.pending = events->exception.injected;
|
||||
vcpu->arch.exception.nr = events->exception.nr;
|
||||
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
|
||||
@ -3556,7 +3586,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
if (r) {
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
||||
&vpic->dev);
|
||||
&vpic->dev_master);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
||||
&vpic->dev_slave);
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
|
||||
&vpic->dev_eclr);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
kfree(vpic);
|
||||
goto create_irqchip_unlock;
|
||||
@ -4045,62 +4079,6 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
unsigned long addr,
|
||||
void *val,
|
||||
unsigned int bytes,
|
||||
struct x86_exception *exception)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||
gpa_t gpa;
|
||||
int handled, ret;
|
||||
|
||||
if (vcpu->mmio_read_completed) {
|
||||
memcpy(val, vcpu->mmio_data, bytes);
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
|
||||
vcpu->mmio_phys_addr, *(u64 *)val);
|
||||
vcpu->mmio_read_completed = 0;
|
||||
return X86EMUL_CONTINUE;
|
||||
}
|
||||
|
||||
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false);
|
||||
|
||||
if (ret < 0)
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
|
||||
if (ret)
|
||||
goto mmio;
|
||||
|
||||
if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
|
||||
== X86EMUL_CONTINUE)
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
mmio:
|
||||
/*
|
||||
* Is this MMIO handled locally?
|
||||
*/
|
||||
handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
|
||||
|
||||
if (handled == bytes)
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
gpa += handled;
|
||||
bytes -= handled;
|
||||
val += handled;
|
||||
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
|
||||
|
||||
vcpu->mmio_needed = 1;
|
||||
vcpu->run->exit_reason = KVM_EXIT_MMIO;
|
||||
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
|
||||
vcpu->mmio_size = bytes;
|
||||
vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
|
||||
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
|
||||
vcpu->mmio_index = 0;
|
||||
|
||||
return X86EMUL_IO_NEEDED;
|
||||
}
|
||||
|
||||
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const void *val, int bytes)
|
||||
{
|
||||
@ -4113,16 +4091,93 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int emulator_write_emulated_onepage(unsigned long addr,
|
||||
const void *val,
|
||||
unsigned int bytes,
|
||||
struct x86_exception *exception,
|
||||
struct kvm_vcpu *vcpu)
|
||||
struct read_write_emulator_ops {
|
||||
int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
|
||||
int bytes);
|
||||
int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
void *val, int bytes);
|
||||
int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
int bytes, void *val);
|
||||
int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
void *val, int bytes);
|
||||
bool write;
|
||||
};
|
||||
|
||||
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
|
||||
{
|
||||
if (vcpu->mmio_read_completed) {
|
||||
memcpy(val, vcpu->mmio_data, bytes);
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
|
||||
vcpu->mmio_phys_addr, *(u64 *)val);
|
||||
vcpu->mmio_read_completed = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Read @bytes bytes of guest memory at @gpa into @val.
 *
 * Returns 1 when kvm_read_guest() reports success (0), and 0 otherwise.
 */
static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
			void *val, int bytes)
{
	int err = kvm_read_guest(vcpu->kvm, gpa, val, bytes);

	return err == 0;
}
|
||||
|
||||
/*
 * Write @bytes bytes from @val to guest memory at @gpa.
 *
 * Thin adapter giving emulator_write_phys() the read_write_emulate
 * callback signature; its return value is passed through unchanged.
 */
static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
			 void *val, int bytes)
{
	int done = emulator_write_phys(vcpu, gpa, val, bytes);

	return done;
}
|
||||
|
||||
/*
 * Trace an MMIO write and try to complete it with an in-kernel device.
 *
 * Returns whatever vcpu_mmio_write() returns for the access.
 */
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
	u64 traced = 0;

	/*
	 * The access is @bytes long, so take at most that many bytes for
	 * the tracepoint; dereferencing @val as a full u64 would read past
	 * the data for accesses shorter than 8 bytes.
	 */
	memcpy(&traced, val, min(bytes, 8));
	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, traced);

	return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
|
||||
|
||||
/*
 * Exit-to-userspace hook for reads: record that the read could not be
 * satisfied in the kernel and report that I/O is needed.
 *
 * Always returns X86EMUL_IO_NEEDED; @vcpu and @val are not used.
 */
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
			  void *val, int bytes)
{
	int rc = X86EMUL_IO_NEEDED;

	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);

	return rc;
}
|
||||
|
||||
/*
 * Exit-to-userspace hook for writes: stage the data being written.
 *
 * Copies @bytes bytes from @val into vcpu->mmio_data, then copies the
 * first 8 bytes of that buffer into vcpu->run->mmio.data.
 *
 * Always returns X86EMUL_CONTINUE; @gpa is not used.
 */
static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
			   void *val, int bytes)
{
	void *staged = vcpu->mmio_data;

	memcpy(staged, val, bytes);
	memcpy(vcpu->run->mmio.data, staged, 8);

	return X86EMUL_CONTINUE;
}
|
||||
|
||||
static struct read_write_emulator_ops read_emultor = {
|
||||
.read_write_prepare = read_prepare,
|
||||
.read_write_emulate = read_emulate,
|
||||
.read_write_mmio = vcpu_mmio_read,
|
||||
.read_write_exit_mmio = read_exit_mmio,
|
||||
};
|
||||
|
||||
static struct read_write_emulator_ops write_emultor = {
|
||||
.read_write_emulate = write_emulate,
|
||||
.read_write_mmio = write_mmio,
|
||||
.read_write_exit_mmio = write_exit_mmio,
|
||||
.write = true,
|
||||
};
|
||||
|
||||
static int emulator_read_write_onepage(unsigned long addr, void *val,
|
||||
unsigned int bytes,
|
||||
struct x86_exception *exception,
|
||||
struct kvm_vcpu *vcpu,
|
||||
struct read_write_emulator_ops *ops)
|
||||
{
|
||||
gpa_t gpa;
|
||||
int handled, ret;
|
||||
bool write = ops->write;
|
||||
|
||||
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true);
|
||||
if (ops->read_write_prepare &&
|
||||
ops->read_write_prepare(vcpu, val, bytes))
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
|
||||
|
||||
if (ret < 0)
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
@ -4131,15 +4186,14 @@ static int emulator_write_emulated_onepage(unsigned long addr,
|
||||
if (ret)
|
||||
goto mmio;
|
||||
|
||||
if (emulator_write_phys(vcpu, gpa, val, bytes))
|
||||
if (ops->read_write_emulate(vcpu, gpa, val, bytes))
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
mmio:
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
|
||||
/*
|
||||
* Is this MMIO handled locally?
|
||||
*/
|
||||
handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
|
||||
handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
|
||||
if (handled == bytes)
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
@ -4148,23 +4202,20 @@ static int emulator_write_emulated_onepage(unsigned long addr,
|
||||
val += handled;
|
||||
|
||||
vcpu->mmio_needed = 1;
|
||||
memcpy(vcpu->mmio_data, val, bytes);
|
||||
vcpu->run->exit_reason = KVM_EXIT_MMIO;
|
||||
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
|
||||
vcpu->mmio_size = bytes;
|
||||
vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
|
||||
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
|
||||
memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
|
||||
vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
|
||||
vcpu->mmio_index = 0;
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
|
||||
}
|
||||
|
||||
int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
unsigned long addr,
|
||||
const void *val,
|
||||
unsigned int bytes,
|
||||
struct x86_exception *exception)
|
||||
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
|
||||
void *val, unsigned int bytes,
|
||||
struct x86_exception *exception,
|
||||
struct read_write_emulator_ops *ops)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||
|
||||
@ -4173,16 +4224,38 @@ int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
int rc, now;
|
||||
|
||||
now = -addr & ~PAGE_MASK;
|
||||
rc = emulator_write_emulated_onepage(addr, val, now, exception,
|
||||
vcpu);
|
||||
rc = emulator_read_write_onepage(addr, val, now, exception,
|
||||
vcpu, ops);
|
||||
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
addr += now;
|
||||
val += now;
|
||||
bytes -= now;
|
||||
}
|
||||
return emulator_write_emulated_onepage(addr, val, bytes, exception,
|
||||
vcpu);
|
||||
|
||||
return emulator_read_write_onepage(addr, val, bytes, exception,
|
||||
vcpu, ops);
|
||||
}
|
||||
|
||||
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
unsigned long addr,
|
||||
void *val,
|
||||
unsigned int bytes,
|
||||
struct x86_exception *exception)
|
||||
{
|
||||
return emulator_read_write(ctxt, addr, val, bytes,
|
||||
exception, &read_emultor);
|
||||
}
|
||||
|
||||
int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
unsigned long addr,
|
||||
const void *val,
|
||||
unsigned int bytes,
|
||||
struct x86_exception *exception)
|
||||
{
|
||||
return emulator_read_write(ctxt, addr, (void *)val, bytes,
|
||||
exception, &write_emultor);
|
||||
}
|
||||
|
||||
#define CMPXCHG_TYPE(t, ptr, old, new) \
|
||||
@ -4712,7 +4785,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
|
||||
kvm_set_rflags(vcpu, ctxt->eflags);
|
||||
|
||||
if (irq == NMI_VECTOR)
|
||||
vcpu->arch.nmi_pending = false;
|
||||
vcpu->arch.nmi_pending = 0;
|
||||
else
|
||||
vcpu->arch.interrupt.pending = false;
|
||||
|
||||
@ -4788,7 +4861,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
|
||||
trace_kvm_emulate_insn_start(vcpu);
|
||||
++vcpu->stat.insn_emulation;
|
||||
if (r) {
|
||||
if (r != EMULATION_OK) {
|
||||
if (emulation_type & EMULTYPE_TRAP_UD)
|
||||
return EMULATE_FAIL;
|
||||
if (reexecute_instruction(vcpu, cr2))
|
||||
@ -5521,7 +5594,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
|
||||
/* try to inject new event if pending */
|
||||
if (vcpu->arch.nmi_pending) {
|
||||
if (kvm_x86_ops->nmi_allowed(vcpu)) {
|
||||
vcpu->arch.nmi_pending = false;
|
||||
--vcpu->arch.nmi_pending;
|
||||
vcpu->arch.nmi_injected = true;
|
||||
kvm_x86_ops->set_nmi(vcpu);
|
||||
}
|
||||
@ -5553,10 +5626,26 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
static void process_nmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned limit = 2;
|
||||
|
||||
/*
|
||||
* x86 is limited to one NMI running, and one NMI pending after it.
|
||||
* If an NMI is already in progress, limit further NMIs to just one.
|
||||
* Otherwise, allow two (and we'll inject the first one immediately).
|
||||
*/
|
||||
if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
|
||||
limit = 1;
|
||||
|
||||
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
|
||||
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
}
|
||||
|
||||
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
bool nmi_pending;
|
||||
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
|
||||
vcpu->run->request_interrupt_window;
|
||||
|
||||
@ -5596,6 +5685,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
|
||||
record_steal_time(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_NMI, vcpu))
|
||||
process_nmi(vcpu);
|
||||
|
||||
}
|
||||
|
||||
@ -5603,19 +5694,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
if (unlikely(r))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* An NMI can be injected between local nmi_pending read and
|
||||
* vcpu->arch.nmi_pending read inside inject_pending_event().
|
||||
* But in that case, KVM_REQ_EVENT will be set, which makes
|
||||
* the race described above benign.
|
||||
*/
|
||||
nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
|
||||
inject_pending_event(vcpu);
|
||||
|
||||
/* enable NMI/IRQ window open exits if needed */
|
||||
if (nmi_pending)
|
||||
if (vcpu->arch.nmi_pending)
|
||||
kvm_x86_ops->enable_nmi_window(vcpu);
|
||||
else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
|
||||
kvm_x86_ops->enable_irq_window(vcpu);
|
||||
@ -5678,7 +5761,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
if (hw_breakpoint_active())
|
||||
hw_breakpoint_restore();
|
||||
|
||||
kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
|
||||
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
smp_wmb();
|
||||
@ -6323,7 +6406,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
||||
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.nmi_pending = false;
|
||||
atomic_set(&vcpu->arch.nmi_queued, 0);
|
||||
vcpu->arch.nmi_pending = 0;
|
||||
vcpu->arch.nmi_injected = false;
|
||||
|
||||
vcpu->arch.switch_db_regs = 0;
|
||||
@ -6598,7 +6682,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
!vcpu->arch.apf.halted)
|
||||
|| !list_empty_careful(&vcpu->async_pf.done)
|
||||
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|
||||
|| vcpu->arch.nmi_pending ||
|
||||
|| atomic_read(&vcpu->arch.nmi_queued) ||
|
||||
(kvm_arch_interrupt_allowed(vcpu) &&
|
||||
kvm_cpu_has_interrupt(vcpu));
|
||||
}
|
||||
|
@ -371,6 +371,7 @@ struct kvm_s390_psw {
|
||||
#define KVM_S390_INT_VIRTIO 0xffff2603u
|
||||
#define KVM_S390_INT_SERVICE 0xffff2401u
|
||||
#define KVM_S390_INT_EMERGENCY 0xffff1201u
|
||||
#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
|
||||
|
||||
struct kvm_s390_interrupt {
|
||||
__u32 type;
|
||||
@ -463,7 +464,7 @@ struct kvm_ppc_pvinfo {
|
||||
#define KVM_CAP_VAPIC 6
|
||||
#define KVM_CAP_EXT_CPUID 7
|
||||
#define KVM_CAP_CLOCKSOURCE 8
|
||||
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
|
||||
#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */
|
||||
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
|
||||
#define KVM_CAP_PIT 11
|
||||
#define KVM_CAP_NOP_IO_DELAY 12
|
||||
@ -553,6 +554,9 @@ struct kvm_ppc_pvinfo {
|
||||
#define KVM_CAP_SPAPR_TCE 63
|
||||
#define KVM_CAP_PPC_SMT 64
|
||||
#define KVM_CAP_PPC_RMA 65
|
||||
#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
|
||||
#define KVM_CAP_PPC_HIOR 67
|
||||
#define KVM_CAP_PPC_PAPR 68
|
||||
#define KVM_CAP_S390_GMAP 71
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/msi.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <asm/signal.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
@ -48,6 +49,7 @@
|
||||
#define KVM_REQ_EVENT 11
|
||||
#define KVM_REQ_APF_HALT 12
|
||||
#define KVM_REQ_STEAL_UPDATE 13
|
||||
#define KVM_REQ_NMI 14
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
|
||||
@ -55,16 +57,16 @@ struct kvm;
|
||||
struct kvm_vcpu;
|
||||
extern struct kmem_cache *kvm_vcpu_cache;
|
||||
|
||||
/*
|
||||
* It would be nice to use something smarter than a linear search, TBD...
|
||||
* Thankfully we dont expect many devices to register (famous last words :),
|
||||
* so until then it will suffice. At least its abstracted so we can change
|
||||
* in one place.
|
||||
*/
|
||||
/*
 * One entry of a kvm_io_bus: the address range a device was registered
 * for (see kvm_io_bus_register_dev()), together with the device itself.
 */
struct kvm_io_range {
|
||||
gpa_t addr;	/* start address of the range */
|
||||
int len;	/* length of the range; addr + len is used as its end */
|
||||
struct kvm_io_device *dev;	/* device registered for this range */
|
||||
};
|
||||
|
||||
struct kvm_io_bus {
|
||||
int dev_count;
|
||||
#define NR_IOBUS_DEVS 200
|
||||
struct kvm_io_device *devs[NR_IOBUS_DEVS];
|
||||
#define NR_IOBUS_DEVS 300
|
||||
struct kvm_io_range range[NR_IOBUS_DEVS];
|
||||
};
|
||||
|
||||
enum kvm_bus {
|
||||
@ -77,8 +79,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, const void *val);
|
||||
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
|
||||
void *val);
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev);
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, struct kvm_io_device *dev);
|
||||
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev);
|
||||
|
||||
@ -256,8 +258,9 @@ struct kvm {
|
||||
struct kvm_arch arch;
|
||||
atomic_t users_count;
|
||||
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
|
||||
struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
|
||||
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
|
||||
spinlock_t ring_lock;
|
||||
struct list_head coalesced_zones;
|
||||
#endif
|
||||
|
||||
struct mutex irq_lock;
|
||||
@ -281,11 +284,8 @@ struct kvm {
|
||||
|
||||
/* The guest did something we don't support. */
|
||||
#define pr_unimpl(vcpu, fmt, ...) \
|
||||
do { \
|
||||
if (printk_ratelimit()) \
|
||||
printk(KERN_ERR "kvm: %i: cpu%i " fmt, \
|
||||
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
|
||||
} while (0)
|
||||
pr_err_ratelimited("kvm: %i: cpu%i " fmt, \
|
||||
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__)
|
||||
|
||||
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
|
||||
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
|
||||
|
@ -58,8 +58,6 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
|
||||
static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
u32 vector;
|
||||
int index;
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) {
|
||||
spin_lock(&assigned_dev->intx_lock);
|
||||
@ -68,31 +66,35 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
|
||||
spin_unlock(&assigned_dev->intx_lock);
|
||||
}
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
index = find_index_from_host_irq(assigned_dev, irq);
|
||||
if (index >= 0) {
|
||||
vector = assigned_dev->
|
||||
guest_msix_entries[index].vector;
|
||||
kvm_set_irq(assigned_dev->kvm,
|
||||
assigned_dev->irq_source_id, vector, 1);
|
||||
}
|
||||
} else
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 1);
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 1);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
|
||||
int index = find_index_from_host_irq(assigned_dev, irq);
|
||||
u32 vector;
|
||||
|
||||
if (index >= 0) {
|
||||
vector = assigned_dev->guest_msix_entries[index].vector;
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
vector, 1);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Ack the irq line for an assigned device */
|
||||
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *dev;
|
||||
|
||||
if (kian->gsi == -1)
|
||||
return;
|
||||
|
||||
dev = container_of(kian, struct kvm_assigned_dev_kernel,
|
||||
ack_notifier);
|
||||
struct kvm_assigned_dev_kernel *dev =
|
||||
container_of(kian, struct kvm_assigned_dev_kernel,
|
||||
ack_notifier);
|
||||
|
||||
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
|
||||
|
||||
@ -110,8 +112,9 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
static void deassign_guest_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
|
||||
assigned_dev->ack_notifier.gsi = -1;
|
||||
if (assigned_dev->ack_notifier.gsi != -1)
|
||||
kvm_unregister_irq_ack_notifier(kvm,
|
||||
&assigned_dev->ack_notifier);
|
||||
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 0);
|
||||
@ -143,7 +146,7 @@ static void deassign_host_irq(struct kvm *kvm,
|
||||
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++)
|
||||
free_irq(assigned_dev->host_msix_entries[i].vector,
|
||||
(void *)assigned_dev);
|
||||
assigned_dev);
|
||||
|
||||
assigned_dev->entries_nr = 0;
|
||||
kfree(assigned_dev->host_msix_entries);
|
||||
@ -153,7 +156,7 @@ static void deassign_host_irq(struct kvm *kvm,
|
||||
/* Deal with MSI and INTx */
|
||||
disable_irq(assigned_dev->host_irq);
|
||||
|
||||
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
|
||||
free_irq(assigned_dev->host_irq, assigned_dev);
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
|
||||
pci_disable_msi(assigned_dev->dev);
|
||||
@ -239,7 +242,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
|
||||
* are going to be long delays in accepting, acking, etc.
|
||||
*/
|
||||
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
|
||||
IRQF_ONESHOT, dev->irq_name, (void *)dev))
|
||||
IRQF_ONESHOT, dev->irq_name, dev))
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
@ -258,7 +261,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
|
||||
|
||||
dev->host_irq = dev->dev->irq;
|
||||
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
|
||||
0, dev->irq_name, (void *)dev)) {
|
||||
0, dev->irq_name, dev)) {
|
||||
pci_disable_msi(dev->dev);
|
||||
return -EIO;
|
||||
}
|
||||
@ -284,8 +287,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
|
||||
|
||||
for (i = 0; i < dev->entries_nr; i++) {
|
||||
r = request_threaded_irq(dev->host_msix_entries[i].vector,
|
||||
NULL, kvm_assigned_dev_thread,
|
||||
0, dev->irq_name, (void *)dev);
|
||||
NULL, kvm_assigned_dev_thread_msix,
|
||||
0, dev->irq_name, dev);
|
||||
if (r)
|
||||
goto err;
|
||||
}
|
||||
@ -293,7 +296,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
|
||||
return 0;
|
||||
err:
|
||||
for (i -= 1; i >= 0; i--)
|
||||
free_irq(dev->host_msix_entries[i].vector, (void *)dev);
|
||||
free_irq(dev->host_msix_entries[i].vector, dev);
|
||||
pci_disable_msix(dev->dev);
|
||||
return r;
|
||||
}
|
||||
@ -406,7 +409,8 @@ static int assign_guest_irq(struct kvm *kvm,
|
||||
|
||||
if (!r) {
|
||||
dev->irq_requested_type |= guest_irq_type;
|
||||
kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
|
||||
if (dev->ack_notifier.gsi != -1)
|
||||
kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
|
||||
} else
|
||||
kvm_free_irq_source_id(kvm, dev->irq_source_id);
|
||||
|
||||
|
@ -24,10 +24,19 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
|
||||
static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
|
||||
gpa_t addr, int len)
|
||||
{
|
||||
struct kvm_coalesced_mmio_zone *zone;
|
||||
/* is it in a batchable area ?
|
||||
* (addr,len) is fully included in
|
||||
* (zone->addr, zone->size)
|
||||
*/
|
||||
|
||||
return (dev->zone.addr <= addr &&
|
||||
addr + len <= dev->zone.addr + dev->zone.size);
|
||||
}
|
||||
|
||||
static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
|
||||
{
|
||||
struct kvm_coalesced_mmio_ring *ring;
|
||||
unsigned avail;
|
||||
int i;
|
||||
|
||||
/* Are we able to batch it ? */
|
||||
|
||||
@ -37,25 +46,12 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
|
||||
*/
|
||||
ring = dev->kvm->coalesced_mmio_ring;
|
||||
avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
|
||||
if (avail < KVM_MAX_VCPUS) {
|
||||
if (avail == 0) {
|
||||
/* full */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* is it in a batchable area ? */
|
||||
|
||||
for (i = 0; i < dev->nb_zones; i++) {
|
||||
zone = &dev->zone[i];
|
||||
|
||||
/* (addr,len) is fully included in
|
||||
* (zone->addr, zone->size)
|
||||
*/
|
||||
|
||||
if (zone->addr <= addr &&
|
||||
addr + len <= zone->addr + zone->size)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int coalesced_mmio_write(struct kvm_io_device *this,
|
||||
@ -63,10 +59,16 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
|
||||
{
|
||||
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
|
||||
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
|
||||
|
||||
if (!coalesced_mmio_in_range(dev, addr, len))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
spin_lock(&dev->lock);
|
||||
spin_lock(&dev->kvm->ring_lock);
|
||||
|
||||
if (!coalesced_mmio_has_room(dev)) {
|
||||
spin_unlock(&dev->kvm->ring_lock);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* copy data in first free entry of the ring */
|
||||
|
||||
@ -75,7 +77,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
|
||||
memcpy(ring->coalesced_mmio[ring->last].data, val, len);
|
||||
smp_wmb();
|
||||
ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
|
||||
spin_unlock(&dev->lock);
|
||||
spin_unlock(&dev->kvm->ring_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -83,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
|
||||
{
|
||||
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
|
||||
|
||||
list_del(&dev->list);
|
||||
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
@ -93,7 +97,6 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
|
||||
|
||||
int kvm_coalesced_mmio_init(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_coalesced_mmio_dev *dev;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
@ -101,31 +104,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
|
||||
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page)
|
||||
goto out_err;
|
||||
|
||||
ret = 0;
|
||||
kvm->coalesced_mmio_ring = page_address(page);
|
||||
|
||||
ret = -ENOMEM;
|
||||
dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
|
||||
if (!dev)
|
||||
goto out_free_page;
|
||||
spin_lock_init(&dev->lock);
|
||||
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
|
||||
dev->kvm = kvm;
|
||||
kvm->coalesced_mmio_dev = dev;
|
||||
/*
|
||||
* We're using this spinlock to sync access to the coalesced ring.
|
||||
* The list doesn't need its own lock since device registration and
|
||||
* unregistration should only happen when kvm->slots_lock is held.
|
||||
*/
|
||||
spin_lock_init(&kvm->ring_lock);
|
||||
INIT_LIST_HEAD(&kvm->coalesced_zones);
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
if (ret < 0)
|
||||
goto out_free_dev;
|
||||
|
||||
return ret;
|
||||
|
||||
out_free_dev:
|
||||
kvm->coalesced_mmio_dev = NULL;
|
||||
kfree(dev);
|
||||
out_free_page:
|
||||
kvm->coalesced_mmio_ring = NULL;
|
||||
__free_page(page);
|
||||
out_err:
|
||||
return ret;
|
||||
}
|
||||
@ -139,51 +129,50 @@ void kvm_coalesced_mmio_free(struct kvm *kvm)
|
||||
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
|
||||
struct kvm_coalesced_mmio_zone *zone)
|
||||
{
|
||||
struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
|
||||
int ret;
|
||||
struct kvm_coalesced_mmio_dev *dev;
|
||||
|
||||
dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
|
||||
dev->kvm = kvm;
|
||||
dev->zone = *zone;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
|
||||
zone->size, &dev->dev);
|
||||
if (ret < 0)
|
||||
goto out_free_dev;
|
||||
list_add_tail(&dev->list, &kvm->coalesced_zones);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
return ret;
|
||||
|
||||
out_free_dev:
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
kfree(dev);
|
||||
|
||||
if (dev == NULL)
|
||||
return -ENXIO;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
|
||||
dev->zone[dev->nb_zones] = *zone;
|
||||
dev->nb_zones++;
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
|
||||
struct kvm_coalesced_mmio_zone *zone)
|
||||
{
|
||||
int i;
|
||||
struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
|
||||
struct kvm_coalesced_mmio_zone *z;
|
||||
|
||||
if (dev == NULL)
|
||||
return -ENXIO;
|
||||
struct kvm_coalesced_mmio_dev *dev, *tmp;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
i = dev->nb_zones;
|
||||
while (i) {
|
||||
z = &dev->zone[i - 1];
|
||||
|
||||
/* unregister all zones
|
||||
* included in (zone->addr, zone->size)
|
||||
*/
|
||||
|
||||
if (zone->addr <= z->addr &&
|
||||
z->addr + z->size <= zone->addr + zone->size) {
|
||||
dev->nb_zones--;
|
||||
*z = dev->zone[dev->nb_zones];
|
||||
list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
|
||||
if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
|
||||
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
|
||||
kvm_iodevice_destructor(&dev->dev);
|
||||
}
|
||||
i--;
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
|
@ -12,14 +12,13 @@
|
||||
|
||||
#ifdef CONFIG_KVM_MMIO
|
||||
|
||||
#define KVM_COALESCED_MMIO_ZONE_MAX 100
|
||||
#include <linux/list.h>
|
||||
|
||||
struct kvm_coalesced_mmio_dev {
|
||||
struct list_head list;
|
||||
struct kvm_io_device dev;
|
||||
struct kvm *kvm;
|
||||
spinlock_t lock;
|
||||
int nb_zones;
|
||||
struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
|
||||
struct kvm_coalesced_mmio_zone zone;
|
||||
};
|
||||
|
||||
int kvm_coalesced_mmio_init(struct kvm *kvm);
|
||||
|
@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
|
||||
|
||||
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
|
||||
&p->dev);
|
||||
if (ret < 0)
|
||||
goto unlock_fail;
|
||||
|
||||
|
@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm)
|
||||
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
|
||||
ioapic->kvm = kvm;
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
|
||||
IOAPIC_MEM_LENGTH, &ioapic->dev);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
if (ret < 0) {
|
||||
kvm->arch.vioapic = NULL;
|
||||
|
@ -47,6 +47,8 @@
|
||||
#include <linux/srcu.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/bsearch.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/io.h>
|
||||
@ -2391,24 +2393,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bus->dev_count; i++) {
|
||||
struct kvm_io_device *pos = bus->devs[i];
|
||||
struct kvm_io_device *pos = bus->range[i].dev;
|
||||
|
||||
kvm_iodevice_destructor(pos);
|
||||
}
|
||||
kfree(bus);
|
||||
}
|
||||
|
||||
int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
|
||||
{
|
||||
const struct kvm_io_range *r1 = p1;
|
||||
const struct kvm_io_range *r2 = p2;
|
||||
|
||||
if (r1->addr < r2->addr)
|
||||
return -1;
|
||||
if (r1->addr + r1->len > r2->addr + r2->len)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
|
||||
gpa_t addr, int len)
|
||||
{
|
||||
if (bus->dev_count == NR_IOBUS_DEVS)
|
||||
return -ENOSPC;
|
||||
|
||||
bus->range[bus->dev_count++] = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
.dev = dev,
|
||||
};
|
||||
|
||||
sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
|
||||
kvm_io_bus_sort_cmp, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
|
||||
gpa_t addr, int len)
|
||||
{
|
||||
struct kvm_io_range *range, key;
|
||||
int off;
|
||||
|
||||
key = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
range = bsearch(&key, bus->range, bus->dev_count,
|
||||
sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
|
||||
if (range == NULL)
|
||||
return -ENOENT;
|
||||
|
||||
off = range - bus->range;
|
||||
|
||||
while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
|
||||
off--;
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
/* kvm_io_bus_write - called under kvm->slots_lock */
|
||||
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, const void *val)
|
||||
{
|
||||
int i;
|
||||
int idx;
|
||||
struct kvm_io_bus *bus;
|
||||
struct kvm_io_range range;
|
||||
|
||||
range = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
|
||||
for (i = 0; i < bus->dev_count; i++)
|
||||
if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
|
||||
idx = kvm_io_bus_get_first_dev(bus, addr, len);
|
||||
if (idx < 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (idx < bus->dev_count &&
|
||||
kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
|
||||
if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
|
||||
return 0;
|
||||
idx++;
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
@ -2416,19 +2486,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, void *val)
|
||||
{
|
||||
int i;
|
||||
int idx;
|
||||
struct kvm_io_bus *bus;
|
||||
struct kvm_io_range range;
|
||||
|
||||
range = (struct kvm_io_range) {
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
|
||||
for (i = 0; i < bus->dev_count; i++)
|
||||
if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
|
||||
idx = kvm_io_bus_get_first_dev(bus, addr, len);
|
||||
if (idx < 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (idx < bus->dev_count &&
|
||||
kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
|
||||
if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
|
||||
return 0;
|
||||
idx++;
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Caller must hold slots_lock. */
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
struct kvm_io_device *dev)
|
||||
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
|
||||
int len, struct kvm_io_device *dev)
|
||||
{
|
||||
struct kvm_io_bus *new_bus, *bus;
|
||||
|
||||
@ -2440,7 +2524,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
if (!new_bus)
|
||||
return -ENOMEM;
|
||||
memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
|
||||
new_bus->devs[new_bus->dev_count++] = dev;
|
||||
kvm_io_bus_insert_dev(new_bus, dev, addr, len);
|
||||
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
|
||||
synchronize_srcu_expedited(&kvm->srcu);
|
||||
kfree(bus);
|
||||
@ -2464,9 +2548,13 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
|
||||
|
||||
r = -ENOENT;
|
||||
for (i = 0; i < new_bus->dev_count; i++)
|
||||
if (new_bus->devs[i] == dev) {
|
||||
if (new_bus->range[i].dev == dev) {
|
||||
r = 0;
|
||||
new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
|
||||
new_bus->dev_count--;
|
||||
new_bus->range[i] = new_bus->range[new_bus->dev_count];
|
||||
sort(new_bus->range, new_bus->dev_count,
|
||||
sizeof(struct kvm_io_range),
|
||||
kvm_io_bus_sort_cmp, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user