mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-07 14:32:23 +00:00
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (84 commits) KVM: VMX: Fix comparison of guest efer with stale host value KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c KVM: Drop user return notifier when disabling virtualization on a cpu KVM: VMX: Disable unrestricted guest when EPT disabled KVM: x86 emulator: limit instructions to 15 bytes KVM: s390: Make psw available on all exits, not just a subset KVM: x86: Add KVM_GET/SET_VCPU_EVENTS KVM: VMX: Report unexpected simultaneous exceptions as internal errors KVM: Allow internal errors reported to userspace to carry extra data KVM: Reorder IOCTLs in main kvm.h KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG KVM: only clear irq_source_id if irqchip is present KVM: x86: disallow KVM_{SET,GET}_LAPIC without allocated in-kernel lapic KVM: x86: disallow multiple KVM_CREATE_IRQCHIP KVM: VMX: Remove vmx->msr_offset_efer KVM: MMU: update invlpg handler comment KVM: VMX: move CR3/PDPTR update to vmx_set_cr3 KVM: remove duplicated task_switch check KVM: powerpc: Fix BUILD_BUG_ON condition KVM: VMX: Use shared msr infrastructure ... Trivial conflicts due to new Kconfig options in arch/Kconfig and kernel/Makefile
This commit is contained in:
commit
ed9216c171
@ -593,6 +593,115 @@ struct kvm_irqchip {
|
||||
} chip;
|
||||
};
|
||||
|
||||
4.27 KVM_XEN_HVM_CONFIG
|
||||
|
||||
Capability: KVM_CAP_XEN_HVM
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_xen_hvm_config (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Sets the MSR that the Xen HVM guest uses to initialize its hypercall
|
||||
page, and provides the starting address and size of the hypercall
|
||||
blobs in userspace. When the guest writes the MSR, kvm copies one
|
||||
page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
|
||||
memory.
|
||||
|
||||
struct kvm_xen_hvm_config {
|
||||
__u32 flags;
|
||||
__u32 msr;
|
||||
__u64 blob_addr_32;
|
||||
__u64 blob_addr_64;
|
||||
__u8 blob_size_32;
|
||||
__u8 blob_size_64;
|
||||
__u8 pad2[30];
|
||||
};
|
||||
|
||||
4.27 KVM_GET_CLOCK
|
||||
|
||||
Capability: KVM_CAP_ADJUST_CLOCK
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_clock_data (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Gets the current timestamp of kvmclock as seen by the current guest. In
|
||||
conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity in scenarios
|
||||
such as migration.
|
||||
|
||||
struct kvm_clock_data {
|
||||
__u64 clock; /* kvmclock current value */
|
||||
__u32 flags;
|
||||
__u32 pad[9];
|
||||
};
|
||||
|
||||
4.28 KVM_SET_CLOCK
|
||||
|
||||
Capability: KVM_CAP_ADJUST_CLOCK
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_clock_data (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Sets the current timestamp of kvmclock to the value specified in its parameter.
|
||||
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity in scenarios
|
||||
such as migration.
|
||||
|
||||
struct kvm_clock_data {
|
||||
__u64 clock; /* kvmclock current value */
|
||||
__u32 flags;
|
||||
__u32 pad[9];
|
||||
};
|
||||
|
||||
4.29 KVM_GET_VCPU_EVENTS
|
||||
|
||||
Capability: KVM_CAP_VCPU_EVENTS
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_vcpu_events (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Gets currently pending exceptions, interrupts, and NMIs as well as related
|
||||
states of the vcpu.
|
||||
|
||||
struct kvm_vcpu_events {
|
||||
struct {
|
||||
__u8 injected;
|
||||
__u8 nr;
|
||||
__u8 has_error_code;
|
||||
__u8 pad;
|
||||
__u32 error_code;
|
||||
} exception;
|
||||
struct {
|
||||
__u8 injected;
|
||||
__u8 nr;
|
||||
__u8 soft;
|
||||
__u8 pad;
|
||||
} interrupt;
|
||||
struct {
|
||||
__u8 injected;
|
||||
__u8 pending;
|
||||
__u8 masked;
|
||||
__u8 pad;
|
||||
} nmi;
|
||||
__u32 sipi_vector;
|
||||
__u32 flags; /* must be zero */
|
||||
};
|
||||
|
||||
4.30 KVM_SET_VCPU_EVENTS
|
||||
|
||||
Capability: KVM_CAP_VCPU_EVENTS
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_vcpu_events (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Sets pending exceptions, interrupts, and NMIs as well as related states of the
|
||||
vcpu.
|
||||
|
||||
See KVM_GET_VCPU_EVENTS for the data structure.
|
||||
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
Application code obtains a pointer to the kvm_run structure by
|
||||
|
@ -83,6 +83,13 @@ config KRETPROBES
|
||||
def_bool y
|
||||
depends on KPROBES && HAVE_KRETPROBES
|
||||
|
||||
config USER_RETURN_NOTIFIER
|
||||
bool
|
||||
depends on HAVE_USER_RETURN_NOTIFIER
|
||||
help
|
||||
Provide a kernel-internal notification when a cpu is about to
|
||||
switch to user mode.
|
||||
|
||||
config HAVE_IOREMAP_PROT
|
||||
bool
|
||||
|
||||
@ -132,5 +139,7 @@ config HAVE_HW_BREAKPOINT
|
||||
select ANON_INODES
|
||||
select PERF_EVENTS
|
||||
|
||||
config HAVE_USER_RETURN_NOTIFIER
|
||||
bool
|
||||
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
@ -60,6 +60,7 @@ struct kvm_ioapic_state {
|
||||
#define KVM_IRQCHIP_PIC_MASTER 0
|
||||
#define KVM_IRQCHIP_PIC_SLAVE 1
|
||||
#define KVM_IRQCHIP_IOAPIC 2
|
||||
#define KVM_NR_IRQCHIPS 3
|
||||
|
||||
#define KVM_CONTEXT_SIZE 8*1024
|
||||
|
||||
|
@ -475,7 +475,6 @@ struct kvm_arch {
|
||||
struct list_head assigned_dev_head;
|
||||
struct iommu_domain *iommu_domain;
|
||||
int iommu_flags;
|
||||
struct hlist_head irq_ack_notifier_list;
|
||||
|
||||
unsigned long irq_sources_bitmap;
|
||||
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
|
||||
|
@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
|
||||
EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
|
||||
|
||||
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
|
||||
coalesced_mmio.o irq_comm.o)
|
||||
coalesced_mmio.o irq_comm.o assigned-dev.o)
|
||||
|
||||
ifeq ($(CONFIG_IOMMU_API),y)
|
||||
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
|
||||
|
@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
|
||||
|
||||
static DEFINE_SPINLOCK(vp_lock);
|
||||
|
||||
void kvm_arch_hardware_enable(void *garbage)
|
||||
int kvm_arch_hardware_enable(void *garbage)
|
||||
{
|
||||
long status;
|
||||
long tmp_base;
|
||||
@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
|
||||
slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
|
||||
local_irq_restore(saved_psr);
|
||||
if (slot < 0)
|
||||
return;
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&vp_lock);
|
||||
status = ia64_pal_vp_init_env(kvm_vsa_base ?
|
||||
@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
|
||||
__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
|
||||
if (status != 0) {
|
||||
printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
|
||||
return ;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!kvm_vsa_base) {
|
||||
@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
|
||||
}
|
||||
spin_unlock(&vp_lock);
|
||||
ia64_ptr_entry(0x3, slot);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void *garbage)
|
||||
@ -851,8 +853,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
|
||||
r = 0;
|
||||
switch (chip->chip_id) {
|
||||
case KVM_IRQCHIP_IOAPIC:
|
||||
memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
|
||||
sizeof(struct kvm_ioapic_state));
|
||||
r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
@ -868,9 +869,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
|
||||
r = 0;
|
||||
switch (chip->chip_id) {
|
||||
case KVM_IRQCHIP_IOAPIC:
|
||||
memcpy(ioapic_irqchip(kvm),
|
||||
&chip->chip.ioapic,
|
||||
sizeof(struct kvm_ioapic_state));
|
||||
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
@ -944,7 +943,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
{
|
||||
struct kvm *kvm = filp->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
int r = -EINVAL;
|
||||
int r = -ENOTTY;
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_SET_MEMORY_REGION: {
|
||||
@ -985,10 +984,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
goto out;
|
||||
if (irqchip_in_kernel(kvm)) {
|
||||
__s32 status;
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
|
||||
irq_event.irq, irq_event.level);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
if (ioctl == KVM_IRQ_LINE_STATUS) {
|
||||
irq_event.status = status;
|
||||
if (copy_to_user(argp, &irq_event,
|
||||
|
@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_enable(void *garbage)
|
||||
int kvm_arch_hardware_enable(void *garbage)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void *garbage)
|
||||
@ -421,7 +422,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
|
||||
switch (ioctl) {
|
||||
default:
|
||||
r = -EINVAL;
|
||||
r = -ENOTTY;
|
||||
}
|
||||
|
||||
return r;
|
||||
|
@ -51,7 +51,7 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
|
||||
|
||||
/* The BUILD_BUG_ON below breaks in funny ways, commented out
|
||||
* for now ... -BenH
|
||||
BUILD_BUG_ON(__builtin_constant_p(type));
|
||||
BUILD_BUG_ON(!__builtin_constant_p(type));
|
||||
*/
|
||||
switch (type) {
|
||||
case EXT_INTR_EXITS:
|
||||
|
@ -1,6 +1,5 @@
|
||||
#ifndef __LINUX_KVM_S390_H
|
||||
#define __LINUX_KVM_S390_H
|
||||
|
||||
/*
|
||||
* asm-s390/kvm.h - KVM s390 specific structures and definitions
|
||||
*
|
||||
@ -15,6 +14,8 @@
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
|
||||
#define __KVM_S390
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
/* general purpose regs for s390 */
|
||||
|
@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
static unsigned long long *facilities;
|
||||
|
||||
/* Section: not file related */
|
||||
void kvm_arch_hardware_enable(void *garbage)
|
||||
int kvm_arch_hardware_enable(void *garbage)
|
||||
{
|
||||
/* every s390 is virtualization enabled ;-) */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void *garbage)
|
||||
@ -116,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
|
||||
int kvm_dev_ioctl_check_extension(long ext)
|
||||
{
|
||||
int r;
|
||||
|
||||
switch (ext) {
|
||||
case KVM_CAP_S390_PSW:
|
||||
r = 1;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
r = 0;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Section: vm related */
|
||||
@ -150,7 +157,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -EINVAL;
|
||||
r = -ENOTTY;
|
||||
}
|
||||
|
||||
return r;
|
||||
@ -419,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
|
||||
vcpu_load(vcpu);
|
||||
if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
|
||||
rc = -EBUSY;
|
||||
else
|
||||
vcpu->arch.sie_block->gpsw = psw;
|
||||
else {
|
||||
vcpu->run->psw_mask = psw.mask;
|
||||
vcpu->run->psw_addr = psw.addr;
|
||||
}
|
||||
vcpu_put(vcpu);
|
||||
return rc;
|
||||
}
|
||||
@ -508,9 +517,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
|
||||
switch (kvm_run->exit_reason) {
|
||||
case KVM_EXIT_S390_SIEIC:
|
||||
vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
|
||||
vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
|
||||
break;
|
||||
case KVM_EXIT_UNKNOWN:
|
||||
case KVM_EXIT_INTR:
|
||||
case KVM_EXIT_S390_RESET:
|
||||
@ -519,6 +525,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
BUG();
|
||||
}
|
||||
|
||||
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
|
||||
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
|
||||
|
||||
might_fault();
|
||||
|
||||
do {
|
||||
@ -538,8 +547,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
/* intercept cannot be handled in-kernel, prepare kvm-run */
|
||||
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
|
||||
kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
|
||||
kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
|
||||
kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
|
||||
kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
|
||||
kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
|
||||
rc = 0;
|
||||
@ -551,6 +558,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
rc = 0;
|
||||
}
|
||||
|
||||
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
|
||||
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
|
||||
|
||||
if (vcpu->sigset_active)
|
||||
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
|
||||
|
||||
|
@ -188,9 +188,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
|
||||
|
||||
/* make sure that the new value is valid memory */
|
||||
address = address & 0x7fffe000u;
|
||||
if ((copy_from_guest(vcpu, &tmp,
|
||||
(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
|
||||
(copy_from_guest(vcpu, &tmp, (u64) (address +
|
||||
if ((copy_from_user(&tmp, (void __user *)
|
||||
(address + vcpu->arch.sie_block->gmsor) , 1)) ||
|
||||
(copy_from_user(&tmp, (void __user *)(address +
|
||||
vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
|
||||
*reg |= SIGP_STAT_INVALID_PARAMETER;
|
||||
return 1; /* invalid parameter */
|
||||
|
@ -51,6 +51,7 @@ config X86
|
||||
select HAVE_KERNEL_LZMA
|
||||
select HAVE_HW_BREAKPOINT
|
||||
select HAVE_ARCH_KMEMCHECK
|
||||
select HAVE_USER_RETURN_NOTIFIER
|
||||
|
||||
config OUTPUT_FORMAT
|
||||
string
|
||||
|
@ -19,6 +19,8 @@
|
||||
#define __KVM_HAVE_MSIX
|
||||
#define __KVM_HAVE_MCE
|
||||
#define __KVM_HAVE_PIT_STATE2
|
||||
#define __KVM_HAVE_XEN_HVM
|
||||
#define __KVM_HAVE_VCPU_EVENTS
|
||||
|
||||
/* Architectural interrupt line count. */
|
||||
#define KVM_NR_INTERRUPTS 256
|
||||
@ -79,6 +81,7 @@ struct kvm_ioapic_state {
|
||||
#define KVM_IRQCHIP_PIC_MASTER 0
|
||||
#define KVM_IRQCHIP_PIC_SLAVE 1
|
||||
#define KVM_IRQCHIP_IOAPIC 2
|
||||
#define KVM_NR_IRQCHIPS 3
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
@ -250,4 +253,31 @@ struct kvm_reinject_control {
|
||||
__u8 pit_reinject;
|
||||
__u8 reserved[31];
|
||||
};
|
||||
|
||||
/*
 * for KVM_GET/SET_VCPU_EVENTS
 *
 * Carries the pending exceptions, interrupts and NMIs of a vcpu, plus
 * related state, between the kernel and userspace.  The layout is part of
 * the userspace ABI, so field order and sizes must not change.
 */
|
||||
struct kvm_vcpu_events {
|
||||
/* exception state; error_code is presumably only meaningful when
 * has_error_code is set -- TODO confirm against the ioctl handlers */
struct {
|
||||
__u8 injected;
|
||||
__u8 nr;
|
||||
__u8 has_error_code;
|
||||
__u8 pad;
|
||||
__u32 error_code;
|
||||
} exception;
|
||||
/* interrupt state */
struct {
|
||||
__u8 injected;
|
||||
__u8 nr;
|
||||
__u8 soft;
|
||||
__u8 pad;
|
||||
} interrupt;
|
||||
/* NMI state */
struct {
|
||||
__u8 injected;
|
||||
__u8 pending;
|
||||
__u8 masked;
|
||||
__u8 pad;
|
||||
} nmi;
|
||||
__u32 sipi_vector;
|
||||
/* the API documentation describes flags as "must be zero" */
__u32 flags;
|
||||
/* presumably room for future extension -- TODO confirm */
__u32 reserved[10];
|
||||
};
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
@ -129,7 +129,7 @@ struct decode_cache {
|
||||
u8 seg_override;
|
||||
unsigned int d;
|
||||
unsigned long regs[NR_VCPU_REGS];
|
||||
unsigned long eip;
|
||||
unsigned long eip, eip_orig;
|
||||
/* modrm */
|
||||
u8 modrm;
|
||||
u8 modrm_mod;
|
||||
|
@ -354,7 +354,6 @@ struct kvm_vcpu_arch {
|
||||
unsigned int time_offset;
|
||||
struct page *time_page;
|
||||
|
||||
bool singlestep; /* guest is single stepped by KVM */
|
||||
bool nmi_pending;
|
||||
bool nmi_injected;
|
||||
|
||||
@ -371,6 +370,10 @@ struct kvm_vcpu_arch {
|
||||
u64 mcg_status;
|
||||
u64 mcg_ctl;
|
||||
u64 *mce_banks;
|
||||
|
||||
/* used for guest single stepping over the given code position */
|
||||
u16 singlestep_cs;
|
||||
unsigned long singlestep_rip;
|
||||
};
|
||||
|
||||
struct kvm_mem_alias {
|
||||
@ -397,7 +400,6 @@ struct kvm_arch{
|
||||
struct kvm_pic *vpic;
|
||||
struct kvm_ioapic *vioapic;
|
||||
struct kvm_pit *vpit;
|
||||
struct hlist_head irq_ack_notifier_list;
|
||||
int vapics_in_nmi_mode;
|
||||
|
||||
unsigned int tss_addr;
|
||||
@ -410,8 +412,10 @@ struct kvm_arch{
|
||||
gpa_t ept_identity_map_addr;
|
||||
|
||||
unsigned long irq_sources_bitmap;
|
||||
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
|
||||
u64 vm_init_tsc;
|
||||
s64 kvmclock_offset;
|
||||
|
||||
struct kvm_xen_hvm_config xen_hvm_config;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
@ -461,7 +465,7 @@ struct descriptor_table {
|
||||
struct kvm_x86_ops {
|
||||
int (*cpu_has_kvm_support)(void); /* __init */
|
||||
int (*disabled_by_bios)(void); /* __init */
|
||||
void (*hardware_enable)(void *dummy); /* __init */
|
||||
int (*hardware_enable)(void *dummy);
|
||||
void (*hardware_disable)(void *dummy);
|
||||
void (*check_processor_compatibility)(void *rtn);
|
||||
int (*hardware_setup)(void); /* __init */
|
||||
@ -477,8 +481,8 @@ struct kvm_x86_ops {
|
||||
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
|
||||
void (*vcpu_put)(struct kvm_vcpu *vcpu);
|
||||
|
||||
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_guest_debug *dbg);
|
||||
void (*set_guest_debug)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_guest_debug *dbg);
|
||||
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
|
||||
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
|
||||
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
|
||||
@ -506,8 +510,8 @@ struct kvm_x86_ops {
|
||||
|
||||
void (*tlb_flush)(struct kvm_vcpu *vcpu);
|
||||
|
||||
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
void (*run)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_exit)(struct kvm_vcpu *vcpu);
|
||||
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
|
||||
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
|
||||
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
|
||||
@ -519,6 +523,8 @@ struct kvm_x86_ops {
|
||||
bool has_error_code, u32 error_code);
|
||||
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
|
||||
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
|
||||
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
|
||||
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
|
||||
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
|
||||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
@ -568,7 +574,7 @@ enum emulation_result {
|
||||
#define EMULTYPE_NO_DECODE (1 << 0)
|
||||
#define EMULTYPE_TRAP_UD (1 << 1)
|
||||
#define EMULTYPE_SKIP (1 << 2)
|
||||
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
unsigned long cr2, u16 error_code, int emulation_type);
|
||||
void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
|
||||
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
|
||||
@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
|
||||
|
||||
struct x86_emulate_ctxt;
|
||||
|
||||
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
||||
int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
|
||||
int size, unsigned port);
|
||||
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
||||
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
|
||||
int size, unsigned long count, int down,
|
||||
gva_t address, int rep, unsigned port);
|
||||
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
|
||||
@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
|
||||
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
||||
|
||||
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
|
||||
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
|
||||
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
|
||||
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
|
||||
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
|
||||
@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
|
||||
|
||||
void kvm_define_shared_msr(unsigned index, u32 msr);
|
||||
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
||||
u16 intercept_dr_write;
|
||||
u32 intercept_exceptions;
|
||||
u64 intercept;
|
||||
u8 reserved_1[44];
|
||||
u8 reserved_1[42];
|
||||
u16 pause_filter_count;
|
||||
u64 iopm_base_pa;
|
||||
u64 msrpm_base_pa;
|
||||
u64 tsc_offset;
|
||||
|
@ -83,6 +83,7 @@ struct thread_info {
|
||||
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
|
||||
#define TIF_SECCOMP 8 /* secure computing */
|
||||
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
|
||||
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
|
||||
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
|
||||
#define TIF_IA32 17 /* 32bit process */
|
||||
#define TIF_FORK 18 /* ret_from_fork */
|
||||
@ -107,6 +108,7 @@ struct thread_info {
|
||||
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
|
||||
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
|
||||
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
|
||||
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
|
||||
#define _TIF_NOTSC (1 << TIF_NOTSC)
|
||||
#define _TIF_IA32 (1 << TIF_IA32)
|
||||
#define _TIF_FORK (1 << TIF_FORK)
|
||||
@ -142,13 +144,14 @@ struct thread_info {
|
||||
|
||||
/* Only used for 64 bit */
|
||||
#define _TIF_DO_NOTIFY_MASK \
|
||||
(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
|
||||
(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
|
||||
_TIF_USER_RETURN_NOTIFY)
|
||||
|
||||
/* flags to check in __switch_to() */
|
||||
#define _TIF_WORK_CTXSW \
|
||||
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
|
||||
|
||||
#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
|
||||
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
|
||||
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
|
||||
|
||||
#define PREEMPT_ACTIVE 0x10000000
|
||||
|
@ -56,6 +56,7 @@
|
||||
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
|
||||
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
|
||||
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
|
||||
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
|
||||
|
||||
|
||||
#define PIN_BASED_EXT_INTR_MASK 0x00000001
|
||||
@ -144,6 +145,8 @@ enum vmcs_field {
|
||||
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
|
||||
TPR_THRESHOLD = 0x0000401c,
|
||||
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
|
||||
PLE_GAP = 0x00004020,
|
||||
PLE_WINDOW = 0x00004022,
|
||||
VM_INSTRUCTION_ERROR = 0x00004400,
|
||||
VM_EXIT_REASON = 0x00004402,
|
||||
VM_EXIT_INTR_INFO = 0x00004404,
|
||||
@ -248,6 +251,7 @@ enum vmcs_field {
|
||||
#define EXIT_REASON_MSR_READ 31
|
||||
#define EXIT_REASON_MSR_WRITE 32
|
||||
#define EXIT_REASON_MWAIT_INSTRUCTION 36
|
||||
#define EXIT_REASON_PAUSE_INSTRUCTION 40
|
||||
#define EXIT_REASON_MCE_DURING_VMENTRY 41
|
||||
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
|
||||
#define EXIT_REASON_APIC_ACCESS 44
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/pm.h>
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/user-return-notifier.h>
|
||||
#include <trace/events/power.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <asm/system.h>
|
||||
@ -209,6 +210,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
||||
*/
|
||||
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
|
||||
}
|
||||
propagate_user_return_notify(prev_p, next_p);
|
||||
}
|
||||
|
||||
int sys_fork(struct pt_regs *regs)
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/user-return-notifier.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/ucontext.h>
|
||||
@ -863,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
||||
if (current->replacement_session_keyring)
|
||||
key_replace_session_keyring();
|
||||
}
|
||||
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
|
||||
fire_user_return_notifiers();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
clear_thread_flag(TIF_IRET);
|
||||
|
@ -28,6 +28,7 @@ config KVM
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_EVENTFD
|
||||
select KVM_APIC_ARCHITECTURE
|
||||
select USER_RETURN_NOTIFIER
|
||||
---help---
|
||||
Support hosting fully virtualized guest machines using hardware
|
||||
virtualization extensions. You will need a fairly recent
|
||||
|
@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
|
||||
CFLAGS_vmx.o := -I.
|
||||
|
||||
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
|
||||
coalesced_mmio.o irq_comm.o eventfd.o)
|
||||
coalesced_mmio.o irq_comm.o eventfd.o \
|
||||
assigned-dev.o)
|
||||
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
|
||||
|
||||
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
|
||||
|
@ -75,6 +75,8 @@
|
||||
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
|
||||
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
|
||||
#define GroupMask 0xff /* Group number stored in bits 0:7 */
|
||||
/* Misc flags */
|
||||
#define No64 (1<<28)
|
||||
/* Source 2 operand type */
|
||||
#define Src2None (0<<29)
|
||||
#define Src2CL (1<<29)
|
||||
@ -92,19 +94,23 @@ static u32 opcode_table[256] = {
|
||||
/* 0x00 - 0x07 */
|
||||
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
|
||||
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
|
||||
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
|
||||
/* 0x08 - 0x0F */
|
||||
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
|
||||
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
|
||||
0, 0, 0, 0,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
|
||||
ImplicitOps | Stack | No64, 0,
|
||||
/* 0x10 - 0x17 */
|
||||
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
|
||||
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
|
||||
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
|
||||
/* 0x18 - 0x1F */
|
||||
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
|
||||
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
|
||||
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
|
||||
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
|
||||
/* 0x20 - 0x27 */
|
||||
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
|
||||
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
|
||||
@ -133,7 +139,8 @@ static u32 opcode_table[256] = {
|
||||
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
|
||||
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
|
||||
/* 0x60 - 0x67 */
|
||||
0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
|
||||
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
|
||||
0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
|
||||
0, 0, 0, 0,
|
||||
/* 0x68 - 0x6F */
|
||||
SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
|
||||
@ -158,7 +165,7 @@ static u32 opcode_table[256] = {
|
||||
/* 0x90 - 0x97 */
|
||||
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
|
||||
/* 0x98 - 0x9F */
|
||||
0, 0, SrcImm | Src2Imm16, 0,
|
||||
0, 0, SrcImm | Src2Imm16 | No64, 0,
|
||||
ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
|
||||
/* 0xA0 - 0xA7 */
|
||||
ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
|
||||
@ -185,7 +192,7 @@ static u32 opcode_table[256] = {
|
||||
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
|
||||
/* 0xC8 - 0xCF */
|
||||
0, 0, 0, ImplicitOps | Stack,
|
||||
ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
|
||||
ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
|
||||
/* 0xD0 - 0xD7 */
|
||||
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
|
||||
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
|
||||
@ -198,7 +205,7 @@ static u32 opcode_table[256] = {
|
||||
ByteOp | SrcImmUByte, SrcImmUByte,
|
||||
/* 0xE8 - 0xEF */
|
||||
SrcImm | Stack, SrcImm | ImplicitOps,
|
||||
SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
|
||||
SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
|
||||
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
|
||||
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
|
||||
/* 0xF0 - 0xF7 */
|
||||
@ -244,11 +251,13 @@ static u32 twobyte_table[256] = {
|
||||
/* 0x90 - 0x9F */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 0xA0 - 0xA7 */
|
||||
0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
|
||||
ImplicitOps | Stack, ImplicitOps | Stack,
|
||||
0, DstMem | SrcReg | ModRM | BitOp,
|
||||
DstMem | SrcReg | Src2ImmByte | ModRM,
|
||||
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
|
||||
/* 0xA8 - 0xAF */
|
||||
0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
|
||||
ImplicitOps | Stack, ImplicitOps | Stack,
|
||||
0, DstMem | SrcReg | ModRM | BitOp,
|
||||
DstMem | SrcReg | Src2ImmByte | ModRM,
|
||||
DstMem | SrcReg | Src2CL | ModRM,
|
||||
ModRM, 0,
|
||||
@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
/* x86 instructions are limited to 15 bytes. */
|
||||
if (eip + size - ctxt->decode.eip_orig > 15)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
eip += ctxt->cs_base;
|
||||
while (size--) {
|
||||
rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
|
||||
@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
/* Shadow copy of register state. Committed on successful emulation. */
|
||||
|
||||
memset(c, 0, sizeof(struct decode_cache));
|
||||
c->eip = kvm_rip_read(ctxt->vcpu);
|
||||
c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
|
||||
ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
|
||||
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
|
||||
|
||||
@ -962,6 +974,11 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
|
||||
kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (c->d & Group) {
|
||||
group = c->d & GroupMask;
|
||||
c->modrm = insn_fetch(u8, 1, c->eip);
|
||||
@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Emulate pushing segment register @seg.
 *
 * The segment is read through kvm_x86_ops->get_segment(), its selector is
 * stored in the decode cache's source operand, and emulate_push() is then
 * called on the context (presumably it pushes c->src.val; emulate_push()
 * is not visible here).
 *
 * @ctxt: emulation context of the instruction being emulated
 * @seg:  segment register index handed to get_segment()
 */
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
|
||||
{
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
struct kvm_segment segment;
|
||||
|
||||
kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
|
||||
|
||||
/* only the selector is pushed, not the rest of the segment state */
c->src.val = segment.selector;
|
||||
emulate_push(ctxt);
|
||||
}
|
||||
|
||||
/*
 * Emulate popping a value into segment register @seg.
 *
 * Pops c->op_bytes bytes into a local selector via emulate_pop().  If that
 * succeeds, the low 16 bits are passed to kvm_load_segment_descriptor() to
 * load @seg.
 *
 * Returns the non-zero result of emulate_pop() if the pop fails, otherwise
 * the result of kvm_load_segment_descriptor().
 */
static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops, int seg)
|
||||
{
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
unsigned long selector;
|
||||
int rc;
|
||||
|
||||
rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
|
||||
/* the popped value is truncated to a 16-bit selector before loading */
rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
unsigned long old_esp = c->regs[VCPU_REGS_RSP];
|
||||
int reg = VCPU_REGS_RAX;
|
||||
|
||||
while (reg <= VCPU_REGS_RDI) {
|
||||
(reg == VCPU_REGS_RSP) ?
|
||||
(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
|
||||
|
||||
emulate_push(ctxt);
|
||||
++reg;
|
||||
}
|
||||
}
|
||||
|
||||
static int emulate_popa(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops)
|
||||
{
|
||||
struct decode_cache *c = &ctxt->decode;
|
||||
int rc = 0;
|
||||
int reg = VCPU_REGS_RDI;
|
||||
|
||||
while (reg >= VCPU_REGS_RAX) {
|
||||
if (reg == VCPU_REGS_RSP) {
|
||||
register_address_increment(c, &c->regs[VCPU_REGS_RSP],
|
||||
c->op_bytes);
|
||||
--reg;
|
||||
}
|
||||
|
||||
rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
|
||||
if (rc != 0)
|
||||
break;
|
||||
--reg;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
|
||||
struct x86_emulate_ops *ops)
|
||||
{
|
||||
@ -1707,18 +1787,45 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
add: /* add */
|
||||
emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
|
||||
break;
|
||||
case 0x06: /* push es */
|
||||
emulate_push_sreg(ctxt, VCPU_SREG_ES);
|
||||
break;
|
||||
case 0x07: /* pop es */
|
||||
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0x08 ... 0x0d:
|
||||
or: /* or */
|
||||
emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
|
||||
break;
|
||||
case 0x0e: /* push cs */
|
||||
emulate_push_sreg(ctxt, VCPU_SREG_CS);
|
||||
break;
|
||||
case 0x10 ... 0x15:
|
||||
adc: /* adc */
|
||||
emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
|
||||
break;
|
||||
case 0x16: /* push ss */
|
||||
emulate_push_sreg(ctxt, VCPU_SREG_SS);
|
||||
break;
|
||||
case 0x17: /* pop ss */
|
||||
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0x18 ... 0x1d:
|
||||
sbb: /* sbb */
|
||||
emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
|
||||
break;
|
||||
case 0x1e: /* push ds */
|
||||
emulate_push_sreg(ctxt, VCPU_SREG_DS);
|
||||
break;
|
||||
case 0x1f: /* pop ds */
|
||||
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0x20 ... 0x25:
|
||||
and: /* and */
|
||||
emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
|
||||
@ -1750,6 +1857,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0x60: /* pusha */
|
||||
emulate_pusha(ctxt);
|
||||
break;
|
||||
case 0x61: /* popa */
|
||||
rc = emulate_popa(ctxt, ops);
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0x63: /* movsxd */
|
||||
if (ctxt->mode != X86EMUL_MODE_PROT64)
|
||||
goto cannot_emulate;
|
||||
@ -1761,7 +1876,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
break;
|
||||
case 0x6c: /* insb */
|
||||
case 0x6d: /* insw/insd */
|
||||
if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
|
||||
if (kvm_emulate_pio_string(ctxt->vcpu,
|
||||
1,
|
||||
(c->d & ByteOp) ? 1 : c->op_bytes,
|
||||
c->rep_prefix ?
|
||||
@ -1777,7 +1892,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
return 0;
|
||||
case 0x6e: /* outsb */
|
||||
case 0x6f: /* outsw/outsd */
|
||||
if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
|
||||
if (kvm_emulate_pio_string(ctxt->vcpu,
|
||||
0,
|
||||
(c->d & ByteOp) ? 1 : c->op_bytes,
|
||||
c->rep_prefix ?
|
||||
@ -2070,7 +2185,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
case 0xef: /* out (e/r)ax,dx */
|
||||
port = c->regs[VCPU_REGS_RDX];
|
||||
io_dir_in = 0;
|
||||
do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
|
||||
do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
|
||||
(c->d & ByteOp) ? 1 : c->op_bytes,
|
||||
port) != 0) {
|
||||
c->eip = saved_eip;
|
||||
@ -2297,6 +2412,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
jmp_rel(c, c->src.val);
|
||||
c->dst.type = OP_NONE;
|
||||
break;
|
||||
case 0xa0: /* push fs */
|
||||
emulate_push_sreg(ctxt, VCPU_SREG_FS);
|
||||
break;
|
||||
case 0xa1: /* pop fs */
|
||||
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0xa3:
|
||||
bt: /* bt */
|
||||
c->dst.type = OP_NONE;
|
||||
@ -2308,6 +2431,14 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
|
||||
case 0xa5: /* shld cl, r, r/m */
|
||||
emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
|
||||
break;
|
||||
case 0xa8: /* push gs */
|
||||
emulate_push_sreg(ctxt, VCPU_SREG_GS);
|
||||
break;
|
||||
case 0xa9: /* pop gs */
|
||||
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
|
||||
if (rc != 0)
|
||||
goto done;
|
||||
break;
|
||||
case 0xab:
|
||||
bts: /* bts */
|
||||
/* only subword offset */
|
||||
|
@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
|
||||
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
|
||||
/*
|
||||
* Provides NMI watchdog support via Virtual Wire mode.
|
||||
|
@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
|
||||
s->isr_ack |= (1 << irq);
|
||||
if (s != &s->pics_state->pics[0])
|
||||
irq += 8;
|
||||
/*
|
||||
* We are dropping lock while calling ack notifiers since ack
|
||||
* notifier callbacks for assigned devices call into PIC recursively.
|
||||
* Other interrupt may be delivered to PIC while lock is dropped but
|
||||
* it should be safe since PIC state is already updated at this stage.
|
||||
*/
|
||||
spin_unlock(&s->pics_state->lock);
|
||||
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
|
||||
spin_lock(&s->pics_state->lock);
|
||||
}
|
||||
|
||||
void kvm_pic_clear_isr_ack(struct kvm *kvm)
|
||||
@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
|
||||
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
|
||||
{
|
||||
s->isr |= 1 << irq;
|
||||
if (s->auto_eoi) {
|
||||
if (s->rotate_on_auto_eoi)
|
||||
s->priority_add = (irq + 1) & 7;
|
||||
pic_clear_isr(s, irq);
|
||||
}
|
||||
/*
|
||||
* We don't clear a level sensitive interrupt here
|
||||
*/
|
||||
if (!(s->elcr & (1 << irq)))
|
||||
s->irr &= ~(1 << irq);
|
||||
|
||||
if (s->auto_eoi) {
|
||||
if (s->rotate_on_auto_eoi)
|
||||
s->priority_add = (irq + 1) & 7;
|
||||
pic_clear_isr(s, irq);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int kvm_pic_read_irq(struct kvm *kvm)
|
||||
@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm)
|
||||
|
||||
void kvm_pic_reset(struct kvm_kpic_state *s)
|
||||
{
|
||||
int irq, irqbase, n;
|
||||
int irq;
|
||||
struct kvm *kvm = s->pics_state->irq_request_opaque;
|
||||
struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
|
||||
u8 irr = s->irr, isr = s->imr;
|
||||
|
||||
if (s == &s->pics_state->pics[0])
|
||||
irqbase = 0;
|
||||
else
|
||||
irqbase = 8;
|
||||
|
||||
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
|
||||
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
|
||||
if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
|
||||
n = irq + irqbase;
|
||||
kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
|
||||
}
|
||||
}
|
||||
s->last_irr = 0;
|
||||
s->irr = 0;
|
||||
s->imr = 0;
|
||||
@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
|
||||
s->rotate_on_auto_eoi = 0;
|
||||
s->special_fully_nested_mode = 0;
|
||||
s->init4 = 0;
|
||||
|
||||
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
|
||||
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
|
||||
if (irr & (1 << irq) || isr & (1 << irq)) {
|
||||
pic_clear_isr(s, irq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void pic_ioport_write(void *opaque, u32 addr, u32 val)
|
||||
@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
|
||||
priority = get_priority(s, s->isr);
|
||||
if (priority != 8) {
|
||||
irq = (priority + s->priority_add) & 7;
|
||||
pic_clear_isr(s, irq);
|
||||
if (cmd == 5)
|
||||
s->priority_add = (irq + 1) & 7;
|
||||
pic_clear_isr(s, irq);
|
||||
pic_update_irq(s->pics_state);
|
||||
}
|
||||
break;
|
||||
|
@ -71,6 +71,7 @@ struct kvm_pic {
|
||||
int output; /* intr from master PIC */
|
||||
struct kvm_io_device dev;
|
||||
void (*ack_notifier)(void *opaque, int irq);
|
||||
unsigned long irq_states[16];
|
||||
};
|
||||
|
||||
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
|
||||
@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
|
||||
|
||||
static inline int irqchip_in_kernel(struct kvm *kvm)
|
||||
{
|
||||
return pic_irqchip(kvm) != NULL;
|
||||
int ret;
|
||||
|
||||
ret = (pic_irqchip(kvm) != NULL);
|
||||
smp_rmb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_pic_reset(struct kvm_kpic_state *s);
|
||||
|
@ -32,7 +32,6 @@
|
||||
#include <asm/current.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "irq.h"
|
||||
#include "trace.h"
|
||||
@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
|
||||
mutex_lock(&apic->vcpu->kvm->irq_lock);
|
||||
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
|
||||
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
|
||||
mutex_unlock(&apic->vcpu->kvm->irq_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
|
||||
irq.vector);
|
||||
|
||||
mutex_lock(&apic->vcpu->kvm->irq_lock);
|
||||
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
|
||||
mutex_unlock(&apic->vcpu->kvm->irq_lock);
|
||||
}
|
||||
|
||||
static u32 apic_get_tmcct(struct kvm_lapic *apic)
|
||||
|
@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
|
||||
er = emulate_instruction(vcpu, cr2, error_code, 0);
|
||||
|
||||
switch (er) {
|
||||
case EMULATE_DONE:
|
||||
@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
|
||||
case EMULATE_FAIL:
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||
vcpu->run->internal.ndata = 0;
|
||||
return 0;
|
||||
default:
|
||||
BUG();
|
||||
|
@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
|
||||
level = iterator.level;
|
||||
sptep = iterator.sptep;
|
||||
|
||||
/* FIXME: properly handle invlpg on large guest pages */
|
||||
if (level == PT_PAGE_TABLE_LEVEL ||
|
||||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
|
||||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
|
||||
|
@ -46,6 +46,7 @@ MODULE_LICENSE("GPL");
|
||||
#define SVM_FEATURE_NPT (1 << 0)
|
||||
#define SVM_FEATURE_LBRV (1 << 1)
|
||||
#define SVM_FEATURE_SVML (1 << 2)
|
||||
#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
|
||||
|
||||
#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
|
||||
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
|
||||
@ -53,15 +54,6 @@ MODULE_LICENSE("GPL");
|
||||
|
||||
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
|
||||
|
||||
/* Turn on to get debugging output*/
|
||||
/* #define NESTED_DEBUG */
|
||||
|
||||
#ifdef NESTED_DEBUG
|
||||
#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
|
||||
#else
|
||||
#define nsvm_printk(fmt, args...) do {} while(0)
|
||||
#endif
|
||||
|
||||
static const u32 host_save_user_msrs[] = {
|
||||
#ifdef CONFIG_X86_64
|
||||
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
|
||||
@ -85,6 +77,9 @@ struct nested_state {
|
||||
/* gpa pointers to the real vectors */
|
||||
u64 vmcb_msrpm;
|
||||
|
||||
/* A VMEXIT is required but not yet emulated */
|
||||
bool exit_required;
|
||||
|
||||
/* cache for intercepts of the guest */
|
||||
u16 intercept_cr_read;
|
||||
u16 intercept_cr_write;
|
||||
@ -112,6 +107,8 @@ struct vcpu_svm {
|
||||
u32 *msrpm;
|
||||
|
||||
struct nested_state nested;
|
||||
|
||||
bool nmi_singlestep;
|
||||
};
|
||||
|
||||
/* enable NPT for AMD64 and X86 with PAE */
|
||||
@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (!svm->next_rip) {
|
||||
if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
|
||||
if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
|
||||
EMULATE_DONE)
|
||||
printk(KERN_DEBUG "%s: NOP\n", __func__);
|
||||
return;
|
||||
@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage)
|
||||
cpu_svm_disable();
|
||||
}
|
||||
|
||||
static void svm_hardware_enable(void *garbage)
|
||||
static int svm_hardware_enable(void *garbage)
|
||||
{
|
||||
|
||||
struct svm_cpu_data *svm_data;
|
||||
@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage)
|
||||
struct desc_struct *gdt;
|
||||
int me = raw_smp_processor_id();
|
||||
|
||||
rdmsrl(MSR_EFER, efer);
|
||||
if (efer & EFER_SVME)
|
||||
return -EBUSY;
|
||||
|
||||
if (!has_svm()) {
|
||||
printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
|
||||
return;
|
||||
printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
|
||||
me);
|
||||
return -EINVAL;
|
||||
}
|
||||
svm_data = per_cpu(svm_data, me);
|
||||
|
||||
if (!svm_data) {
|
||||
printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
|
||||
printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
|
||||
me);
|
||||
return;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
svm_data->asid_generation = 1;
|
||||
@ -345,11 +347,12 @@ static void svm_hardware_enable(void *garbage)
|
||||
gdt = (struct desc_struct *)gdt_descr.base;
|
||||
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
|
||||
|
||||
rdmsrl(MSR_EFER, efer);
|
||||
wrmsrl(MSR_EFER, efer | EFER_SVME);
|
||||
|
||||
wrmsrl(MSR_VM_HSAVE_PA,
|
||||
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void svm_cpu_uninit(int cpu)
|
||||
@ -476,7 +479,7 @@ static __init int svm_hardware_setup(void)
|
||||
kvm_enable_efer_bits(EFER_SVME);
|
||||
}
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
r = svm_cpu_init(cpu);
|
||||
if (r)
|
||||
goto err;
|
||||
@ -510,7 +513,7 @@ static __exit void svm_hardware_unsetup(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
for_each_possible_cpu(cpu)
|
||||
svm_cpu_uninit(cpu);
|
||||
|
||||
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
|
||||
@ -625,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
save->rip = 0x0000fff0;
|
||||
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
|
||||
|
||||
/*
|
||||
* cr0 val on cpu init should be 0x60000010, we enable cpu
|
||||
* cache by default. the orderly way is to enable cache in bios.
|
||||
/* This is the guest-visible cr0 value.
|
||||
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
|
||||
*/
|
||||
save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
|
||||
svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
||||
kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
|
||||
|
||||
save->cr4 = X86_CR4_PAE;
|
||||
/* rdx = ?? */
|
||||
|
||||
@ -644,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
|
||||
INTERCEPT_CR3_MASK);
|
||||
save->g_pat = 0x0007040600070406ULL;
|
||||
/* enable caching because the QEMU Bios doesn't enable it */
|
||||
save->cr0 = X86_CR0_ET;
|
||||
save->cr3 = 0;
|
||||
save->cr4 = 0;
|
||||
}
|
||||
@ -654,6 +656,11 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
svm->nested.vmcb = 0;
|
||||
svm->vcpu.arch.hflags = 0;
|
||||
|
||||
if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
|
||||
control->pause_filter_count = 3000;
|
||||
control->intercept |= (1ULL << INTERCEPT_PAUSE);
|
||||
}
|
||||
|
||||
enable_gif(svm);
|
||||
}
|
||||
|
||||
@ -758,14 +765,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
int i;
|
||||
|
||||
if (unlikely(cpu != vcpu->cpu)) {
|
||||
u64 tsc_this, delta;
|
||||
u64 delta;
|
||||
|
||||
/*
|
||||
* Make sure that the guest sees a monotonically
|
||||
* increasing TSC.
|
||||
*/
|
||||
rdtscll(tsc_this);
|
||||
delta = vcpu->arch.host_tsc - tsc_this;
|
||||
delta = vcpu->arch.host_tsc - native_read_tsc();
|
||||
svm->vmcb->control.tsc_offset += delta;
|
||||
if (is_nested(svm))
|
||||
svm->nested.hsave->control.tsc_offset += delta;
|
||||
@ -787,7 +793,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
|
||||
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
|
||||
|
||||
rdtscll(vcpu->arch.host_tsc);
|
||||
vcpu->arch.host_tsc = native_read_tsc();
|
||||
}
|
||||
|
||||
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
|
||||
@ -1045,7 +1051,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
|
||||
svm->vmcb->control.intercept_exceptions &=
|
||||
~((1 << DB_VECTOR) | (1 << BP_VECTOR));
|
||||
|
||||
if (vcpu->arch.singlestep)
|
||||
if (svm->nmi_singlestep)
|
||||
svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
|
||||
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
|
||||
@ -1060,26 +1066,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
|
||||
vcpu->guest_debug = 0;
|
||||
}
|
||||
|
||||
static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
|
||||
static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
|
||||
{
|
||||
int old_debug = vcpu->guest_debug;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
vcpu->guest_debug = dbg->control;
|
||||
|
||||
update_db_intercept(vcpu);
|
||||
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
|
||||
svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
|
||||
else
|
||||
svm->vmcb->save.dr7 = vcpu->arch.dr7;
|
||||
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
||||
svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
|
||||
else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
|
||||
svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
|
||||
|
||||
return 0;
|
||||
update_db_intercept(vcpu);
|
||||
}
|
||||
|
||||
static void load_host_msrs(struct kvm_vcpu *vcpu)
|
||||
@ -1180,7 +1176,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
|
||||
}
|
||||
}
|
||||
|
||||
static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int pf_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u64 fault_address;
|
||||
u32 error_code;
|
||||
@ -1194,17 +1190,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
|
||||
}
|
||||
|
||||
static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int db_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
if (!(svm->vcpu.guest_debug &
|
||||
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
|
||||
!svm->vcpu.arch.singlestep) {
|
||||
!svm->nmi_singlestep) {
|
||||
kvm_queue_exception(&svm->vcpu, DB_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (svm->vcpu.arch.singlestep) {
|
||||
svm->vcpu.arch.singlestep = false;
|
||||
if (svm->nmi_singlestep) {
|
||||
svm->nmi_singlestep = false;
|
||||
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
|
||||
svm->vmcb->save.rflags &=
|
||||
~(X86_EFLAGS_TF | X86_EFLAGS_RF);
|
||||
@ -1223,25 +1221,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int bp_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
kvm_run->exit_reason = KVM_EXIT_DEBUG;
|
||||
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
|
||||
kvm_run->debug.arch.exception = BP_VECTOR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int ud_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
int er;
|
||||
|
||||
er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
|
||||
er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
|
||||
if (er != EMULATE_DONE)
|
||||
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int nm_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
|
||||
if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
|
||||
@ -1251,7 +1251,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int mc_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
/*
|
||||
* On an #MC intercept the MCE handler is not called automatically in
|
||||
@ -1264,8 +1264,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int shutdown_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
/*
|
||||
* VMCB is undefined after a SHUTDOWN intercept
|
||||
* so reinitialize it.
|
||||
@ -1277,7 +1279,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int io_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
|
||||
int size, in, string;
|
||||
@ -1291,7 +1293,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
|
||||
if (string) {
|
||||
if (emulate_instruction(&svm->vcpu,
|
||||
kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
|
||||
0, 0, 0) == EMULATE_DO_MMIO)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -1301,33 +1303,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
|
||||
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
|
||||
return kvm_emulate_pio(&svm->vcpu, in, size, port);
|
||||
}
|
||||
|
||||
static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int nmi_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int intr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
++svm->vcpu.stat.irq_exits;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int nop_on_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int halt_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
return kvm_emulate_halt(&svm->vcpu);
|
||||
}
|
||||
|
||||
static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int vmmcall_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
skip_emulated_instruction(&svm->vcpu);
|
||||
@ -1378,8 +1380,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
|
||||
|
||||
if (nested_svm_exit_handled(svm)) {
|
||||
nsvm_printk("VMexit -> INTR\n");
|
||||
if (svm->nested.intercept & 1ULL) {
|
||||
/*
|
||||
* The #vmexit can't be emulated here directly because this
|
||||
* code path runs with irqs and preemption disabled. A
|
||||
* #vmexit emulation might sleep. Only signal request for
|
||||
* the #vmexit here.
|
||||
*/
|
||||
svm->nested.exit_required = true;
|
||||
trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -1390,10 +1399,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
down_read(&current->mm->mmap_sem);
|
||||
page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
|
||||
up_read(&current->mm->mmap_sem);
|
||||
|
||||
if (is_error_page(page))
|
||||
goto error;
|
||||
|
||||
@ -1532,14 +1538,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
|
||||
}
|
||||
default: {
|
||||
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
|
||||
nsvm_printk("exit code: 0x%x\n", exit_code);
|
||||
if (svm->nested.intercept & exit_bits)
|
||||
vmexit = NESTED_EXIT_DONE;
|
||||
}
|
||||
}
|
||||
|
||||
if (vmexit == NESTED_EXIT_DONE) {
|
||||
nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
|
||||
nested_svm_vmexit(svm);
|
||||
}
|
||||
|
||||
@ -1584,6 +1588,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
struct vmcb *hsave = svm->nested.hsave;
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
|
||||
trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1,
|
||||
vmcb->control.exit_info_2,
|
||||
vmcb->control.exit_int_info,
|
||||
vmcb->control.exit_int_info_err);
|
||||
|
||||
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
|
||||
if (!nested_vmcb)
|
||||
return 1;
|
||||
@ -1617,6 +1627,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
|
||||
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
|
||||
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
|
||||
|
||||
/*
|
||||
* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
|
||||
* to make sure that we do not lose injected events. So check event_inj
|
||||
* here and copy it to exit_int_info if it is valid.
|
||||
* Exit_int_info and event_inj can't be both valid because the case
|
||||
* below only happens on a VMRUN instruction intercept which has
|
||||
* no valid exit_int_info set.
|
||||
*/
|
||||
if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
|
||||
struct vmcb_control_area *nc = &nested_vmcb->control;
|
||||
|
||||
nc->exit_int_info = vmcb->control.event_inj;
|
||||
nc->exit_int_info_err = vmcb->control.event_inj_err;
|
||||
}
|
||||
|
||||
nested_vmcb->control.tlb_ctl = 0;
|
||||
nested_vmcb->control.event_inj = 0;
|
||||
nested_vmcb->control.event_inj_err = 0;
|
||||
@ -1628,10 +1654,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
/* Restore the original control entries */
|
||||
copy_vmcb_control_area(vmcb, hsave);
|
||||
|
||||
/* Kill any pending exceptions */
|
||||
if (svm->vcpu.arch.exception.pending == true)
|
||||
nsvm_printk("WARNING: Pending Exception\n");
|
||||
|
||||
kvm_clear_exception_queue(&svm->vcpu);
|
||||
kvm_clear_interrupt_queue(&svm->vcpu);
|
||||
|
||||
@ -1702,6 +1724,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
/* nested_vmcb is our indicator if nested SVM is activated */
|
||||
svm->nested.vmcb = svm->vmcb->save.rax;
|
||||
|
||||
trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
|
||||
nested_vmcb->save.rip,
|
||||
nested_vmcb->control.int_ctl,
|
||||
nested_vmcb->control.event_inj,
|
||||
nested_vmcb->control.nested_ctl);
|
||||
|
||||
/* Clear internal status */
|
||||
kvm_clear_exception_queue(&svm->vcpu);
|
||||
kvm_clear_interrupt_queue(&svm->vcpu);
|
||||
@ -1789,28 +1817,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
|
||||
svm->nested.intercept = nested_vmcb->control.intercept;
|
||||
|
||||
force_new_asid(&svm->vcpu);
|
||||
svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
|
||||
svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
|
||||
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
|
||||
if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
|
||||
nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
|
||||
nested_vmcb->control.int_ctl);
|
||||
}
|
||||
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
|
||||
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
|
||||
else
|
||||
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
|
||||
|
||||
nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
|
||||
nested_vmcb->control.exit_int_info,
|
||||
nested_vmcb->control.int_state);
|
||||
|
||||
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
|
||||
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
|
||||
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
|
||||
if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
|
||||
nsvm_printk("Injecting Event: 0x%x\n",
|
||||
nested_vmcb->control.event_inj);
|
||||
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
|
||||
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
|
||||
|
||||
@ -1837,7 +1852,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
|
||||
to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
|
||||
}
|
||||
|
||||
static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int vmload_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *nested_vmcb;
|
||||
|
||||
@ -1857,7 +1872,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int vmsave_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb *nested_vmcb;
|
||||
|
||||
@ -1877,10 +1892,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int vmrun_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
nsvm_printk("VMrun\n");
|
||||
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
@ -1907,7 +1920,7 @@ static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int stgi_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
@ -1920,7 +1933,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int clgi_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
@ -1937,10 +1950,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int invlpga_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
nsvm_printk("INVLPGA\n");
|
||||
|
||||
trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
|
||||
vcpu->arch.regs[VCPU_REGS_RAX]);
|
||||
|
||||
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
|
||||
kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
|
||||
@ -1950,15 +1965,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int invalid_op_interception(struct vcpu_svm *svm,
|
||||
struct kvm_run *kvm_run)
|
||||
static int skinit_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
|
||||
|
||||
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int invalid_op_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int task_switch_interception(struct vcpu_svm *svm,
|
||||
struct kvm_run *kvm_run)
|
||||
static int task_switch_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u16 tss_selector;
|
||||
int reason;
|
||||
@ -2008,14 +2029,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
|
||||
return kvm_task_switch(&svm->vcpu, tss_selector, reason);
|
||||
}
|
||||
|
||||
static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int cpuid_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
kvm_emulate_cpuid(&svm->vcpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int iret_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
++svm->vcpu.stat.nmi_window_exits;
|
||||
svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
|
||||
@ -2023,26 +2044,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int invlpg_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
|
||||
if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
|
||||
pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int emulate_on_interception(struct vcpu_svm *svm,
|
||||
struct kvm_run *kvm_run)
|
||||
static int emulate_on_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
|
||||
if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
|
||||
pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int cr8_write_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
|
||||
/* instruction emulation calls kvm_set_cr8() */
|
||||
emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
|
||||
emulate_instruction(&svm->vcpu, 0, 0, 0);
|
||||
if (irqchip_in_kernel(svm->vcpu.kvm)) {
|
||||
svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
|
||||
return 1;
|
||||
@ -2128,7 +2150,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int rdmsr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
|
||||
u64 data;
|
||||
@ -2221,7 +2243,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int wrmsr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
|
||||
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
|
||||
@ -2237,17 +2259,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
|
||||
static int msr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
if (svm->vmcb->control.exit_info_1)
|
||||
return wrmsr_interception(svm, kvm_run);
|
||||
return wrmsr_interception(svm);
|
||||
else
|
||||
return rdmsr_interception(svm, kvm_run);
|
||||
return rdmsr_interception(svm);
|
||||
}
|
||||
|
||||
static int interrupt_window_interception(struct vcpu_svm *svm,
|
||||
struct kvm_run *kvm_run)
|
||||
static int interrupt_window_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_run *kvm_run = svm->vcpu.run;
|
||||
|
||||
svm_clear_vintr(svm);
|
||||
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
|
||||
/*
|
||||
@ -2265,8 +2288,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
|
||||
struct kvm_run *kvm_run) = {
|
||||
static int pause_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
kvm_vcpu_on_spin(&(svm->vcpu));
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
|
||||
[SVM_EXIT_READ_CR0] = emulate_on_interception,
|
||||
[SVM_EXIT_READ_CR3] = emulate_on_interception,
|
||||
[SVM_EXIT_READ_CR4] = emulate_on_interception,
|
||||
@ -2301,6 +2329,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
|
||||
[SVM_EXIT_CPUID] = cpuid_interception,
|
||||
[SVM_EXIT_IRET] = iret_interception,
|
||||
[SVM_EXIT_INVD] = emulate_on_interception,
|
||||
[SVM_EXIT_PAUSE] = pause_interception,
|
||||
[SVM_EXIT_HLT] = halt_interception,
|
||||
[SVM_EXIT_INVLPG] = invlpg_interception,
|
||||
[SVM_EXIT_INVLPGA] = invlpga_interception,
|
||||
@ -2314,26 +2343,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
|
||||
[SVM_EXIT_VMSAVE] = vmsave_interception,
|
||||
[SVM_EXIT_STGI] = stgi_interception,
|
||||
[SVM_EXIT_CLGI] = clgi_interception,
|
||||
[SVM_EXIT_SKINIT] = invalid_op_interception,
|
||||
[SVM_EXIT_SKINIT] = skinit_interception,
|
||||
[SVM_EXIT_WBINVD] = emulate_on_interception,
|
||||
[SVM_EXIT_MONITOR] = invalid_op_interception,
|
||||
[SVM_EXIT_MWAIT] = invalid_op_interception,
|
||||
[SVM_EXIT_NPF] = pf_interception,
|
||||
};
|
||||
|
||||
static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
static int handle_exit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
u32 exit_code = svm->vmcb->control.exit_code;
|
||||
|
||||
trace_kvm_exit(exit_code, svm->vmcb->save.rip);
|
||||
|
||||
if (unlikely(svm->nested.exit_required)) {
|
||||
nested_svm_vmexit(svm);
|
||||
svm->nested.exit_required = false;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (is_nested(svm)) {
|
||||
int vmexit;
|
||||
|
||||
nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
|
||||
exit_code, svm->vmcb->control.exit_info_1,
|
||||
svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
|
||||
trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
|
||||
svm->vmcb->control.exit_info_1,
|
||||
svm->vmcb->control.exit_info_2,
|
||||
svm->vmcb->control.exit_int_info,
|
||||
svm->vmcb->control.exit_int_info_err);
|
||||
|
||||
vmexit = nested_svm_exit_special(svm);
|
||||
|
||||
@ -2383,7 +2422,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return svm_exit_handlers[exit_code](svm, kvm_run);
|
||||
return svm_exit_handlers[exit_code](svm);
|
||||
}
|
||||
|
||||
static void reload_tss(struct kvm_vcpu *vcpu)
|
||||
@ -2460,20 +2499,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
|
||||
!(svm->vcpu.arch.hflags & HF_NMI_MASK);
|
||||
}
|
||||
|
||||
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
|
||||
}
|
||||
|
||||
/*
 * Set or clear NMI masking for @vcpu.
 *
 * When @masked is true, HF_NMI_MASK is set in hflags and the IRET intercept
 * is enabled; when false, both are cleared.
 */
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * The bit constant must be 64 bits wide.  With 1UL on a 32-bit build,
	 * ~(1UL << INTERCEPT_IRET) is a 32-bit value that is zero-extended
	 * before the &=, which would clear every intercept in the upper half
	 * of the 64-bit intercept word.
	 */
	if (masked) {
		svm->vcpu.arch.hflags |= HF_NMI_MASK;
		svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
	} else {
		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
		svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
	}
}
|
||||
|
||||
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
return (vmcb->save.rflags & X86_EFLAGS_IF) &&
|
||||
!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
|
||||
gif_set(svm) &&
|
||||
!(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
|
||||
int ret;
|
||||
|
||||
if (!gif_set(svm) ||
|
||||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
|
||||
return 0;
|
||||
|
||||
ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
|
||||
|
||||
if (is_nested(svm))
|
||||
return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
nsvm_printk("Trying to open IRQ window\n");
|
||||
|
||||
nested_svm_intr(svm);
|
||||
|
||||
@ -2498,7 +2564,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
/* Something prevents NMI from being injected. Single step over
|
||||
possible problem (IRET or exception injection or interrupt
|
||||
shadow) */
|
||||
vcpu->arch.singlestep = true;
|
||||
svm->nmi_singlestep = true;
|
||||
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
|
||||
update_db_intercept(vcpu);
|
||||
}
|
||||
@ -2588,13 +2654,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
|
||||
#define R "e"
|
||||
#endif
|
||||
|
||||
static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u16 fs_selector;
|
||||
u16 gs_selector;
|
||||
u16 ldt_selector;
|
||||
|
||||
/*
|
||||
* A vmexit emulation is required before the vcpu can be executed
|
||||
* again.
|
||||
*/
|
||||
if (unlikely(svm->nested.exit_required))
|
||||
return;
|
||||
|
||||
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
|
||||
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
|
||||
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
|
||||
@ -2893,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.queue_exception = svm_queue_exception,
|
||||
.interrupt_allowed = svm_interrupt_allowed,
|
||||
.nmi_allowed = svm_nmi_allowed,
|
||||
.get_nmi_mask = svm_get_nmi_mask,
|
||||
.set_nmi_mask = svm_set_nmi_mask,
|
||||
.enable_nmi_window = enable_nmi_window,
|
||||
.enable_irq_window = enable_irq_window,
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
|
@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq,
|
||||
__entry->coalesced ? " (coalesced)" : "")
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for nested VMRUN
|
||||
*/
|
||||
TRACE_EVENT(kvm_nested_vmrun,
|
||||
TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
|
||||
__u32 event_inj, bool npt),
|
||||
TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, rip )
|
||||
__field( __u64, vmcb )
|
||||
__field( __u64, nested_rip )
|
||||
__field( __u32, int_ctl )
|
||||
__field( __u32, event_inj )
|
||||
__field( bool, npt )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rip = rip;
|
||||
__entry->vmcb = vmcb;
|
||||
__entry->nested_rip = nested_rip;
|
||||
__entry->int_ctl = int_ctl;
|
||||
__entry->event_inj = event_inj;
|
||||
__entry->npt = npt;
|
||||
),
|
||||
|
||||
TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
|
||||
"event_inj: 0x%08x npt: %s\n",
|
||||
__entry->rip, __entry->vmcb, __entry->nested_rip,
|
||||
__entry->int_ctl, __entry->event_inj,
|
||||
__entry->npt ? "on" : "off")
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for #VMEXIT while nested
|
||||
*/
|
||||
TRACE_EVENT(kvm_nested_vmexit,
|
||||
TP_PROTO(__u64 rip, __u32 exit_code,
|
||||
__u64 exit_info1, __u64 exit_info2,
|
||||
__u32 exit_int_info, __u32 exit_int_info_err),
|
||||
TP_ARGS(rip, exit_code, exit_info1, exit_info2,
|
||||
exit_int_info, exit_int_info_err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, rip )
|
||||
__field( __u32, exit_code )
|
||||
__field( __u64, exit_info1 )
|
||||
__field( __u64, exit_info2 )
|
||||
__field( __u32, exit_int_info )
|
||||
__field( __u32, exit_int_info_err )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rip = rip;
|
||||
__entry->exit_code = exit_code;
|
||||
__entry->exit_info1 = exit_info1;
|
||||
__entry->exit_info2 = exit_info2;
|
||||
__entry->exit_int_info = exit_int_info;
|
||||
__entry->exit_int_info_err = exit_int_info_err;
|
||||
),
|
||||
TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
|
||||
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
|
||||
__entry->rip,
|
||||
ftrace_print_symbols_seq(p, __entry->exit_code,
|
||||
kvm_x86_ops->exit_reasons_str),
|
||||
__entry->exit_info1, __entry->exit_info2,
|
||||
__entry->exit_int_info, __entry->exit_int_info_err)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for #VMEXIT reinjected to the guest
|
||||
*/
|
||||
TRACE_EVENT(kvm_nested_vmexit_inject,
|
||||
TP_PROTO(__u32 exit_code,
|
||||
__u64 exit_info1, __u64 exit_info2,
|
||||
__u32 exit_int_info, __u32 exit_int_info_err),
|
||||
TP_ARGS(exit_code, exit_info1, exit_info2,
|
||||
exit_int_info, exit_int_info_err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u32, exit_code )
|
||||
__field( __u64, exit_info1 )
|
||||
__field( __u64, exit_info2 )
|
||||
__field( __u32, exit_int_info )
|
||||
__field( __u32, exit_int_info_err )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->exit_code = exit_code;
|
||||
__entry->exit_info1 = exit_info1;
|
||||
__entry->exit_info2 = exit_info2;
|
||||
__entry->exit_int_info = exit_int_info;
|
||||
__entry->exit_int_info_err = exit_int_info_err;
|
||||
),
|
||||
|
||||
TP_printk("reason: %s ext_inf1: 0x%016llx "
|
||||
"ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
|
||||
ftrace_print_symbols_seq(p, __entry->exit_code,
|
||||
kvm_x86_ops->exit_reasons_str),
|
||||
__entry->exit_info1, __entry->exit_info2,
|
||||
__entry->exit_int_info, __entry->exit_int_info_err)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for nested #vmexit because of interrupt pending
|
||||
*/
|
||||
TRACE_EVENT(kvm_nested_intr_vmexit,
|
||||
TP_PROTO(__u64 rip),
|
||||
TP_ARGS(rip),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, rip )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rip = rip
|
||||
),
|
||||
|
||||
TP_printk("rip: 0x%016llx\n", __entry->rip)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for nested #vmexit because of interrupt pending
|
||||
*/
|
||||
TRACE_EVENT(kvm_invlpga,
|
||||
TP_PROTO(__u64 rip, int asid, u64 address),
|
||||
TP_ARGS(rip, asid, address),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, rip )
|
||||
__field( int, asid )
|
||||
__field( __u64, address )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rip = rip;
|
||||
__entry->asid = asid;
|
||||
__entry->address = address;
|
||||
),
|
||||
|
||||
TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n",
|
||||
__entry->rip, __entry->asid, __entry->address)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for nested #vmexit because of interrupt pending
|
||||
*/
|
||||
TRACE_EVENT(kvm_skinit,
|
||||
TP_PROTO(__u64 rip, __u32 slb),
|
||||
TP_ARGS(rip, slb),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( __u64, rip )
|
||||
__field( __u32, slb )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rip = rip;
|
||||
__entry->slb = slb;
|
||||
),
|
||||
|
||||
TP_printk("rip: 0x%016llx slb: 0x%08x\n",
|
||||
__entry->rip, __entry->slb)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -37,6 +37,7 @@
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/intel-iommu.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/user-return-notifier.h>
|
||||
#include <trace/events/kvm.h>
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define CREATE_TRACE_POINTS
|
||||
@ -88,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
|
||||
int ignore_msrs = 0;
|
||||
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
|
||||
|
||||
#define KVM_NR_SHARED_MSRS 16
|
||||
|
||||
struct kvm_shared_msrs_global {
|
||||
int nr;
|
||||
struct kvm_shared_msr {
|
||||
u32 msr;
|
||||
u64 value;
|
||||
} msrs[KVM_NR_SHARED_MSRS];
|
||||
};
|
||||
|
||||
struct kvm_shared_msrs {
|
||||
struct user_return_notifier urn;
|
||||
bool registered;
|
||||
u64 current_value[KVM_NR_SHARED_MSRS];
|
||||
};
|
||||
|
||||
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
|
||||
static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
|
||||
|
||||
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ "pf_fixed", VCPU_STAT(pf_fixed) },
|
||||
{ "pf_guest", VCPU_STAT(pf_guest) },
|
||||
@ -124,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static void kvm_on_user_return(struct user_return_notifier *urn)
|
||||
{
|
||||
unsigned slot;
|
||||
struct kvm_shared_msr *global;
|
||||
struct kvm_shared_msrs *locals
|
||||
= container_of(urn, struct kvm_shared_msrs, urn);
|
||||
|
||||
for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
|
||||
global = &shared_msrs_global.msrs[slot];
|
||||
if (global->value != locals->current_value[slot]) {
|
||||
wrmsrl(global->msr, global->value);
|
||||
locals->current_value[slot] = global->value;
|
||||
}
|
||||
}
|
||||
locals->registered = false;
|
||||
user_return_notifier_unregister(urn);
|
||||
}
|
||||
|
||||
/*
 * Bind shared-MSR slot @slot to hardware MSR @msr.
 *
 * Grows shared_msrs_global.nr to cover @slot, reads the MSR's present
 * value, and stores it both as the slot's global value and as the current
 * value of every online cpu.
 *
 * @slot must be below KVM_NR_SHARED_MSRS, the size of the backing arrays.
 */
void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	int cpu;
	u64 value;

	/* msrs[] and current_value[] only have KVM_NR_SHARED_MSRS entries. */
	BUG_ON(slot >= KVM_NR_SHARED_MSRS);

	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
	shared_msrs_global.msrs[slot].msr = msr;

	/* Never record an indeterminate value when the read faults. */
	if (rdmsrl_safe(msr, &value))
		value = 0;

	shared_msrs_global.msrs[slot].value = value;
	for_each_online_cpu(cpu)
		per_cpu(shared_msrs, cpu).current_value[slot] = value;
}
|
||||
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
|
||||
|
||||
static void kvm_shared_msr_cpu_online(void)
|
||||
{
|
||||
unsigned i;
|
||||
struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
|
||||
|
||||
for (i = 0; i < shared_msrs_global.nr; ++i)
|
||||
locals->current_value[i] = shared_msrs_global.msrs[i].value;
|
||||
}
|
||||
|
||||
/*
 * Load @value into the MSR bound to @slot on the current cpu.
 *
 * Nothing is done when @value and the recorded current value agree in all
 * bits selected by @mask.  Otherwise the value is recorded, written to the
 * MSR, and the user-return notifier is registered if it is not already.
 */
void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	struct kvm_shared_msrs *cpu_msrs = &__get_cpu_var(shared_msrs);
	u64 changed = (value ^ cpu_msrs->current_value[slot]) & mask;

	if (!changed)
		return;

	cpu_msrs->current_value[slot] = value;
	wrmsrl(shared_msrs_global.msrs[slot].msr, value);

	if (cpu_msrs->registered)
		return;

	cpu_msrs->urn.on_user_return = kvm_on_user_return;
	user_return_notifier_register(&cpu_msrs->urn);
	cpu_msrs->registered = true;
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
|
||||
|
||||
static void drop_user_return_notifiers(void *ignore)
|
||||
{
|
||||
struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
|
||||
|
||||
if (smsr->registered)
|
||||
kvm_on_user_return(&smsr->urn);
|
||||
}
|
||||
|
||||
unsigned long segment_base(u16 selector)
|
||||
{
|
||||
struct descriptor_table gdt;
|
||||
@ -485,16 +571,19 @@ static inline u32 bit(int bitno)
|
||||
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
|
||||
*
|
||||
* This list is modified at module load time to reflect the
|
||||
* capabilities of the host cpu.
|
||||
* capabilities of the host cpu. This capabilities test skips MSRs that are
|
||||
* kvm-specific. Those are put in the beginning of the list.
|
||||
*/
|
||||
|
||||
#define KVM_SAVE_MSRS_BEGIN 2
|
||||
static u32 msrs_to_save[] = {
|
||||
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
||||
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
|
||||
MSR_K6_STAR,
|
||||
#ifdef CONFIG_X86_64
|
||||
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
|
||||
#endif
|
||||
MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
||||
MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
|
||||
MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
|
||||
};
|
||||
|
||||
static unsigned num_msrs_to_save;
|
||||
@ -678,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
|
||||
/* With all the info we got, fill in the values */
|
||||
|
||||
vcpu->hv_clock.system_time = ts.tv_nsec +
|
||||
(NSEC_PER_SEC * (u64)ts.tv_sec);
|
||||
(NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
|
||||
|
||||
/*
|
||||
* The interface expects us to write an even number signaling that the
|
||||
* update is finished. Since the guest won't see the intermediate
|
||||
@ -836,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Handle a guest write of @data to the configured Xen HVM MSR.
 *
 * The bits of @data below the page boundary select a page index within the
 * userspace blob (64-bit blob when the vcpu is in long mode, 32-bit blob
 * otherwise); the page-aligned part of @data is the guest address that
 * page is written to.
 *
 * Returns 0 on success, -E2BIG if the page index is not below the blob
 * size, -ENOMEM if the bounce page cannot be allocated, and -EFAULT if the
 * copy from userspace or the write to the guest fails.
 */
static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	u8 *blob_addr;
	u8 blob_size;
	u8 *page;
	int r;

	if (is_long_mode(vcpu)) {
		blob_addr = (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64;
		blob_size = kvm->arch.xen_hvm_config.blob_size_64;
	} else {
		blob_addr = (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
		blob_size = kvm->arch.xen_hvm_config.blob_size_32;
	}

	if (page_num >= blob_size)
		return -E2BIG;

	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* The guest write is attempted only if the user copy succeeded. */
	if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE) ||
	    kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
		r = -EFAULT;
	else
		r = 0;

	kfree(page);
	return r;
}
|
||||
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
{
|
||||
switch (msr) {
|
||||
@ -951,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
"0x%x data 0x%llx\n", msr, data);
|
||||
break;
|
||||
default:
|
||||
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
|
||||
return xen_hvm_config(vcpu, data);
|
||||
if (!ignore_msrs) {
|
||||
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
|
||||
msr, data);
|
||||
@ -1225,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_PIT2:
|
||||
case KVM_CAP_PIT_STATE2:
|
||||
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
|
||||
case KVM_CAP_XEN_HVM:
|
||||
case KVM_CAP_ADJUST_CLOCK:
|
||||
case KVM_CAP_VCPU_EVENTS:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_COALESCED_MMIO:
|
||||
@ -1239,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext)
|
||||
case KVM_CAP_NR_MEMSLOTS:
|
||||
r = KVM_MEMORY_SLOTS;
|
||||
break;
|
||||
case KVM_CAP_PV_MMU:
|
||||
r = !tdp_enabled;
|
||||
case KVM_CAP_PV_MMU: /* obsolete */
|
||||
r = 0;
|
||||
break;
|
||||
case KVM_CAP_IOMMU:
|
||||
r = iommu_found();
|
||||
@ -1327,6 +1454,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
kvm_x86_ops->vcpu_load(vcpu, cpu);
|
||||
if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
|
||||
unsigned long khz = cpufreq_quick_get(cpu);
|
||||
if (!khz)
|
||||
khz = tsc_khz;
|
||||
per_cpu(cpu_tsc_khz, cpu) = khz;
|
||||
}
|
||||
kvm_request_guest_time_update(vcpu);
|
||||
}
|
||||
|
||||
@ -1760,6 +1893,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
vcpu_load(vcpu);
|
||||
|
||||
events->exception.injected = vcpu->arch.exception.pending;
|
||||
events->exception.nr = vcpu->arch.exception.nr;
|
||||
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
|
||||
events->exception.error_code = vcpu->arch.exception.error_code;
|
||||
|
||||
events->interrupt.injected = vcpu->arch.interrupt.pending;
|
||||
events->interrupt.nr = vcpu->arch.interrupt.nr;
|
||||
events->interrupt.soft = vcpu->arch.interrupt.soft;
|
||||
|
||||
events->nmi.injected = vcpu->arch.nmi_injected;
|
||||
events->nmi.pending = vcpu->arch.nmi_pending;
|
||||
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
|
||||
|
||||
events->sipi_vector = vcpu->arch.sipi_vector;
|
||||
|
||||
events->flags = 0;
|
||||
|
||||
vcpu_put(vcpu);
|
||||
}
|
||||
|
||||
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
if (events->flags)
|
||||
return -EINVAL;
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
vcpu->arch.exception.pending = events->exception.injected;
|
||||
vcpu->arch.exception.nr = events->exception.nr;
|
||||
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
|
||||
vcpu->arch.exception.error_code = events->exception.error_code;
|
||||
|
||||
vcpu->arch.interrupt.pending = events->interrupt.injected;
|
||||
vcpu->arch.interrupt.nr = events->interrupt.nr;
|
||||
vcpu->arch.interrupt.soft = events->interrupt.soft;
|
||||
if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
|
||||
kvm_pic_clear_isr_ack(vcpu->kvm);
|
||||
|
||||
vcpu->arch.nmi_injected = events->nmi.injected;
|
||||
vcpu->arch.nmi_pending = events->nmi.pending;
|
||||
kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
|
||||
|
||||
vcpu->arch.sipi_vector = events->sipi_vector;
|
||||
|
||||
vcpu_put(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -1770,6 +1958,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_GET_LAPIC: {
|
||||
r = -EINVAL;
|
||||
if (!vcpu->arch.apic)
|
||||
goto out;
|
||||
lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
|
||||
|
||||
r = -ENOMEM;
|
||||
@ -1785,6 +1976,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
break;
|
||||
}
|
||||
case KVM_SET_LAPIC: {
|
||||
r = -EINVAL;
|
||||
if (!vcpu->arch.apic)
|
||||
goto out;
|
||||
lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
|
||||
r = -ENOMEM;
|
||||
if (!lapic)
|
||||
@ -1911,6 +2105,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_VCPU_EVENTS: {
|
||||
struct kvm_vcpu_events events;
|
||||
|
||||
kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
|
||||
break;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_SET_VCPU_EVENTS: {
|
||||
struct kvm_vcpu_events events;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
|
||||
break;
|
||||
|
||||
r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
@ -2039,9 +2254,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
|
||||
sizeof(struct kvm_pic_state));
|
||||
break;
|
||||
case KVM_IRQCHIP_IOAPIC:
|
||||
memcpy(&chip->chip.ioapic,
|
||||
ioapic_irqchip(kvm),
|
||||
sizeof(struct kvm_ioapic_state));
|
||||
r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
@ -2071,11 +2284,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
|
||||
spin_unlock(&pic_irqchip(kvm)->lock);
|
||||
break;
|
||||
case KVM_IRQCHIP_IOAPIC:
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
memcpy(ioapic_irqchip(kvm),
|
||||
&chip->chip.ioapic,
|
||||
sizeof(struct kvm_ioapic_state));
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
@ -2183,7 +2392,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
{
|
||||
struct kvm *kvm = filp->private_data;
|
||||
void __user *argp = (void __user *)arg;
|
||||
int r = -EINVAL;
|
||||
int r = -ENOTTY;
|
||||
/*
|
||||
* This union makes it completely explicit to gcc-3.x
|
||||
* that these two variables' stack usage should be
|
||||
@ -2245,25 +2454,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
case KVM_CREATE_IRQCHIP:
|
||||
case KVM_CREATE_IRQCHIP: {
|
||||
struct kvm_pic *vpic;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
r = -EEXIST;
|
||||
if (kvm->arch.vpic)
|
||||
goto create_irqchip_unlock;
|
||||
r = -ENOMEM;
|
||||
kvm->arch.vpic = kvm_create_pic(kvm);
|
||||
if (kvm->arch.vpic) {
|
||||
vpic = kvm_create_pic(kvm);
|
||||
if (vpic) {
|
||||
r = kvm_ioapic_init(kvm);
|
||||
if (r) {
|
||||
kfree(kvm->arch.vpic);
|
||||
kvm->arch.vpic = NULL;
|
||||
goto out;
|
||||
kfree(vpic);
|
||||
goto create_irqchip_unlock;
|
||||
}
|
||||
} else
|
||||
goto out;
|
||||
goto create_irqchip_unlock;
|
||||
smp_wmb();
|
||||
kvm->arch.vpic = vpic;
|
||||
smp_wmb();
|
||||
r = kvm_setup_default_irq_routing(kvm);
|
||||
if (r) {
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
kfree(kvm->arch.vpic);
|
||||
kfree(kvm->arch.vioapic);
|
||||
goto out;
|
||||
kvm->arch.vpic = NULL;
|
||||
kvm->arch.vioapic = NULL;
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
}
|
||||
create_irqchip_unlock:
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
}
|
||||
case KVM_CREATE_PIT:
|
||||
u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
|
||||
goto create_pit;
|
||||
@ -2293,10 +2516,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
goto out;
|
||||
if (irqchip_in_kernel(kvm)) {
|
||||
__s32 status;
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
|
||||
irq_event.irq, irq_event.level);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
if (ioctl == KVM_IRQ_LINE_STATUS) {
|
||||
irq_event.status = status;
|
||||
if (copy_to_user(argp, &irq_event,
|
||||
@ -2422,6 +2643,55 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_XEN_HVM_CONFIG: {
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
|
||||
sizeof(struct kvm_xen_hvm_config)))
|
||||
goto out;
|
||||
r = -EINVAL;
|
||||
if (kvm->arch.xen_hvm_config.flags)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_SET_CLOCK: {
|
||||
struct timespec now;
|
||||
struct kvm_clock_data user_ns;
|
||||
u64 now_ns;
|
||||
s64 delta;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
|
||||
goto out;
|
||||
|
||||
r = -EINVAL;
|
||||
if (user_ns.flags)
|
||||
goto out;
|
||||
|
||||
r = 0;
|
||||
ktime_get_ts(&now);
|
||||
now_ns = timespec_to_ns(&now);
|
||||
delta = user_ns.clock - now_ns;
|
||||
kvm->arch.kvmclock_offset = delta;
|
||||
break;
|
||||
}
|
||||
case KVM_GET_CLOCK: {
	/*
	 * Report the VM's current kvmclock value to userspace: the stored
	 * kvmclock_offset plus the host time returned by ktime_get_ts(),
	 * in nanoseconds.
	 *
	 * r is -EFAULT when the result cannot be written back, 0 otherwise.
	 */
	struct timespec now;
	struct kvm_clock_data user_ns;
	u64 now_ns;

	ktime_get_ts(&now);
	now_ns = timespec_to_ns(&now);
	user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
	user_ns.flags = 0;
	/*
	 * The whole structure is copied out below; clear the padding so no
	 * uninitialised kernel stack bytes reach userspace.
	 */
	memset(&user_ns.pad, 0, sizeof(user_ns.pad));

	r = -EFAULT;
	if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
		goto out;
	r = 0;
	break;
}
|
||||
|
||||
default:
|
||||
;
|
||||
}
|
||||
@ -2434,7 +2704,8 @@ static void kvm_init_msr_list(void)
|
||||
u32 dummy[2];
|
||||
unsigned i, j;
|
||||
|
||||
for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
|
||||
/* skip the first msrs in the list. KVM-specific */
|
||||
for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
|
||||
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
|
||||
continue;
|
||||
if (j < i)
|
||||
@ -2758,13 +3029,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run,
|
||||
unsigned long cr2,
|
||||
u16 error_code,
|
||||
int emulation_type)
|
||||
{
|
||||
int r, shadow_mask;
|
||||
struct decode_cache *c;
|
||||
struct kvm_run *run = vcpu->run;
|
||||
|
||||
kvm_clear_exception_queue(vcpu);
|
||||
vcpu->arch.mmio_fault_cr2 = cr2;
|
||||
@ -2784,7 +3055,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
|
||||
|
||||
vcpu->arch.emulate_ctxt.vcpu = vcpu;
|
||||
vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
|
||||
vcpu->arch.emulate_ctxt.mode =
|
||||
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
|
||||
? X86EMUL_MODE_REAL : cs_l
|
||||
@ -2862,7 +3133,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
|
||||
return EMULATE_DO_MMIO;
|
||||
}
|
||||
|
||||
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
|
||||
kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
|
||||
|
||||
if (vcpu->mmio_is_write) {
|
||||
vcpu->mmio_needed = 0;
|
||||
@ -2970,8 +3241,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
||||
int size, unsigned port)
|
||||
int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
@ -3000,7 +3270,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
|
||||
|
||||
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
|
||||
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
|
||||
int size, unsigned long count, int down,
|
||||
gva_t address, int rep, unsigned port)
|
||||
{
|
||||
@ -3073,9 +3343,6 @@ static void bounce_off(void *info)
|
||||
/* nothing */
|
||||
}
|
||||
|
||||
static unsigned int ref_freq;
|
||||
static unsigned long tsc_khz_ref;
|
||||
|
||||
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
@ -3084,14 +3351,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i, send_ipi = 0;
|
||||
|
||||
if (!ref_freq)
|
||||
ref_freq = freq->old;
|
||||
|
||||
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
|
||||
return 0;
|
||||
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
|
||||
return 0;
|
||||
per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
|
||||
per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
|
||||
|
||||
spin_lock(&kvm_lock);
|
||||
list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
@ -3128,9 +3392,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
|
||||
.notifier_call = kvmclock_cpufreq_notifier
|
||||
};
|
||||
|
||||
static void kvm_timer_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
|
||||
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
for_each_online_cpu(cpu) {
|
||||
unsigned long khz = cpufreq_get(cpu);
|
||||
if (!khz)
|
||||
khz = tsc_khz;
|
||||
per_cpu(cpu_tsc_khz, cpu) = khz;
|
||||
}
|
||||
} else {
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_init(void *opaque)
|
||||
{
|
||||
int r, cpu;
|
||||
int r;
|
||||
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
|
||||
|
||||
if (kvm_x86_ops) {
|
||||
@ -3162,13 +3445,7 @@ int kvm_arch_init(void *opaque)
|
||||
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
|
||||
PT_DIRTY_MASK, PT64_NX_MASK, 0);
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
|
||||
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
|
||||
tsc_khz_ref = tsc_khz;
|
||||
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
}
|
||||
kvm_timer_init();
|
||||
|
||||
return 0;
|
||||
|
||||
@ -3296,7 +3573,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
|
||||
unsigned long *rflags)
|
||||
{
|
||||
kvm_lmsw(vcpu, msw);
|
||||
*rflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
*rflags = kvm_get_rflags(vcpu);
|
||||
}
|
||||
|
||||
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
|
||||
@ -3334,7 +3611,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
|
||||
switch (cr) {
|
||||
case 0:
|
||||
kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
|
||||
*rflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
*rflags = kvm_get_rflags(vcpu);
|
||||
break;
|
||||
case 2:
|
||||
vcpu->arch.cr2 = val;
|
||||
@ -3454,18 +3731,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
|
||||
*
|
||||
* No need to exit to userspace if we already have an interrupt queued.
|
||||
*/
|
||||
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *kvm_run)
|
||||
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
|
||||
kvm_run->request_interrupt_window &&
|
||||
vcpu->run->request_interrupt_window &&
|
||||
kvm_arch_interrupt_allowed(vcpu));
|
||||
}
|
||||
|
||||
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *kvm_run)
|
||||
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
|
||||
kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
|
||||
kvm_run->cr8 = kvm_get_cr8(vcpu);
|
||||
kvm_run->apic_base = kvm_get_apic_base(vcpu);
|
||||
if (irqchip_in_kernel(vcpu->kvm))
|
||||
@ -3526,7 +3803,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
|
||||
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
|
||||
}
|
||||
|
||||
static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
static void inject_pending_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* try to reinject previous events if any */
|
||||
if (vcpu->arch.exception.pending) {
|
||||
@ -3562,11 +3839,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
}
|
||||
}
|
||||
|
||||
static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
|
||||
kvm_run->request_interrupt_window;
|
||||
vcpu->run->request_interrupt_window;
|
||||
|
||||
if (vcpu->requests)
|
||||
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
|
||||
@ -3587,12 +3864,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
kvm_x86_ops->tlb_flush(vcpu);
|
||||
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
|
||||
&vcpu->requests)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
|
||||
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
@ -3616,7 +3893,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
goto out;
|
||||
}
|
||||
|
||||
inject_pending_event(vcpu, kvm_run);
|
||||
inject_pending_event(vcpu);
|
||||
|
||||
/* enable NMI/IRQ window open exits if needed */
|
||||
if (vcpu->arch.nmi_pending)
|
||||
@ -3642,7 +3919,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
}
|
||||
|
||||
trace_kvm_entry(vcpu->vcpu_id);
|
||||
kvm_x86_ops->run(vcpu, kvm_run);
|
||||
kvm_x86_ops->run(vcpu);
|
||||
|
||||
/*
|
||||
* If the guest has used debug registers, at least dr7
|
||||
@ -3684,13 +3961,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
|
||||
kvm_lapic_sync_from_vapic(vcpu);
|
||||
|
||||
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
|
||||
r = kvm_x86_ops->handle_exit(vcpu);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
|
||||
@ -3710,7 +3987,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
r = 1;
|
||||
while (r > 0) {
|
||||
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
|
||||
r = vcpu_enter_guest(vcpu, kvm_run);
|
||||
r = vcpu_enter_guest(vcpu);
|
||||
else {
|
||||
up_read(&vcpu->kvm->slots_lock);
|
||||
kvm_vcpu_block(vcpu);
|
||||
@ -3738,14 +4015,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
if (kvm_cpu_has_pending_timer(vcpu))
|
||||
kvm_inject_pending_timer_irqs(vcpu);
|
||||
|
||||
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
|
||||
if (dm_request_for_irq_injection(vcpu)) {
|
||||
r = -EINTR;
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
++vcpu->stat.request_irq_exits;
|
||||
}
|
||||
if (signal_pending(current)) {
|
||||
r = -EINTR;
|
||||
kvm_run->exit_reason = KVM_EXIT_INTR;
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
++vcpu->stat.signal_exits;
|
||||
}
|
||||
if (need_resched()) {
|
||||
@ -3756,7 +4033,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
}
|
||||
|
||||
up_read(&vcpu->kvm->slots_lock);
|
||||
post_kvm_run_save(vcpu, kvm_run);
|
||||
post_kvm_run_save(vcpu);
|
||||
|
||||
vapic_exit(vcpu);
|
||||
|
||||
@ -3789,15 +4066,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
if (r)
|
||||
goto out;
|
||||
}
|
||||
#if CONFIG_HAS_IOMEM
|
||||
if (vcpu->mmio_needed) {
|
||||
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
|
||||
vcpu->mmio_read_completed = 1;
|
||||
vcpu->mmio_needed = 0;
|
||||
|
||||
down_read(&vcpu->kvm->slots_lock);
|
||||
r = emulate_instruction(vcpu, kvm_run,
|
||||
vcpu->arch.mmio_fault_cr2, 0,
|
||||
r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
|
||||
EMULTYPE_NO_DECODE);
|
||||
up_read(&vcpu->kvm->slots_lock);
|
||||
if (r == EMULATE_DO_MMIO) {
|
||||
@ -3808,12 +4083,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
|
||||
kvm_register_write(vcpu, VCPU_REGS_RAX,
|
||||
kvm_run->hypercall.ret);
|
||||
|
||||
r = __vcpu_run(vcpu, kvm_run);
|
||||
r = __vcpu_run(vcpu);
|
||||
|
||||
out:
|
||||
if (vcpu->sigset_active)
|
||||
@ -3847,13 +4121,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
#endif
|
||||
|
||||
regs->rip = kvm_rip_read(vcpu);
|
||||
regs->rflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
|
||||
/*
|
||||
* Don't leak debug flags in case they were set for guest debugging
|
||||
*/
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
||||
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
|
||||
regs->rflags = kvm_get_rflags(vcpu);
|
||||
|
||||
vcpu_put(vcpu);
|
||||
|
||||
@ -3881,12 +4149,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
|
||||
kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
|
||||
kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
|
||||
|
||||
#endif
|
||||
|
||||
kvm_rip_write(vcpu, regs->rip);
|
||||
kvm_x86_ops->set_rflags(vcpu, regs->rflags);
|
||||
|
||||
kvm_set_rflags(vcpu, regs->rflags);
|
||||
|
||||
vcpu->arch.exception.pending = false;
|
||||
|
||||
@ -4105,7 +4371,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
|
||||
{
|
||||
return (seg != VCPU_SREG_LDTR) &&
|
||||
(seg != VCPU_SREG_TR) &&
|
||||
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
|
||||
(kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
|
||||
}
|
||||
|
||||
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
|
||||
@ -4133,7 +4399,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
tss->cr3 = vcpu->arch.cr3;
|
||||
tss->eip = kvm_rip_read(vcpu);
|
||||
tss->eflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
tss->eflags = kvm_get_rflags(vcpu);
|
||||
tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
||||
tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
|
||||
tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
|
||||
@ -4157,7 +4423,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
|
||||
kvm_set_cr3(vcpu, tss->cr3);
|
||||
|
||||
kvm_rip_write(vcpu, tss->eip);
|
||||
kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
|
||||
kvm_set_rflags(vcpu, tss->eflags | 2);
|
||||
|
||||
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
|
||||
@ -4195,7 +4461,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
|
||||
struct tss_segment_16 *tss)
|
||||
{
|
||||
tss->ip = kvm_rip_read(vcpu);
|
||||
tss->flag = kvm_x86_ops->get_rflags(vcpu);
|
||||
tss->flag = kvm_get_rflags(vcpu);
|
||||
tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
||||
tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
|
||||
tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
|
||||
@ -4210,14 +4476,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
|
||||
tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
|
||||
tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
|
||||
tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
|
||||
tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
|
||||
}
|
||||
|
||||
static int load_state_from_tss16(struct kvm_vcpu *vcpu,
|
||||
struct tss_segment_16 *tss)
|
||||
{
|
||||
kvm_rip_write(vcpu, tss->ip);
|
||||
kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
|
||||
kvm_set_rflags(vcpu, tss->flag | 2);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
|
||||
@ -4363,15 +4628,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
|
||||
}
|
||||
|
||||
if (reason == TASK_SWITCH_IRET) {
|
||||
u32 eflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
|
||||
u32 eflags = kvm_get_rflags(vcpu);
|
||||
kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
|
||||
}
|
||||
|
||||
/* set back link to prev task only if NT bit is set in eflags
|
||||
note that old_tss_sel is not used after this point */
|
||||
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
|
||||
old_tss_sel = 0xffff;
|
||||
|
||||
/* set back link to prev task only if NT bit is set in eflags
|
||||
note that old_tss_sel is not used after this point */
|
||||
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
|
||||
@ -4385,8 +4645,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
|
||||
old_tss_base, &nseg_desc);
|
||||
|
||||
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
|
||||
u32 eflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
|
||||
u32 eflags = kvm_get_rflags(vcpu);
|
||||
kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
|
||||
}
|
||||
|
||||
if (reason != TASK_SWITCH_IRET) {
|
||||
@ -4438,8 +4698,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
|
||||
mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
|
||||
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
|
||||
if (!is_long_mode(vcpu) && is_pae(vcpu))
|
||||
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
|
||||
load_pdptrs(vcpu, vcpu->arch.cr3);
|
||||
mmu_reset_needed = 1;
|
||||
}
|
||||
|
||||
if (mmu_reset_needed)
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
@ -4480,12 +4742,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
struct kvm_guest_debug *dbg)
|
||||
{
|
||||
unsigned long rflags;
|
||||
int i, r;
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
|
||||
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
|
||||
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
|
||||
r = -EBUSY;
|
||||
if (vcpu->arch.exception.pending)
|
||||
goto unlock_out;
|
||||
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
|
||||
kvm_queue_exception(vcpu, DB_VECTOR);
|
||||
else
|
||||
kvm_queue_exception(vcpu, BP_VECTOR);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read rflags as long as potentially injected trace flags are still
|
||||
* filtered out.
|
||||
*/
|
||||
rflags = kvm_get_rflags(vcpu);
|
||||
|
||||
vcpu->guest_debug = dbg->control;
|
||||
if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
|
||||
vcpu->guest_debug = 0;
|
||||
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
|
||||
for (i = 0; i < KVM_NR_DB_REGS; ++i)
|
||||
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
|
||||
vcpu->arch.switch_db_regs =
|
||||
@ -4496,13 +4778,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
|
||||
}
|
||||
|
||||
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
|
||||
vcpu->arch.singlestep_cs =
|
||||
get_segment_selector(vcpu, VCPU_SREG_CS);
|
||||
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
|
||||
}
|
||||
|
||||
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
|
||||
kvm_queue_exception(vcpu, DB_VECTOR);
|
||||
else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
|
||||
kvm_queue_exception(vcpu, BP_VECTOR);
|
||||
/*
|
||||
* Trigger an rflags update that will inject or remove the trace
|
||||
* flags.
|
||||
*/
|
||||
kvm_set_rflags(vcpu, rflags);
|
||||
|
||||
kvm_x86_ops->set_guest_debug(vcpu, dbg);
|
||||
|
||||
r = 0;
|
||||
|
||||
unlock_out:
|
||||
vcpu_put(vcpu);
|
||||
|
||||
return r;
|
||||
@ -4703,14 +4995,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
return kvm_x86_ops->vcpu_reset(vcpu);
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_enable(void *garbage)
|
||||
int kvm_arch_hardware_enable(void *garbage)
|
||||
{
|
||||
kvm_x86_ops->hardware_enable(garbage);
|
||||
/*
|
||||
* Since this may be called from a hotplug notification,
|
||||
* we can't get the CPU frequency directly.
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
|
||||
int cpu = raw_smp_processor_id();
|
||||
per_cpu(cpu_tsc_khz, cpu) = 0;
|
||||
}
|
||||
|
||||
kvm_shared_msr_cpu_online();
|
||||
|
||||
return kvm_x86_ops->hardware_enable(garbage);
|
||||
}
|
||||
|
||||
/*
 * Disable virtualization through the vendor hook, then drop the user
 * return notifiers.  @garbage is passed unchanged to both callees.
 */
void kvm_arch_hardware_disable(void *garbage)
|
||||
{
|
||||
kvm_x86_ops->hardware_disable(garbage);
|
||||
/* Runs after the vendor hook has disabled virtualization. */
drop_user_return_notifiers(garbage);
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_setup(void)
|
||||
@ -4948,8 +5252,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
|
||||
return kvm_x86_ops->interrupt_allowed(vcpu);
|
||||
}
|
||||
|
||||
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long rflags;
|
||||
|
||||
rflags = kvm_x86_ops->get_rflags(vcpu);
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
||||
rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
|
||||
return rflags;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_rflags);
|
||||
|
||||
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
||||
{
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
|
||||
vcpu->arch.singlestep_cs ==
|
||||
get_segment_selector(vcpu, VCPU_SREG_CS) &&
|
||||
vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
|
||||
rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
|
||||
kvm_x86_ops->set_rflags(vcpu, rflags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_set_rflags);
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
|
||||
|
@ -14,12 +14,76 @@
|
||||
|
||||
#define KVM_API_VERSION 12
|
||||
|
||||
/* for KVM_TRACE_ENABLE, deprecated */
|
||||
/* *** Deprecated interfaces *** */
|
||||
|
||||
#define KVM_TRC_SHIFT 16
|
||||
|
||||
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
|
||||
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
|
||||
|
||||
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
|
||||
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
|
||||
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
|
||||
|
||||
#define KVM_TRC_HEAD_SIZE 12
|
||||
#define KVM_TRC_CYCLE_SIZE 8
|
||||
#define KVM_TRC_EXTRA_MAX 7
|
||||
|
||||
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
|
||||
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
|
||||
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
|
||||
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
|
||||
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
|
||||
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
|
||||
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
|
||||
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
|
||||
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
|
||||
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
|
||||
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
|
||||
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
|
||||
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
|
||||
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
|
||||
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
|
||||
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
|
||||
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
|
||||
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
|
||||
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
|
||||
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
|
||||
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
|
||||
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
|
||||
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
|
||||
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
|
||||
|
||||
struct kvm_user_trace_setup {
|
||||
__u32 buf_size; /* sub_buffer size of each per-cpu */
|
||||
__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
|
||||
__u32 buf_size;
|
||||
__u32 buf_nr;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_MAIN_W_0x06 \
|
||||
_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
|
||||
#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
|
||||
#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
|
||||
|
||||
#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
|
||||
/*
 * Element type of kvm_debug_guest.breakpoints[]; part of the deprecated
 * KVM_DEBUG_GUEST interface.
 */
struct kvm_breakpoint {
|
||||
__u32 enabled;
|
||||
__u32 padding;
|
||||
__u64 address;
|
||||
};
|
||||
|
||||
/*
 * Argument structure of the deprecated KVM_DEBUG_GUEST vcpu ioctl
 * (__KVM_DEPRECATED_VCPU_W_0x87).
 */
struct kvm_debug_guest {
|
||||
__u32 enabled;
|
||||
__u32 pad;
|
||||
struct kvm_breakpoint breakpoints[4];
|
||||
__u32 singlestep;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
|
||||
|
||||
/* *** End of deprecated interfaces *** */
|
||||
|
||||
|
||||
/* for KVM_CREATE_MEMORY_REGION */
|
||||
struct kvm_memory_region {
|
||||
__u32 slot;
|
||||
@ -99,6 +163,7 @@ struct kvm_pit_config {
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
#define KVM_INTERNAL_ERROR_EMULATION 1
|
||||
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
|
||||
|
||||
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
|
||||
struct kvm_run {
|
||||
@ -116,6 +181,11 @@ struct kvm_run {
|
||||
__u64 cr8;
|
||||
__u64 apic_base;
|
||||
|
||||
#ifdef __KVM_S390
|
||||
/* the processor status word for s390 */
|
||||
__u64 psw_mask; /* psw upper half */
|
||||
__u64 psw_addr; /* psw lower half */
|
||||
#endif
|
||||
union {
|
||||
/* KVM_EXIT_UNKNOWN */
|
||||
struct {
|
||||
@ -167,8 +237,6 @@ struct kvm_run {
|
||||
/* KVM_EXIT_S390_SIEIC */
|
||||
struct {
|
||||
__u8 icptcode;
|
||||
__u64 mask; /* psw upper half */
|
||||
__u64 addr; /* psw lower half */
|
||||
__u16 ipa;
|
||||
__u32 ipb;
|
||||
} s390_sieic;
|
||||
@ -187,6 +255,9 @@ struct kvm_run {
|
||||
} dcr;
|
||||
struct {
|
||||
__u32 suberror;
|
||||
/* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
|
||||
__u32 ndata;
|
||||
__u64 data[16];
|
||||
} internal;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
@ -329,24 +400,6 @@ struct kvm_ioeventfd {
|
||||
__u8 pad[36];
|
||||
};
|
||||
|
||||
#define KVM_TRC_SHIFT 16
|
||||
/*
|
||||
* kvm trace categories
|
||||
*/
|
||||
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
|
||||
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */
|
||||
|
||||
/*
|
||||
* kvm trace action
|
||||
*/
|
||||
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
|
||||
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
|
||||
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
|
||||
|
||||
#define KVM_TRC_HEAD_SIZE 12
|
||||
#define KVM_TRC_CYCLE_SIZE 8
|
||||
#define KVM_TRC_EXTRA_MAX 7
|
||||
|
||||
#define KVMIO 0xAE
|
||||
|
||||
/*
|
||||
@ -367,12 +420,10 @@ struct kvm_ioeventfd {
|
||||
*/
|
||||
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
|
||||
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
|
||||
/*
|
||||
* ioctls for kvm trace
|
||||
*/
|
||||
#define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
|
||||
#define KVM_TRACE_PAUSE _IO(KVMIO, 0x07)
|
||||
#define KVM_TRACE_DISABLE _IO(KVMIO, 0x08)
|
||||
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
|
||||
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
|
||||
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
|
||||
|
||||
/*
|
||||
* Extension capability list.
|
||||
*/
|
||||
@ -436,6 +487,15 @@ struct kvm_ioeventfd {
|
||||
#endif
|
||||
#define KVM_CAP_IOEVENTFD 36
|
||||
#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
|
||||
#ifdef __KVM_HAVE_XEN_HVM
|
||||
#define KVM_CAP_XEN_HVM 38
|
||||
#endif
|
||||
#define KVM_CAP_ADJUST_CLOCK 39
|
||||
#define KVM_CAP_INTERNAL_ERROR_DATA 40
|
||||
#ifdef __KVM_HAVE_VCPU_EVENTS
|
||||
#define KVM_CAP_VCPU_EVENTS 41
|
||||
#endif
|
||||
#define KVM_CAP_S390_PSW 42
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -488,6 +548,18 @@ struct kvm_x86_mce {
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef KVM_CAP_XEN_HVM
|
||||
/*
 * Argument of the KVM_XEN_HVM_CONFIG vm ioctl: names the MSR a Xen HVM
 * guest writes to initialize its hypercall page and describes the 32-bit
 * and 64-bit hypercall blobs held in userspace.
 */
struct kvm_xen_hvm_config {
|
||||
__u32 flags; /* KVM_XEN_HVM_CONFIG fails with -EINVAL if non-zero */
|
||||
__u32 msr; /* MSR the guest writes to set up its hypercall page */
|
||||
__u64 blob_addr_32; /* userspace start address of the 32-bit blob */
|
||||
__u64 blob_addr_64; /* userspace start address of the 64-bit blob */
|
||||
__u8 blob_size_32; /* size of the 32-bit blob; presumably in pages - TODO confirm */
|
||||
__u8 blob_size_64; /* size of the 64-bit blob; presumably in pages - TODO confirm */
|
||||
__u8 pad2[30];
|
||||
};
|
||||
#endif
|
||||
|
||||
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
|
||||
|
||||
struct kvm_irqfd {
|
||||
@ -497,55 +569,66 @@ struct kvm_irqfd {
|
||||
__u8 pad[20];
|
||||
};
|
||||
|
||||
/* Argument of the KVM_SET_CLOCK and KVM_GET_CLOCK vm ioctls. */
struct kvm_clock_data {
|
||||
__u64 clock; /* kvmclock value in nanoseconds */
|
||||
__u32 flags; /* KVM_SET_CLOCK rejects non-zero; KVM_GET_CLOCK returns 0 */
|
||||
__u32 pad[9];
|
||||
};
|
||||
|
||||
/*
|
||||
* ioctls for VM fds
|
||||
*/
|
||||
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
|
||||
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
|
||||
/*
|
||||
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
|
||||
* a vcpu fd.
|
||||
*/
|
||||
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
|
||||
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
|
||||
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
|
||||
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
|
||||
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
|
||||
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
|
||||
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
|
||||
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
|
||||
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
|
||||
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
|
||||
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
|
||||
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
|
||||
struct kvm_userspace_memory_region)
|
||||
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
|
||||
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
|
||||
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
|
||||
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
|
||||
/* Device model IOC */
|
||||
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
|
||||
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
|
||||
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
|
||||
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
|
||||
#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
|
||||
#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
|
||||
#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
|
||||
#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level)
|
||||
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
|
||||
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
|
||||
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
|
||||
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
|
||||
#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
|
||||
#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
|
||||
#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
|
||||
#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level)
|
||||
#define KVM_REGISTER_COALESCED_MMIO \
|
||||
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
|
||||
#define KVM_UNREGISTER_COALESCED_MMIO \
|
||||
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
|
||||
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
|
||||
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
|
||||
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
|
||||
#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
|
||||
struct kvm_assigned_irq)
|
||||
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
|
||||
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_ASSIGN_SET_MSIX_NR \
|
||||
_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
|
||||
#define KVM_ASSIGN_SET_MSIX_ENTRY \
|
||||
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
|
||||
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
|
||||
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
|
||||
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
|
||||
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
|
||||
#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
|
||||
#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
|
||||
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
|
||||
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
|
||||
struct kvm_assigned_msix_nr)
|
||||
#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
|
||||
struct kvm_assigned_msix_entry)
|
||||
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
|
||||
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
|
||||
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
|
||||
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
|
||||
#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
|
||||
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
|
||||
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
|
||||
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
|
||||
/* Available with KVM_CAP_PIT_STATE2 */
|
||||
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
|
||||
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
|
||||
|
||||
/*
|
||||
* ioctls for vcpu fds
|
||||
@ -558,7 +641,7 @@ struct kvm_irqfd {
|
||||
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
|
||||
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
|
||||
/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
|
||||
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST
|
||||
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
|
||||
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
|
||||
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
|
||||
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
|
||||
@ -570,7 +653,7 @@ struct kvm_irqfd {
|
||||
#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2)
|
||||
#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
|
||||
/* Available with KVM_CAP_VAPIC */
|
||||
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
|
||||
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
|
||||
/* Available with KVM_CAP_VAPIC */
|
||||
#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
|
||||
/* valid for virtual machine (for floating interrupt)_and_ vcpu */
|
||||
@ -582,66 +665,23 @@ struct kvm_irqfd {
|
||||
/* initial ipl psw for s390 */
|
||||
#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw)
|
||||
/* initial reset for s390 */
|
||||
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
|
||||
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
|
||||
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
|
||||
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
|
||||
/* Available with KVM_CAP_NMI */
|
||||
#define KVM_NMI _IO(KVMIO, 0x9a)
|
||||
#define KVM_NMI _IO(KVMIO, 0x9a)
|
||||
/* Available with KVM_CAP_SET_GUEST_DEBUG */
|
||||
#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
|
||||
/* MCE for x86 */
|
||||
#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64)
|
||||
#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
|
||||
#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce)
|
||||
|
||||
/*
|
||||
* Deprecated interfaces
|
||||
*/
|
||||
struct kvm_breakpoint {
|
||||
__u32 enabled;
|
||||
__u32 padding;
|
||||
__u64 address;
|
||||
};
|
||||
|
||||
struct kvm_debug_guest {
|
||||
__u32 enabled;
|
||||
__u32 pad;
|
||||
struct kvm_breakpoint breakpoints[4];
|
||||
__u32 singlestep;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
|
||||
|
||||
/* IA64 stack access */
|
||||
#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *)
|
||||
#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *)
|
||||
|
||||
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
|
||||
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
|
||||
|
||||
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
|
||||
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
|
||||
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
|
||||
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
|
||||
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
|
||||
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
|
||||
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
|
||||
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
|
||||
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
|
||||
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
|
||||
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
|
||||
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
|
||||
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
|
||||
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
|
||||
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
|
||||
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
|
||||
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
|
||||
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
|
||||
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
|
||||
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
|
||||
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
|
||||
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
|
||||
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
|
||||
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
|
||||
/* Available with KVM_CAP_VCPU_EVENTS */
|
||||
#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events)
|
||||
#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events)
|
||||
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
|
||||
@ -696,4 +736,4 @@ struct kvm_assigned_msix_entry {
|
||||
__u16 padding[3];
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif /* __LINUX_KVM_H */
|
||||
|
@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry {
|
||||
u32 gsi;
|
||||
u32 type;
|
||||
int (*set)(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int level);
|
||||
struct kvm *kvm, int irq_source_id, int level);
|
||||
union {
|
||||
struct {
|
||||
unsigned irqchip;
|
||||
@ -128,9 +128,28 @@ struct kvm_kernel_irq_routing_entry {
|
||||
} irqchip;
|
||||
struct msi_msg msi;
|
||||
};
|
||||
struct list_head link;
|
||||
struct hlist_node link;
|
||||
};
|
||||
|
||||
#ifdef __KVM_HAVE_IOAPIC
|
||||
|
||||
struct kvm_irq_routing_table {
|
||||
int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
|
||||
struct kvm_kernel_irq_routing_entry *rt_entries;
|
||||
u32 nr_rt_entries;
|
||||
/*
|
||||
* Array indexed by gsi. Each entry contains list of irq chips
|
||||
* the gsi is connected to.
|
||||
*/
|
||||
struct hlist_head map[0];
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
struct kvm_irq_routing_table {};
|
||||
|
||||
#endif
|
||||
|
||||
struct kvm {
|
||||
spinlock_t mmu_lock;
|
||||
spinlock_t requests_lock;
|
||||
@ -166,8 +185,9 @@ struct kvm {
|
||||
|
||||
struct mutex irq_lock;
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
|
||||
struct kvm_irq_routing_table *irq_routing;
|
||||
struct hlist_head mask_notifier_list;
|
||||
struct hlist_head irq_ack_notifier_list;
|
||||
#endif
|
||||
|
||||
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
@ -266,6 +286,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
|
||||
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
|
||||
|
||||
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
|
||||
void kvm_resched(struct kvm_vcpu *vcpu);
|
||||
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
@ -325,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_hardware_enable(void *garbage);
|
||||
int kvm_arch_hardware_enable(void *garbage);
|
||||
void kvm_arch_hardware_disable(void *garbage);
|
||||
int kvm_arch_hardware_setup(void);
|
||||
void kvm_arch_hardware_unsetup(void);
|
||||
@ -390,7 +411,12 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||
struct kvm_irq_mask_notifier *kimn);
|
||||
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
|
||||
|
||||
int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
|
||||
#ifdef __KVM_HAVE_IOAPIC
|
||||
void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
|
||||
union kvm_ioapic_redirect_entry *entry,
|
||||
unsigned long *deliver_bitmask);
|
||||
#endif
|
||||
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
||||
void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
struct kvm_irq_ack_notifier *kian);
|
||||
@ -552,4 +578,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
|
||||
return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
|
||||
|
||||
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
||||
unsigned long arg);
|
||||
|
||||
#else
|
||||
|
||||
static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
||||
unsigned long arg)
|
||||
{
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
49
include/linux/user-return-notifier.h
Normal file
49
include/linux/user-return-notifier.h
Normal file
@ -0,0 +1,49 @@
|
||||
#ifndef _LINUX_USER_RETURN_NOTIFIER_H
#define _LINUX_USER_RETURN_NOTIFIER_H

#ifdef CONFIG_USER_RETURN_NOTIFIER

#include <linux/list.h>
#include <linux/sched.h>

/*
 * A callback that can be queued to run on return to userspace.
 *
 * @on_user_return: invoked with the notifier itself as its argument.
 * @link:           hlist linkage used to queue the notifier.
 */
struct user_return_notifier {
	void (*on_user_return)(struct user_return_notifier *urn);
	struct hlist_node link;
};

void user_return_notifier_register(struct user_return_notifier *urn);
void user_return_notifier_unregister(struct user_return_notifier *urn);
void fire_user_return_notifiers(void);

/*
 * If @prev carries TIF_USER_RETURN_NOTIFY, move the flag over to @next;
 * otherwise leave both tasks untouched.
 */
static inline void propagate_user_return_notify(struct task_struct *prev,
						struct task_struct *next)
{
	if (!test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY))
		return;

	clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY);
	set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY);
}

/* Drop TIF_USER_RETURN_NOTIFY from task @p. */
static inline void clear_user_return_notifier(struct task_struct *p)
{
	clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY);
}

#else /* !CONFIG_USER_RETURN_NOTIFIER */

/* Empty stand-ins so callers need no #ifdef of their own. */
struct user_return_notifier {};

static inline void propagate_user_return_notify(struct task_struct *prev,
						struct task_struct *next)
{
}

static inline void fire_user_return_notifiers(void) {}

static inline void clear_user_return_notifier(struct task_struct *p) {}

#endif /* CONFIG_USER_RETURN_NOTIFIER */

#endif /* _LINUX_USER_RETURN_NOTIFIER_H */
|
@ -99,6 +99,7 @@ obj-$(CONFIG_SLOW_WORK) += slow-work.o
|
||||
obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
|
||||
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
|
||||
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
|
||||
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
|
||||
|
||||
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
|
||||
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include <linux/magic.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/user-return-notifier.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
|
||||
goto out;
|
||||
|
||||
setup_thread_stack(tsk, orig);
|
||||
clear_user_return_notifier(tsk);
|
||||
stackend = end_of_stack(tsk);
|
||||
*stackend = STACK_END_MAGIC; /* for overflow detection */
|
||||
|
||||
|
46
kernel/user-return-notifier.c
Normal file
46
kernel/user-return-notifier.c
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
#include <linux/user-return-notifier.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
|
||||
|
||||
#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id())
|
||||
|
||||
/*
|
||||
* Request a notification when the current cpu returns to userspace. Must be
|
||||
* called in atomic context. The notifier will also be called in atomic
|
||||
* context.
|
||||
*/
|
||||
void user_return_notifier_register(struct user_return_notifier *urn)
|
||||
{
|
||||
set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
|
||||
hlist_add_head(&urn->link, &URN_LIST_HEAD);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(user_return_notifier_register);
|
||||
|
||||
/*
|
||||
* Removes a registered user return notifier. Must be called from atomic
|
||||
* context, and from the same cpu registration occured in.
|
||||
*/
|
||||
void user_return_notifier_unregister(struct user_return_notifier *urn)
|
||||
{
|
||||
hlist_del(&urn->link);
|
||||
if (hlist_empty(&URN_LIST_HEAD))
|
||||
clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
|
||||
|
||||
/* Calls registered user return notifiers */
|
||||
void fire_user_return_notifiers(void)
|
||||
{
|
||||
struct user_return_notifier *urn;
|
||||
struct hlist_node *tmp1, *tmp2;
|
||||
struct hlist_head *head;
|
||||
|
||||
head = &get_cpu_var(return_notifier_list);
|
||||
hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
|
||||
urn->on_user_return(urn);
|
||||
put_cpu_var(return_notifier_list);
|
||||
}
|
818
virt/kvm/assigned-dev.c
Normal file
818
virt/kvm/assigned-dev.c
Normal file
@ -0,0 +1,818 @@
|
||||
/*
|
||||
* Kernel-based Virtual Machine - device assignment support
|
||||
*
|
||||
* Copyright (C) 2006-9 Red Hat, Inc
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include "irq.h"
|
||||
|
||||
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
|
||||
int assigned_dev_id)
|
||||
{
|
||||
struct list_head *ptr;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
|
||||
list_for_each(ptr, head) {
|
||||
match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
|
||||
if (match->assigned_dev_id == assigned_dev_id)
|
||||
return match;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
|
||||
*assigned_dev, int irq)
|
||||
{
|
||||
int i, index;
|
||||
struct msix_entry *host_msix_entries;
|
||||
|
||||
host_msix_entries = assigned_dev->host_msix_entries;
|
||||
|
||||
index = -1;
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++)
|
||||
if (irq == host_msix_entries[i].vector) {
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
if (index < 0) {
|
||||
printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *assigned_dev;
|
||||
struct kvm *kvm;
|
||||
int i;
|
||||
|
||||
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
|
||||
interrupt_work);
|
||||
kvm = assigned_dev->kvm;
|
||||
|
||||
spin_lock_irq(&assigned_dev->assigned_dev_lock);
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
struct kvm_guest_msix_entry *guest_entries =
|
||||
assigned_dev->guest_msix_entries;
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++) {
|
||||
if (!(guest_entries[i].flags &
|
||||
KVM_ASSIGNED_MSIX_PENDING))
|
||||
continue;
|
||||
guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
|
||||
kvm_set_irq(assigned_dev->kvm,
|
||||
assigned_dev->irq_source_id,
|
||||
guest_entries[i].vector, 1);
|
||||
}
|
||||
} else
|
||||
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
|
||||
assigned_dev->guest_irq, 1);
|
||||
|
||||
spin_unlock_irq(&assigned_dev->assigned_dev_lock);
|
||||
}
|
||||
|
||||
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kvm_assigned_dev_kernel *assigned_dev =
|
||||
(struct kvm_assigned_dev_kernel *) dev_id;
|
||||
|
||||
spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
int index = find_index_from_host_irq(assigned_dev, irq);
|
||||
if (index < 0)
|
||||
goto out;
|
||||
assigned_dev->guest_msix_entries[index].flags |=
|
||||
KVM_ASSIGNED_MSIX_PENDING;
|
||||
}
|
||||
|
||||
schedule_work(&assigned_dev->interrupt_work);
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
|
||||
disable_irq_nosync(irq);
|
||||
assigned_dev->host_irq_disabled = true;
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/* Ack the irq line for an assigned device */
|
||||
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct kvm_assigned_dev_kernel *dev;
|
||||
unsigned long flags;
|
||||
|
||||
if (kian->gsi == -1)
|
||||
return;
|
||||
|
||||
dev = container_of(kian, struct kvm_assigned_dev_kernel,
|
||||
ack_notifier);
|
||||
|
||||
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
|
||||
|
||||
/* The guest irq may be shared so this ack may be
|
||||
* from another device.
|
||||
*/
|
||||
spin_lock_irqsave(&dev->assigned_dev_lock, flags);
|
||||
if (dev->host_irq_disabled) {
|
||||
enable_irq(dev->host_irq);
|
||||
dev->host_irq_disabled = false;
|
||||
}
|
||||
spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
|
||||
}
|
||||
|
||||
static void deassign_guest_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
|
||||
assigned_dev->ack_notifier.gsi = -1;
|
||||
|
||||
if (assigned_dev->irq_source_id != -1)
|
||||
kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
|
||||
assigned_dev->irq_source_id = -1;
|
||||
assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
|
||||
}
|
||||
|
||||
/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
|
||||
static void deassign_host_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
/*
|
||||
* In kvm_free_device_irq, cancel_work_sync return true if:
|
||||
* 1. work is scheduled, and then cancelled.
|
||||
* 2. work callback is executed.
|
||||
*
|
||||
* The first one ensured that the irq is disabled and no more events
|
||||
* would happen. But for the second one, the irq may be enabled (e.g.
|
||||
* for MSI). So we disable irq here to prevent further events.
|
||||
*
|
||||
* Notice this maybe result in nested disable if the interrupt type is
|
||||
* INTx, but it's OK for we are going to free it.
|
||||
*
|
||||
* If this function is a part of VM destroy, please ensure that till
|
||||
* now, the kvm state is still legal for probably we also have to wait
|
||||
* interrupt_work done.
|
||||
*/
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
|
||||
int i;
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++)
|
||||
disable_irq_nosync(assigned_dev->
|
||||
host_msix_entries[i].vector);
|
||||
|
||||
cancel_work_sync(&assigned_dev->interrupt_work);
|
||||
|
||||
for (i = 0; i < assigned_dev->entries_nr; i++)
|
||||
free_irq(assigned_dev->host_msix_entries[i].vector,
|
||||
(void *)assigned_dev);
|
||||
|
||||
assigned_dev->entries_nr = 0;
|
||||
kfree(assigned_dev->host_msix_entries);
|
||||
kfree(assigned_dev->guest_msix_entries);
|
||||
pci_disable_msix(assigned_dev->dev);
|
||||
} else {
|
||||
/* Deal with MSI and INTx */
|
||||
disable_irq_nosync(assigned_dev->host_irq);
|
||||
cancel_work_sync(&assigned_dev->interrupt_work);
|
||||
|
||||
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
|
||||
|
||||
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
|
||||
pci_disable_msi(assigned_dev->dev);
|
||||
}
|
||||
|
||||
assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
|
||||
}
|
||||
|
||||
static int kvm_deassign_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev,
|
||||
unsigned long irq_requested_type)
|
||||
{
|
||||
unsigned long guest_irq_type, host_irq_type;
|
||||
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
return -EINVAL;
|
||||
/* no irq assignment to deassign */
|
||||
if (!assigned_dev->irq_requested_type)
|
||||
return -ENXIO;
|
||||
|
||||
host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
|
||||
guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
|
||||
|
||||
if (host_irq_type)
|
||||
deassign_host_irq(kvm, assigned_dev);
|
||||
if (guest_irq_type)
|
||||
deassign_guest_irq(kvm, assigned_dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_free_assigned_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *assigned_dev)
|
||||
{
|
||||
kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
|
||||
}
|
||||
|
||||
static void kvm_free_assigned_device(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel
|
||||
*assigned_dev)
|
||||
{
|
||||
kvm_free_assigned_irq(kvm, assigned_dev);
|
||||
|
||||
pci_reset_function(assigned_dev->dev);
|
||||
|
||||
pci_release_regions(assigned_dev->dev);
|
||||
pci_disable_device(assigned_dev->dev);
|
||||
pci_dev_put(assigned_dev->dev);
|
||||
|
||||
list_del(&assigned_dev->list);
|
||||
kfree(assigned_dev);
|
||||
}
|
||||
|
||||
void kvm_free_all_assigned_devices(struct kvm *kvm)
|
||||
{
|
||||
struct list_head *ptr, *ptr2;
|
||||
struct kvm_assigned_dev_kernel *assigned_dev;
|
||||
|
||||
list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
|
||||
assigned_dev = list_entry(ptr,
|
||||
struct kvm_assigned_dev_kernel,
|
||||
list);
|
||||
|
||||
kvm_free_assigned_device(kvm, assigned_dev);
|
||||
}
|
||||
}
|
||||
|
||||
static int assigned_device_enable_host_intx(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev)
|
||||
{
|
||||
dev->host_irq = dev->dev->irq;
|
||||
/* Even though this is PCI, we don't want to use shared
|
||||
* interrupts. Sharing host devices with guest-assigned devices
|
||||
* on the same interrupt line is not a happy situation: there
|
||||
* are going to be long delays in accepting, acking, etc.
|
||||
*/
|
||||
if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
|
||||
0, "kvm_assigned_intx_device", (void *)dev))
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSI
/*
 * Enable MSI on the host device (unless already enabled) and request its
 * irq.  Returns 0 on success, the pci_enable_msi() error, or -EIO when the
 * irq request fails (MSI is disabled again in that case).
 */
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	struct pci_dev *pdev = dev->dev;
	int r;

	if (!pdev->msi_enabled) {
		r = pci_enable_msi(pdev);
		if (r)
			return r;
	}

	dev->host_irq = pdev->irq;
	r = request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
			"kvm_assigned_msi_device", (void *)dev);
	if (!r)
		return 0;

	pci_disable_msi(pdev);
	return -EIO;
}
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
/*
 * Enable MSI-X on the host device and request one irq per table entry.
 *
 * host_msix_entries and guest_msix_entries must already have been set up
 * (entries_nr != 0), otherwise -EINVAL is returned.  Returns 0 on success
 * or the error from pci_enable_msix() / request_irq().
 *
 * On a request_irq() failure everything acquired so far is undone: the
 * irqs already requested are freed and MSI-X is disabled again, so the
 * device is left as it was found and the assignment can be retried.
 */
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_irq(dev->host_msix_entries[i].vector,
				kvm_assigned_dev_intr, 0,
				"kvm_assigned_msix_device",
				(void *)dev);
		if (r)
			goto err_free_irqs;
	}

	return 0;

err_free_irqs:
	/* Entry i failed; release entries i-1 .. 0. */
	while (--i >= 0)
		free_irq(dev->host_msix_entries[i].vector, (void *)dev);
	pci_disable_msix(dev->dev);
	return r;
}

#endif
|
||||
|
||||
static int assigned_device_enable_guest_intx(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
struct kvm_assigned_irq *irq)
|
||||
{
|
||||
dev->guest_irq = irq->guest_irq;
|
||||
dev->ack_notifier.gsi = irq->guest_irq;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __KVM_HAVE_MSI
/*
 * Record the guest MSI irq for @dev.  The ack notifier gsi is set to -1
 * and host_irq_disabled is cleared.  Always returns 0.
 */
static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->host_irq_disabled = false;
	dev->ack_notifier.gsi = -1;
	dev->guest_irq = irq->guest_irq;
	return 0;
}
#endif
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
/*
 * Record the guest MSI-X irq for @dev.  The ack notifier gsi is set to -1
 * and host_irq_disabled is cleared.  Always returns 0.
 */
static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->host_irq_disabled = false;
	dev->ack_notifier.gsi = -1;
	dev->guest_irq = irq->guest_irq;
	return 0;
}
#endif
|
||||
|
||||
static int assign_host_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
__u32 host_irq_type)
|
||||
{
|
||||
int r = -EEXIST;
|
||||
|
||||
if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
|
||||
return r;
|
||||
|
||||
switch (host_irq_type) {
|
||||
case KVM_DEV_IRQ_HOST_INTX:
|
||||
r = assigned_device_enable_host_intx(kvm, dev);
|
||||
break;
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
case KVM_DEV_IRQ_HOST_MSI:
|
||||
r = assigned_device_enable_host_msi(kvm, dev);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
case KVM_DEV_IRQ_HOST_MSIX:
|
||||
r = assigned_device_enable_host_msix(kvm, dev);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
if (!r)
|
||||
dev->irq_requested_type |= host_irq_type;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int assign_guest_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_dev_kernel *dev,
|
||||
struct kvm_assigned_irq *irq,
|
||||
unsigned long guest_irq_type)
|
||||
{
|
||||
int id;
|
||||
int r = -EEXIST;
|
||||
|
||||
if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
|
||||
return r;
|
||||
|
||||
id = kvm_request_irq_source_id(kvm);
|
||||
if (id < 0)
|
||||
return id;
|
||||
|
||||
dev->irq_source_id = id;
|
||||
|
||||
switch (guest_irq_type) {
|
||||
case KVM_DEV_IRQ_GUEST_INTX:
|
||||
r = assigned_device_enable_guest_intx(kvm, dev, irq);
|
||||
break;
|
||||
#ifdef __KVM_HAVE_MSI
|
||||
case KVM_DEV_IRQ_GUEST_MSI:
|
||||
r = assigned_device_enable_guest_msi(kvm, dev, irq);
|
||||
break;
|
||||
#endif
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
case KVM_DEV_IRQ_GUEST_MSIX:
|
||||
r = assigned_device_enable_guest_msix(kvm, dev, irq);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
if (!r) {
|
||||
dev->irq_requested_type |= guest_irq_type;
|
||||
kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
|
||||
} else
|
||||
kvm_free_irq_source_id(kvm, dev->irq_source_id);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
|
||||
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_irq *assigned_irq)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
unsigned long host_irq_type, guest_irq_type;
|
||||
|
||||
if (!capable(CAP_SYS_RAWIO))
|
||||
return -EPERM;
|
||||
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
return r;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
r = -ENODEV;
|
||||
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
assigned_irq->assigned_dev_id);
|
||||
if (!match)
|
||||
goto out;
|
||||
|
||||
host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
|
||||
guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
|
||||
|
||||
r = -EINVAL;
|
||||
/* can only assign one type at a time */
|
||||
if (hweight_long(host_irq_type) > 1)
|
||||
goto out;
|
||||
if (hweight_long(guest_irq_type) > 1)
|
||||
goto out;
|
||||
if (host_irq_type == 0 && guest_irq_type == 0)
|
||||
goto out;
|
||||
|
||||
r = 0;
|
||||
if (host_irq_type)
|
||||
r = assign_host_irq(kvm, match, host_irq_type);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
if (guest_irq_type)
|
||||
r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
|
||||
struct kvm_assigned_irq
|
||||
*assigned_irq)
|
||||
{
|
||||
int r = -ENODEV;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
assigned_irq->assigned_dev_id);
|
||||
if (!match)
|
||||
goto out;
|
||||
|
||||
r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
||||
struct kvm_assigned_pci_dev *assigned_dev)
|
||||
{
|
||||
int r = 0;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
struct pci_dev *dev;
|
||||
|
||||
down_read(&kvm->slots_lock);
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
assigned_dev->assigned_dev_id);
|
||||
if (match) {
|
||||
/* device already assigned */
|
||||
r = -EEXIST;
|
||||
goto out;
|
||||
}
|
||||
|
||||
match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
|
||||
if (match == NULL) {
|
||||
printk(KERN_INFO "%s: Couldn't allocate memory\n",
|
||||
__func__);
|
||||
r = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
dev = pci_get_bus_and_slot(assigned_dev->busnr,
|
||||
assigned_dev->devfn);
|
||||
if (!dev) {
|
||||
printk(KERN_INFO "%s: host device not found\n", __func__);
|
||||
r = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
if (pci_enable_device(dev)) {
|
||||
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
|
||||
r = -EBUSY;
|
||||
goto out_put;
|
||||
}
|
||||
r = pci_request_regions(dev, "kvm_assigned_device");
|
||||
if (r) {
|
||||
printk(KERN_INFO "%s: Could not get access to device regions\n",
|
||||
__func__);
|
||||
goto out_disable;
|
||||
}
|
||||
|
||||
pci_reset_function(dev);
|
||||
|
||||
match->assigned_dev_id = assigned_dev->assigned_dev_id;
|
||||
match->host_busnr = assigned_dev->busnr;
|
||||
match->host_devfn = assigned_dev->devfn;
|
||||
match->flags = assigned_dev->flags;
|
||||
match->dev = dev;
|
||||
spin_lock_init(&match->assigned_dev_lock);
|
||||
match->irq_source_id = -1;
|
||||
match->kvm = kvm;
|
||||
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
|
||||
INIT_WORK(&match->interrupt_work,
|
||||
kvm_assigned_dev_interrupt_work_handler);
|
||||
|
||||
list_add(&match->list, &kvm->arch.assigned_dev_head);
|
||||
|
||||
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
|
||||
if (!kvm->arch.iommu_domain) {
|
||||
r = kvm_iommu_map_guest(kvm);
|
||||
if (r)
|
||||
goto out_list_del;
|
||||
}
|
||||
r = kvm_assign_device(kvm, match);
|
||||
if (r)
|
||||
goto out_list_del;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
up_read(&kvm->slots_lock);
|
||||
return r;
|
||||
out_list_del:
|
||||
list_del(&match->list);
|
||||
pci_release_regions(dev);
|
||||
out_disable:
|
||||
pci_disable_device(dev);
|
||||
out_put:
|
||||
pci_dev_put(dev);
|
||||
out_free:
|
||||
kfree(match);
|
||||
mutex_unlock(&kvm->lock);
|
||||
up_read(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
|
||||
struct kvm_assigned_pci_dev *assigned_dev)
|
||||
{
|
||||
int r = 0;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
assigned_dev->assigned_dev_id);
|
||||
if (!match) {
|
||||
printk(KERN_INFO "%s: device hasn't been assigned before, "
|
||||
"so cannot be deassigned\n", __func__);
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
|
||||
kvm_deassign_device(kvm, match);
|
||||
|
||||
kvm_free_assigned_device(kvm, match);
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
|
||||
struct kvm_assigned_msix_nr *entry_nr)
|
||||
{
|
||||
int r = 0;
|
||||
struct kvm_assigned_dev_kernel *adev;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
entry_nr->assigned_dev_id);
|
||||
if (!adev) {
|
||||
r = -EINVAL;
|
||||
goto msix_nr_out;
|
||||
}
|
||||
|
||||
if (adev->entries_nr == 0) {
|
||||
adev->entries_nr = entry_nr->entry_nr;
|
||||
if (adev->entries_nr == 0 ||
|
||||
adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
|
||||
r = -EINVAL;
|
||||
goto msix_nr_out;
|
||||
}
|
||||
|
||||
adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
|
||||
entry_nr->entry_nr,
|
||||
GFP_KERNEL);
|
||||
if (!adev->host_msix_entries) {
|
||||
r = -ENOMEM;
|
||||
goto msix_nr_out;
|
||||
}
|
||||
adev->guest_msix_entries = kzalloc(
|
||||
sizeof(struct kvm_guest_msix_entry) *
|
||||
entry_nr->entry_nr, GFP_KERNEL);
|
||||
if (!adev->guest_msix_entries) {
|
||||
kfree(adev->host_msix_entries);
|
||||
r = -ENOMEM;
|
||||
goto msix_nr_out;
|
||||
}
|
||||
} else /* Not allowed set MSI-X number twice */
|
||||
r = -EINVAL;
|
||||
msix_nr_out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
|
||||
struct kvm_assigned_msix_entry *entry)
|
||||
{
|
||||
int r = 0, i;
|
||||
struct kvm_assigned_dev_kernel *adev;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
|
||||
entry->assigned_dev_id);
|
||||
|
||||
if (!adev) {
|
||||
r = -EINVAL;
|
||||
goto msix_entry_out;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->entries_nr; i++)
|
||||
if (adev->guest_msix_entries[i].vector == 0 ||
|
||||
adev->guest_msix_entries[i].entry == entry->entry) {
|
||||
adev->guest_msix_entries[i].entry = entry->entry;
|
||||
adev->guest_msix_entries[i].vector = entry->gsi;
|
||||
adev->host_msix_entries[i].entry = entry->entry;
|
||||
break;
|
||||
}
|
||||
if (i == adev->entries_nr) {
|
||||
r = -ENOSPC;
|
||||
goto msix_entry_out;
|
||||
}
|
||||
|
||||
msix_entry_out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
|
||||
unsigned long arg)
|
||||
{
|
||||
void __user *argp = (void __user *)arg;
|
||||
int r = -ENOTTY;
|
||||
|
||||
switch (ioctl) {
|
||||
case KVM_ASSIGN_PCI_DEVICE: {
|
||||
struct kvm_assigned_pci_dev assigned_dev;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_ASSIGN_IRQ: {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
|
||||
case KVM_ASSIGN_DEV_IRQ: {
|
||||
struct kvm_assigned_irq assigned_irq;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_DEASSIGN_DEV_IRQ: {
|
||||
struct kvm_assigned_irq assigned_irq;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
|
||||
case KVM_DEASSIGN_PCI_DEVICE: {
|
||||
struct kvm_assigned_pci_dev assigned_dev;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
case KVM_SET_GSI_ROUTING: {
|
||||
struct kvm_irq_routing routing;
|
||||
struct kvm_irq_routing __user *urouting;
|
||||
struct kvm_irq_routing_entry *entries;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&routing, argp, sizeof(routing)))
|
||||
goto out;
|
||||
r = -EINVAL;
|
||||
if (routing.nr >= KVM_MAX_IRQ_ROUTES)
|
||||
goto out;
|
||||
if (routing.flags)
|
||||
goto out;
|
||||
r = -ENOMEM;
|
||||
entries = vmalloc(routing.nr * sizeof(*entries));
|
||||
if (!entries)
|
||||
goto out;
|
||||
r = -EFAULT;
|
||||
urouting = argp;
|
||||
if (copy_from_user(entries, urouting->entries,
|
||||
routing.nr * sizeof(*entries)))
|
||||
goto out_free_irq_routing;
|
||||
r = kvm_set_irq_routing(kvm, entries, routing.nr,
|
||||
routing.flags);
|
||||
out_free_irq_routing:
|
||||
vfree(entries);
|
||||
break;
|
||||
}
|
||||
#endif /* KVM_CAP_IRQ_ROUTING */
|
||||
#ifdef __KVM_HAVE_MSIX
|
||||
case KVM_ASSIGN_SET_MSIX_NR: {
|
||||
struct kvm_assigned_msix_nr entry_nr;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_ASSIGN_SET_MSIX_ENTRY: {
|
||||
struct kvm_assigned_msix_entry entry;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&entry, argp, sizeof entry))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
|
||||
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
|
||||
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
|
||||
union kvm_ioapic_redirect_entry entry;
|
||||
int ret = 1;
|
||||
|
||||
mutex_lock(&ioapic->lock);
|
||||
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
|
||||
entry = ioapic->redirtbl[irq];
|
||||
level ^= entry.fields.polarity;
|
||||
@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
|
||||
}
|
||||
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
|
||||
}
|
||||
mutex_unlock(&ioapic->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
|
||||
int trigger_mode)
|
||||
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
|
||||
int trigger_mode)
|
||||
{
|
||||
union kvm_ioapic_redirect_entry *ent;
|
||||
int i;
|
||||
|
||||
ent = &ioapic->redirtbl[pin];
|
||||
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
|
||||
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
|
||||
|
||||
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
|
||||
if (ent->fields.vector != vector)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We are dropping lock while calling ack notifiers because ack
|
||||
* notifier callbacks for assigned devices call into IOAPIC
|
||||
* recursively. Since remote_irr is cleared only after call
|
||||
* to notifiers if the same vector will be delivered while lock
|
||||
* is dropped it will be put into irr and will be delivered
|
||||
* after ack notifier returns.
|
||||
*/
|
||||
mutex_unlock(&ioapic->lock);
|
||||
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
|
||||
mutex_lock(&ioapic->lock);
|
||||
|
||||
if (trigger_mode != IOAPIC_LEVEL_TRIG)
|
||||
continue;
|
||||
|
||||
if (trigger_mode == IOAPIC_LEVEL_TRIG) {
|
||||
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
|
||||
ent->fields.remote_irr = 0;
|
||||
if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
|
||||
ioapic_service(ioapic, pin);
|
||||
if (!ent->fields.mask && (ioapic->irr & (1 << i)))
|
||||
ioapic_service(ioapic, i);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < IOAPIC_NUM_PINS; i++)
|
||||
if (ioapic->redirtbl[i].fields.vector == vector)
|
||||
__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
|
||||
mutex_lock(&ioapic->lock);
|
||||
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
|
||||
mutex_unlock(&ioapic->lock);
|
||||
}
|
||||
|
||||
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
|
||||
@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
|
||||
ioapic_debug("addr %lx\n", (unsigned long)addr);
|
||||
ASSERT(!(addr & 0xf)); /* check alignment */
|
||||
|
||||
mutex_lock(&ioapic->kvm->irq_lock);
|
||||
addr &= 0xff;
|
||||
mutex_lock(&ioapic->lock);
|
||||
switch (addr) {
|
||||
case IOAPIC_REG_SELECT:
|
||||
result = ioapic->ioregsel;
|
||||
@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
|
||||
result = 0;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&ioapic->lock);
|
||||
|
||||
switch (len) {
|
||||
case 8:
|
||||
*(u64 *) val = result;
|
||||
@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
|
||||
default:
|
||||
printk(KERN_WARNING "ioapic: wrong length %d\n", len);
|
||||
}
|
||||
mutex_unlock(&ioapic->kvm->irq_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
|
||||
(void*)addr, len, val);
|
||||
ASSERT(!(addr & 0xf)); /* check alignment */
|
||||
|
||||
mutex_lock(&ioapic->kvm->irq_lock);
|
||||
if (len == 4 || len == 8)
|
||||
data = *(u32 *) val;
|
||||
else {
|
||||
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
|
||||
goto unlock;
|
||||
return 0;
|
||||
}
|
||||
|
||||
addr &= 0xff;
|
||||
mutex_lock(&ioapic->lock);
|
||||
switch (addr) {
|
||||
case IOAPIC_REG_SELECT:
|
||||
ioapic->ioregsel = data;
|
||||
@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
|
||||
break;
|
||||
#ifdef CONFIG_IA64
|
||||
case IOAPIC_REG_EOI:
|
||||
kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
|
||||
__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
unlock:
|
||||
mutex_unlock(&ioapic->kvm->irq_lock);
|
||||
mutex_unlock(&ioapic->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm)
|
||||
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
|
||||
if (!ioapic)
|
||||
return -ENOMEM;
|
||||
mutex_init(&ioapic->lock);
|
||||
kvm->arch.vioapic = ioapic;
|
||||
kvm_ioapic_reset(ioapic);
|
||||
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
|
||||
@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
|
||||
if (!ioapic)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&ioapic->lock);
|
||||
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
|
||||
mutex_unlock(&ioapic->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
|
||||
if (!ioapic)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&ioapic->lock);
|
||||
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
|
||||
mutex_unlock(&ioapic->lock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -41,9 +41,11 @@ struct kvm_ioapic {
|
||||
u32 irr;
|
||||
u32 pad;
|
||||
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
|
||||
unsigned long irq_states[IOAPIC_NUM_PINS];
|
||||
struct kvm_io_device dev;
|
||||
struct kvm *kvm;
|
||||
void (*ack_notifier)(void *opaque, int irq);
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
|
||||
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
|
||||
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
struct kvm_lapic_irq *irq);
|
||||
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||
|
||||
#endif
|
||||
|
@ -31,20 +31,39 @@
|
||||
|
||||
#include "ioapic.h"
|
||||
|
||||
/*
 * Update one source's contribution to an interrupt line and report the
 * resulting line level.
 *
 * Bit @irq_source_id of *@irq_state is set when @level is non-zero and
 * cleared otherwise.  The line is the logical OR of all sources, so the
 * return value is 1 if any bit remains set in *@irq_state and 0 if the
 * word is now empty.
 */
static inline int kvm_irq_line_state(unsigned long *irq_state,
				     int irq_source_id, int level)
{
	if (!level)
		clear_bit(irq_source_id, irq_state);
	else
		set_bit(irq_source_id, irq_state);

	return *irq_state != 0;
}
|
||||
|
||||
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int level)
|
||||
struct kvm *kvm, int irq_source_id, int level)
|
||||
{
|
||||
#ifdef CONFIG_X86
|
||||
return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
|
||||
struct kvm_pic *pic = pic_irqchip(kvm);
|
||||
level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
|
||||
irq_source_id, level);
|
||||
return kvm_pic_set_irq(pic, e->irqchip.pin, level);
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int level)
|
||||
struct kvm *kvm, int irq_source_id, int level)
|
||||
{
|
||||
return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
|
||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||
level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
|
||||
irq_source_id, level);
|
||||
|
||||
return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
|
||||
}
|
||||
|
||||
inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
|
||||
@ -63,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
int i, r = -1;
|
||||
struct kvm_vcpu *vcpu, *lowest = NULL;
|
||||
|
||||
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
|
||||
|
||||
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
|
||||
kvm_is_dm_lowest_prio(irq))
|
||||
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
|
||||
@ -96,10 +113,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
}
|
||||
|
||||
static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
struct kvm *kvm, int level)
|
||||
struct kvm *kvm, int irq_source_id, int level)
|
||||
{
|
||||
struct kvm_lapic_irq irq;
|
||||
|
||||
if (!level)
|
||||
return -1;
|
||||
|
||||
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
|
||||
|
||||
irq.dest_id = (e->msi.address_lo &
|
||||
@ -116,78 +136,67 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||
return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
|
||||
}
|
||||
|
||||
/* This should be called with the kvm->irq_lock mutex held
|
||||
/*
|
||||
* Return value:
|
||||
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
|
||||
* = 0 Interrupt was coalesced (previous irq is still pending)
|
||||
* > 0 Number of CPUs interrupt was delivered to
|
||||
*/
|
||||
int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
|
||||
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
unsigned long *irq_state, sig_level;
|
||||
int ret = -1;
|
||||
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
|
||||
int ret = -1, i = 0;
|
||||
struct kvm_irq_routing_table *irq_rt;
|
||||
struct hlist_node *n;
|
||||
|
||||
trace_kvm_set_irq(irq, level, irq_source_id);
|
||||
|
||||
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
|
||||
|
||||
if (irq < KVM_IOAPIC_NUM_PINS) {
|
||||
irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
|
||||
|
||||
/* Logical OR for level trig interrupt */
|
||||
if (level)
|
||||
set_bit(irq_source_id, irq_state);
|
||||
else
|
||||
clear_bit(irq_source_id, irq_state);
|
||||
sig_level = !!(*irq_state);
|
||||
} else if (!level)
|
||||
return ret;
|
||||
else /* Deal with MSI/MSI-X */
|
||||
sig_level = 1;
|
||||
|
||||
/* Not possible to detect if the guest uses the PIC or the
|
||||
* IOAPIC. So set the bit in both. The guest will ignore
|
||||
* writes to the unused one.
|
||||
*/
|
||||
list_for_each_entry(e, &kvm->irq_routing, link)
|
||||
if (e->gsi == irq) {
|
||||
int r = e->set(e, kvm, sig_level);
|
||||
if (r < 0)
|
||||
continue;
|
||||
rcu_read_lock();
|
||||
irq_rt = rcu_dereference(kvm->irq_routing);
|
||||
if (irq < irq_rt->nr_rt_entries)
|
||||
hlist_for_each_entry(e, n, &irq_rt->map[irq], link)
|
||||
irq_set[i++] = *e;
|
||||
rcu_read_unlock();
|
||||
|
||||
while(i--) {
|
||||
int r;
|
||||
r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
|
||||
if (r < 0)
|
||||
continue;
|
||||
|
||||
ret = r + ((ret < 0) ? 0 : ret);
|
||||
}
|
||||
|
||||
ret = r + ((ret < 0) ? 0 : ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
struct hlist_node *n;
|
||||
unsigned gsi = pin;
|
||||
int gsi;
|
||||
|
||||
trace_kvm_ack_irq(irqchip, pin);
|
||||
|
||||
list_for_each_entry(e, &kvm->irq_routing, link)
|
||||
if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
|
||||
e->irqchip.irqchip == irqchip &&
|
||||
e->irqchip.pin == pin) {
|
||||
gsi = e->gsi;
|
||||
break;
|
||||
}
|
||||
|
||||
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
|
||||
if (kian->gsi == gsi)
|
||||
kian->irq_acked(kian);
|
||||
rcu_read_lock();
|
||||
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
if (kian->gsi == gsi)
|
||||
kian->irq_acked(kian);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
|
||||
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
}
|
||||
|
||||
@ -195,8 +204,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
|
||||
struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
hlist_del_init(&kian->link);
|
||||
hlist_del_init_rcu(&kian->link);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
int kvm_request_irq_source_id(struct kvm *kvm)
|
||||
@ -205,16 +215,17 @@ int kvm_request_irq_source_id(struct kvm *kvm)
|
||||
int irq_source_id;
|
||||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
irq_source_id = find_first_zero_bit(bitmap,
|
||||
sizeof(kvm->arch.irq_sources_bitmap));
|
||||
irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
|
||||
|
||||
if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
|
||||
if (irq_source_id >= BITS_PER_LONG) {
|
||||
printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
|
||||
return -EFAULT;
|
||||
irq_source_id = -EFAULT;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
|
||||
set_bit(irq_source_id, bitmap);
|
||||
unlock:
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
|
||||
return irq_source_id;
|
||||
@ -228,13 +239,23 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
|
||||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
if (irq_source_id < 0 ||
|
||||
irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
|
||||
irq_source_id >= BITS_PER_LONG) {
|
||||
printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
|
||||
return;
|
||||
goto unlock;
|
||||
}
|
||||
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
|
||||
clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
|
||||
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
goto unlock;
|
||||
|
||||
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) {
|
||||
clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]);
|
||||
if (i >= 16)
|
||||
continue;
|
||||
#ifdef CONFIG_X86
|
||||
clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]);
|
||||
#endif
|
||||
}
|
||||
unlock:
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
}
|
||||
|
||||
@ -243,7 +264,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||
{
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
kimn->irq = irq;
|
||||
hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
|
||||
hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
}
|
||||
|
||||
@ -251,8 +272,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||
struct kvm_irq_mask_notifier *kimn)
|
||||
{
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
hlist_del(&kimn->link);
|
||||
hlist_del_rcu(&kimn->link);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
|
||||
@ -260,33 +282,37 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
|
||||
struct kvm_irq_mask_notifier *kimn;
|
||||
struct hlist_node *n;
|
||||
|
||||
WARN_ON(!mutex_is_locked(&kvm->irq_lock));
|
||||
|
||||
hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
|
||||
if (kimn->irq == irq)
|
||||
kimn->func(kimn, mask);
|
||||
}
|
||||
|
||||
static void __kvm_free_irq_routing(struct list_head *irq_routing)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry *e, *n;
|
||||
|
||||
list_for_each_entry_safe(e, n, irq_routing, link)
|
||||
kfree(e);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void kvm_free_irq_routing(struct kvm *kvm)
|
||||
{
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
__kvm_free_irq_routing(&kvm->irq_routing);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
/* Called only during vm destruction. Nobody can use the pointer
|
||||
at this stage */
|
||||
kfree(kvm->irq_routing);
|
||||
}
|
||||
|
||||
static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
|
||||
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
|
||||
struct kvm_kernel_irq_routing_entry *e,
|
||||
const struct kvm_irq_routing_entry *ue)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
int delta;
|
||||
struct kvm_kernel_irq_routing_entry *ei;
|
||||
struct hlist_node *n;
|
||||
|
||||
/*
|
||||
* Do not allow GSI to be mapped to the same irqchip more than once.
|
||||
* Allow only one to one mapping between GSI and MSI.
|
||||
*/
|
||||
hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
|
||||
if (ei->type == KVM_IRQ_ROUTING_MSI ||
|
||||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
|
||||
return r;
|
||||
|
||||
e->gsi = ue->gsi;
|
||||
e->type = ue->type;
|
||||
@ -309,6 +335,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
|
||||
}
|
||||
e->irqchip.irqchip = ue->u.irqchip.irqchip;
|
||||
e->irqchip.pin = ue->u.irqchip.pin + delta;
|
||||
if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
|
||||
goto out;
|
||||
rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
|
||||
break;
|
||||
case KVM_IRQ_ROUTING_MSI:
|
||||
e->set = kvm_set_msi;
|
||||
@ -319,6 +348,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
|
||||
hlist_add_head(&e->link, &rt->map[e->gsi]);
|
||||
r = 0;
|
||||
out:
|
||||
return r;
|
||||
@ -330,43 +361,53 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
||||
unsigned nr,
|
||||
unsigned flags)
|
||||
{
|
||||
struct list_head irq_list = LIST_HEAD_INIT(irq_list);
|
||||
struct list_head tmp = LIST_HEAD_INIT(tmp);
|
||||
struct kvm_kernel_irq_routing_entry *e = NULL;
|
||||
unsigned i;
|
||||
struct kvm_irq_routing_table *new, *old;
|
||||
u32 i, j, nr_rt_entries = 0;
|
||||
int r;
|
||||
|
||||
for (i = 0; i < nr; ++i) {
|
||||
if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
|
||||
return -EINVAL;
|
||||
nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
|
||||
}
|
||||
|
||||
nr_rt_entries += 1;
|
||||
|
||||
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
|
||||
+ (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
new->rt_entries = (void *)&new->map[nr_rt_entries];
|
||||
|
||||
new->nr_rt_entries = nr_rt_entries;
|
||||
for (i = 0; i < 3; i++)
|
||||
for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
|
||||
new->chip[i][j] = -1;
|
||||
|
||||
for (i = 0; i < nr; ++i) {
|
||||
r = -EINVAL;
|
||||
if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
|
||||
goto out;
|
||||
if (ue->flags)
|
||||
goto out;
|
||||
r = -ENOMEM;
|
||||
e = kzalloc(sizeof(*e), GFP_KERNEL);
|
||||
if (!e)
|
||||
goto out;
|
||||
r = setup_routing_entry(e, ue);
|
||||
r = setup_routing_entry(new, &new->rt_entries[i], ue);
|
||||
if (r)
|
||||
goto out;
|
||||
++ue;
|
||||
list_add(&e->link, &irq_list);
|
||||
e = NULL;
|
||||
}
|
||||
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
list_splice(&kvm->irq_routing, &tmp);
|
||||
INIT_LIST_HEAD(&kvm->irq_routing);
|
||||
list_splice(&irq_list, &kvm->irq_routing);
|
||||
INIT_LIST_HEAD(&irq_list);
|
||||
list_splice(&tmp, &irq_list);
|
||||
old = kvm->irq_routing;
|
||||
rcu_assign_pointer(kvm->irq_routing, new);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
synchronize_rcu();
|
||||
|
||||
new = old;
|
||||
r = 0;
|
||||
|
||||
out:
|
||||
kfree(e);
|
||||
__kvm_free_irq_routing(&irq_list);
|
||||
kfree(new);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user