mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-10 07:00:48 +00:00
First batch of KVM changes for 4.14
Common: - improve heuristic for boosting preempted spinlocks by ignoring VCPUs in user mode ARM: - fix for decoding external abort types from guests - added support for migrating the active priority of interrupts when running a GICv2 guest on a GICv3 host - minor cleanup PPC: - expose storage keys to userspace - merge powerpc/topic/ppc-kvm branch that contains find_linux_pte_or_hugepte and POWER9 thread management cleanup - merge kvm-ppc-fixes with a fix that missed 4.13 because of vacations - fixes s390: - merge of topic branch tlb-flushing from the s390 tree to get the no-dat base features - merge of kvm/master to avoid conflicts with additional sthyi fixes - wire up the no-dat enhancements in KVM - multiple epoch facility (z14 feature) - Configuration z/Architecture Mode - more sthyi fixes - gdb server range checking fix - small code cleanups x86: - emulate Hyper-V TSC frequency MSRs - add nested INVPCID - emulate EPTP switching VMFUNC - support Virtual GIF - support 5 level page tables - speedup nested VM exits by packing byte operations - speedup MMIO by using hardware provided physical address - a lot of fixes and cleanups, especially nested -----BEGIN PGP SIGNATURE----- iQEcBAABCAAGBQJZspE1AAoJEED/6hsPKofoDcMIALT11n+LKV50QGwQdg2W1GOt aChbgnj/Kegit3hQlDhVNb8kmdZEOZzSL81Lh0VPEr7zXU8QiWn2snbizDPv8sde MpHhcZYZZ0YrpoiZKjl8yiwcu88OWGn2qtJ7OpuTS5hvEGAfxMncp0AMZho6fnz/ ySTwJ9GK2MTgBw39OAzCeDOeoYn4NKYMwjJGqBXRhNX8PG/1wmfqv0vPrd6wfg31 KJ58BumavwJjr8YbQ1xELm9rpQrAmaayIsG0R1dEUqCbt5a1+t2gt4h2uY7tWcIv ACt2bIze7eF3xA+OpRs+eT+yemiH3t9btIVmhCfzUpnQ+V5Z55VMSwASLtTuJRQ= =R8Ry -----END PGP SIGNATURE----- Merge tag 'kvm-4.14-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM updates from Radim Krčmář: "First batch of KVM changes for 4.14 Common: - improve heuristic for boosting preempted spinlocks by ignoring VCPUs in user mode ARM: - fix for decoding external abort types from guests - added support for migrating the active priority of interrupts when running a GICv2 guest on a GICv3 host - minor cleanup PPC: - expose storage keys to userspace - merge kvm-ppc-fixes with a fix that missed 4.13 because of vacations - fixes s390: - merge of kvm/master to avoid conflicts with additional sthyi fixes - wire up the no-dat enhancements in KVM - multiple epoch facility (z14 feature) - Configuration z/Architecture Mode - more sthyi fixes - gdb server range checking fix - small code cleanups x86: - emulate Hyper-V TSC frequency MSRs - add nested INVPCID - emulate EPTP switching VMFUNC - support Virtual GIF - support 5 level page tables - speedup nested VM exits by packing byte operations - speedup MMIO by using hardware provided physical address - a lot of fixes and cleanups, especially nested" * tag 'kvm-4.14-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (67 commits) KVM: arm/arm64: Support uaccess of GICC_APRn KVM: arm/arm64: Extract GICv3 max APRn index calculation KVM: arm/arm64: vITS: Drop its_ite->lpi field KVM: arm/arm64: vgic: constify seq_operations and file_operations KVM: arm/arm64: Fix guest external abort matching KVM: PPC: Book3S HV: Fix memory leak in kvm_vm_ioctl_get_htab_fd KVM: s390: vsie: cleanup mcck reinjection KVM: s390: use WARN_ON_ONCE only for checking KVM: s390: guestdbg: fix range check KVM: PPC: Book3S HV: Report storage key support to userspace KVM: PPC: Book3S HV: Fix case where HDEC is treated as 32-bit on POWER9 KVM: PPC: Book3S HV: Fix invalid use of register expression KVM: PPC: Book3S HV: Fix H_REGISTER_VPA VPA size validation KVM: PPC: Book3S HV: Fix setting of storage key in H_ENTER KVM: PPC: e500mc: Fix a NULL dereference KVM: PPC: e500: Fix some NULL dereferences on error KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list KVM: s390: we are always in czam mode KVM: s390: expose no-DAT to guest and migration support KVM: s390: sthyi: remove invalid guest write access ...
This commit is contained in:
commit
0756b7fbb6
@ -83,6 +83,11 @@ Groups:
|
||||
|
||||
Bits for undefined preemption levels are RAZ/WI.
|
||||
|
||||
Note that this differs from a CPU's view of the APRs on hardware in which
|
||||
a GIC without the security extensions expose group 0 and group 1 active
|
||||
priorities in separate register groups, whereas we show a combined view
|
||||
similar to GICv2's GICH_APR.
|
||||
|
||||
For historical reasons and to provide ABI compatibility with userspace we
|
||||
export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
|
||||
field in the lower 5 bits of a word, meaning that userspace must always
|
||||
|
@ -176,7 +176,8 @@ Architectures: s390
|
||||
|
||||
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
|
||||
|
||||
Allows user space to set/get the TOD clock extension (u8).
|
||||
Allows user space to set/get the TOD clock extension (u8) (superseded by
|
||||
KVM_S390_VM_TOD_EXT).
|
||||
|
||||
Parameters: address of a buffer in user space to store the data (u8) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
@ -190,6 +191,17 @@ the POP (u64).
|
||||
Parameters: address of a buffer in user space to store the data (u64) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
|
||||
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
|
||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
|
||||
also allows user space to get/set it. If the guest CPU model does not support
|
||||
it, it is stored as 0 and not allowed to be set to a value != 0.
|
||||
|
||||
Parameters: address of a buffer in user space to store the data
|
||||
(kvm_s390_vm_tod_clock) to
|
||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||
|
||||
4. GROUP: KVM_S390_VM_CRYPTO
|
||||
Architectures: s390
|
||||
|
||||
|
37
MAINTAINERS
37
MAINTAINERS
@ -7464,18 +7464,30 @@ L: kvm@vger.kernel.org
|
||||
W: http://www.linux-kvm.org
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
S: Supported
|
||||
F: Documentation/*/kvm*.txt
|
||||
F: Documentation/virtual/kvm/
|
||||
F: arch/*/kvm/
|
||||
F: arch/x86/kernel/kvm.c
|
||||
F: arch/x86/kernel/kvmclock.c
|
||||
F: arch/*/include/asm/kvm*
|
||||
F: include/linux/kvm*
|
||||
F: include/trace/events/kvm.h
|
||||
F: include/uapi/asm-generic/kvm*
|
||||
F: include/uapi/linux/kvm*
|
||||
F: virt/kvm/
|
||||
F: include/asm-generic/kvm*
|
||||
F: include/linux/kvm*
|
||||
F: include/kvm/iodev.h
|
||||
F: virt/kvm/*
|
||||
F: tools/kvm/
|
||||
|
||||
KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
|
||||
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
|
||||
M: Paolo Bonzini <pbonzini@redhat.com>
|
||||
M: Radim KrÄmář <rkrcmar@redhat.com>
|
||||
L: kvm@vger.kernel.org
|
||||
W: http://www.linux-kvm.org
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
S: Supported
|
||||
F: arch/x86/kvm/
|
||||
F: arch/x86/include/uapi/asm/kvm*
|
||||
F: arch/x86/include/asm/kvm*
|
||||
F: arch/x86/kernel/kvm.c
|
||||
F: arch/x86/kernel/kvmclock.c
|
||||
|
||||
KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd)
|
||||
M: Joerg Roedel <joro@8bytes.org>
|
||||
L: kvm@vger.kernel.org
|
||||
W: http://www.linux-kvm.org/
|
||||
@ -7483,7 +7495,7 @@ S: Maintained
|
||||
F: arch/x86/include/asm/svm.h
|
||||
F: arch/x86/kvm/svm.c
|
||||
|
||||
KERNEL VIRTUAL MACHINE (KVM) FOR ARM
|
||||
KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
|
||||
M: Christoffer Dall <christoffer.dall@linaro.org>
|
||||
M: Marc Zyngier <marc.zyngier@arm.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
@ -7497,14 +7509,16 @@ F: arch/arm/kvm/
|
||||
F: virt/kvm/arm/
|
||||
F: include/kvm/arm_*
|
||||
|
||||
KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
|
||||
KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc)
|
||||
M: Alexander Graf <agraf@suse.com>
|
||||
L: kvm-ppc@vger.kernel.org
|
||||
W: http://www.linux-kvm.org/
|
||||
T: git git://github.com/agraf/linux-2.6.git
|
||||
S: Supported
|
||||
F: arch/powerpc/include/uapi/asm/kvm*
|
||||
F: arch/powerpc/include/asm/kvm*
|
||||
F: arch/powerpc/kvm/
|
||||
F: arch/powerpc/kernel/kvm*
|
||||
|
||||
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
|
||||
M: Christoffer Dall <christoffer.dall@linaro.org>
|
||||
@ -7531,7 +7545,8 @@ L: linux-s390@vger.kernel.org
|
||||
W: http://www.ibm.com/developerworks/linux/linux390/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
|
||||
S: Supported
|
||||
F: Documentation/s390/kvm.txt
|
||||
F: arch/s390/include/uapi/asm/kvm*
|
||||
F: arch/s390/include/asm/gmap.h
|
||||
F: arch/s390/include/asm/kvm*
|
||||
F: arch/s390/kvm/
|
||||
F: arch/s390/mm/gmap.c
|
||||
|
@ -227,7 +227,6 @@
|
||||
|
||||
#define HSR_DABT_S1PTW (_AC(1, UL) << 7)
|
||||
#define HSR_DABT_CM (_AC(1, UL) << 8)
|
||||
#define HSR_DABT_EA (_AC(1, UL) << 9)
|
||||
|
||||
#define kvm_arm_exception_type \
|
||||
{0, "RESET" }, \
|
||||
|
@ -149,11 +149,6 @@ static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
|
||||
return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
|
||||
@ -206,6 +201,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
|
||||
return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
|
||||
case FSC_SEA:
|
||||
case FSC_SEA_TTW0:
|
||||
case FSC_SEA_TTW1:
|
||||
case FSC_SEA_TTW2:
|
||||
case FSC_SEA_TTW3:
|
||||
case FSC_SECC:
|
||||
case FSC_SECC_TTW0:
|
||||
case FSC_SECC_TTW1:
|
||||
case FSC_SECC_TTW2:
|
||||
case FSC_SECC_TTW3:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
|
||||
|
@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
|
||||
trace_kvm_wfx(*vcpu_pc(vcpu), true);
|
||||
vcpu->stat.wfe_exit_stat++;
|
||||
kvm_vcpu_on_spin(vcpu);
|
||||
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
|
||||
} else {
|
||||
trace_kvm_wfx(*vcpu_pc(vcpu), false);
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
|
@ -188,11 +188,6 @@ static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
|
||||
return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
|
||||
@ -240,6 +235,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
|
||||
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
|
||||
case FSC_SEA:
|
||||
case FSC_SEA_TTW0:
|
||||
case FSC_SEA_TTW1:
|
||||
case FSC_SEA_TTW2:
|
||||
case FSC_SEA_TTW3:
|
||||
case FSC_SECC:
|
||||
case FSC_SECC_TTW0:
|
||||
case FSC_SECC_TTW1:
|
||||
case FSC_SECC_TTW2:
|
||||
case FSC_SECC_TTW3:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 esr = kvm_vcpu_get_hsr(vcpu);
|
||||
|
@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
|
||||
vcpu->stat.wfe_exit_stat++;
|
||||
kvm_vcpu_on_spin(vcpu);
|
||||
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
|
||||
} else {
|
||||
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
|
@ -208,29 +208,12 @@ static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
|
||||
static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r, u8 apr)
|
||||
{
|
||||
struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
|
||||
u8 idx = r->Op2 & 3;
|
||||
|
||||
/*
|
||||
* num_pri_bits are initialized with HW supported values.
|
||||
* We can rely safely on num_pri_bits even if VM has not
|
||||
* restored ICC_CTLR_EL1 before restoring APnR registers.
|
||||
*/
|
||||
switch (vgic_v3_cpu->num_pri_bits) {
|
||||
case 7:
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
break;
|
||||
case 6:
|
||||
if (idx > 1)
|
||||
goto err;
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
break;
|
||||
default:
|
||||
if (idx > 0)
|
||||
goto err;
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
}
|
||||
if (idx > vgic_v3_max_apr_idx(vcpu))
|
||||
goto err;
|
||||
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
return true;
|
||||
err:
|
||||
if (!p->is_write)
|
||||
|
@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
return !!(vcpu->arch.pending_exceptions);
|
||||
}
|
||||
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 1;
|
||||
|
@ -104,6 +104,7 @@
|
||||
#define HPTE_R_C ASM_CONST(0x0000000000000080)
|
||||
#define HPTE_R_R ASM_CONST(0x0000000000000100)
|
||||
#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
|
||||
#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
|
||||
|
||||
#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
|
||||
#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
|
||||
|
@ -1941,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
|
||||
rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
|
||||
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
|
||||
if (ret < 0) {
|
||||
kfree(ctx);
|
||||
kvm_put_kvm(kvm);
|
||||
return ret;
|
||||
}
|
||||
|
@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct kvmppc_spapr_tce_table *stt = filp->private_data;
|
||||
struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
|
||||
struct kvm *kvm = stt->kvm;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
list_del_rcu(&stt->list);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
|
||||
WARN_ON(!kref_read(&stit->kref));
|
||||
@ -298,7 +301,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
unsigned long npages, size;
|
||||
int ret = -ENOMEM;
|
||||
int i;
|
||||
int fd = -1;
|
||||
|
||||
if (!args->size)
|
||||
return -EINVAL;
|
||||
@ -328,11 +330,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
|
||||
stt, O_RDWR | O_CLOEXEC);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
/* Check this LIOBN hasn't been previously allocated */
|
||||
@ -344,17 +341,19 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
if (!ret)
|
||||
ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
|
||||
stt, O_RDWR | O_CLOEXEC);
|
||||
|
||||
if (ret >= 0) {
|
||||
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
|
||||
kvm_get_kvm(kvm);
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
if (!ret)
|
||||
return fd;
|
||||
|
||||
put_unused_fd(fd);
|
||||
if (ret >= 0)
|
||||
return ret;
|
||||
|
||||
fail:
|
||||
for (i = 0; i < npages; i++)
|
||||
|
@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
|
||||
|
||||
switch (subfunc) {
|
||||
case H_VPA_REG_VPA: /* register VPA */
|
||||
if (len < sizeof(struct lppaca))
|
||||
/*
|
||||
* The size of our lppaca is 1kB because of the way we align
|
||||
* it for the guest to avoid crossing a 4kB boundary. We only
|
||||
* use 640 bytes of the structure though, so we should accept
|
||||
* clients that set a size of 640.
|
||||
*/
|
||||
if (len < 640)
|
||||
break;
|
||||
vpap = &tvcpu->arch.vpa;
|
||||
err = 0;
|
||||
@ -3336,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
|
||||
if (radix_enabled())
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* POWER7, POWER8 and POWER9 all support 32 storage keys for data.
|
||||
* POWER7 doesn't support keys for instruction accesses,
|
||||
* POWER8 and POWER9 do.
|
||||
*/
|
||||
info->data_keys = 32;
|
||||
info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
|
||||
|
||||
info->flags = KVM_PPC_PAGE_SIZES_REAL;
|
||||
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
|
||||
info->flags |= KVM_PPC_1T_SEGMENTS;
|
||||
|
@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
|
||||
if (!realmode)
|
||||
local_irq_restore(irq_flags);
|
||||
|
||||
ptel &= ~(HPTE_R_PP0 - psize);
|
||||
ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
|
||||
ptel |= pa;
|
||||
|
||||
if (pa)
|
||||
|
@ -982,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
#ifdef CONFIG_KVM_XICS
|
||||
/* We are entering the guest on that thread, push VCPU to XIVE */
|
||||
ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
|
||||
cmpldi cr0, r10, r0
|
||||
cmpldi cr0, r10, 0
|
||||
beq no_xive
|
||||
ld r11, VCPU_XIVE_SAVED_STATE(r4)
|
||||
li r9, TM_QW1_OS
|
||||
@ -1286,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
|
||||
bne 2f
|
||||
mfspr r3,SPRN_HDEC
|
||||
cmpwi r3,0
|
||||
EXTEND_HDEC(r3)
|
||||
cmpdi r3,0
|
||||
mr r4,r9
|
||||
bge fast_guest_return
|
||||
2:
|
||||
|
@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
|
||||
if (err)
|
||||
goto free_vcpu;
|
||||
|
||||
if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
|
||||
if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto uninit_vcpu;
|
||||
}
|
||||
|
||||
err = kvmppc_e500_tlb_init(vcpu_e500);
|
||||
if (err)
|
||||
goto uninit_id;
|
||||
|
||||
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
if (!vcpu->arch.shared)
|
||||
if (!vcpu->arch.shared) {
|
||||
err = -ENOMEM;
|
||||
goto uninit_tlb;
|
||||
}
|
||||
|
||||
return vcpu;
|
||||
|
||||
|
@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
|
||||
goto uninit_vcpu;
|
||||
|
||||
vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!vcpu->arch.shared)
|
||||
if (!vcpu->arch.shared) {
|
||||
err = -ENOMEM;
|
||||
goto uninit_tlb;
|
||||
}
|
||||
|
||||
return vcpu;
|
||||
|
||||
|
@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
||||
return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
|
||||
}
|
||||
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 1;
|
||||
|
@ -226,7 +226,9 @@ struct kvm_s390_sie_block {
|
||||
#define ECB3_RI 0x01
|
||||
__u8 ecb3; /* 0x0063 */
|
||||
__u32 scaol; /* 0x0064 */
|
||||
__u8 reserved68[4]; /* 0x0068 */
|
||||
__u8 reserved68; /* 0x0068 */
|
||||
__u8 epdx; /* 0x0069 */
|
||||
__u8 reserved6a[2]; /* 0x006a */
|
||||
__u32 todpr; /* 0x006c */
|
||||
__u8 reserved70[16]; /* 0x0070 */
|
||||
__u64 mso; /* 0x0080 */
|
||||
@ -265,6 +267,7 @@ struct kvm_s390_sie_block {
|
||||
__u64 cbrlo; /* 0x01b8 */
|
||||
__u8 reserved1c0[8]; /* 0x01c0 */
|
||||
#define ECD_HOSTREGMGMT 0x20000000
|
||||
#define ECD_MEF 0x08000000
|
||||
__u32 ecd; /* 0x01c8 */
|
||||
__u8 reserved1cc[18]; /* 0x01cc */
|
||||
__u64 pp; /* 0x01de */
|
||||
@ -739,6 +742,7 @@ struct kvm_arch{
|
||||
struct kvm_s390_cpu_model model;
|
||||
struct kvm_s390_crypto crypto;
|
||||
struct kvm_s390_vsie vsie;
|
||||
u8 epdx;
|
||||
u64 epoch;
|
||||
struct kvm_s390_migration_state *migration_state;
|
||||
/* subset of available cpu features enabled by user space */
|
||||
|
@ -15,6 +15,6 @@
|
||||
#define ESSA_SET_STABLE_IF_RESIDENT 6
|
||||
#define ESSA_SET_STABLE_NODAT 7
|
||||
|
||||
#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT
|
||||
#define ESSA_MAX ESSA_SET_STABLE_NODAT
|
||||
|
||||
#endif
|
||||
|
@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
|
||||
/* kvm attributes for KVM_S390_VM_TOD */
|
||||
#define KVM_S390_VM_TOD_LOW 0
|
||||
#define KVM_S390_VM_TOD_HIGH 1
|
||||
#define KVM_S390_VM_TOD_EXT 2
|
||||
|
||||
struct kvm_s390_vm_tod_clock {
|
||||
__u8 epoch_idx;
|
||||
__u64 tod;
|
||||
};
|
||||
|
||||
/* kvm attributes for KVM_S390_VM_CPU_MODEL */
|
||||
/* processor related attributes are r/w */
|
||||
|
@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
|
||||
vcpu->stat.diagnose_44++;
|
||||
kvm_vcpu_on_spin(vcpu);
|
||||
kvm_vcpu_on_spin(vcpu, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b)
|
||||
return (addr >= a) && (addr <= b);
|
||||
else
|
||||
/* "overflowing" interval */
|
||||
return (addr <= a) && (addr >= b);
|
||||
return (addr >= a) || (addr <= b);
|
||||
}
|
||||
|
||||
#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
|
||||
|
@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
|
||||
struct kvm_s390_mchk_info *mchk;
|
||||
union mci mci;
|
||||
__u64 cr14 = 0; /* upper bits are not used */
|
||||
int rc;
|
||||
|
||||
mci.val = mcck_info->mcic;
|
||||
if (mci.sr)
|
||||
@ -2496,12 +2497,13 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
|
||||
if (mci.ck) {
|
||||
/* Inject the floating machine check */
|
||||
inti.type = KVM_S390_MCHK;
|
||||
WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
|
||||
rc = __inject_vm(vcpu->kvm, &inti);
|
||||
} else {
|
||||
/* Inject the machine check to specified vcpu */
|
||||
irq.type = KVM_S390_MCHK;
|
||||
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
|
||||
rc = kvm_s390_inject_vcpu(vcpu, &irq);
|
||||
}
|
||||
WARN_ON_ONCE(rc);
|
||||
}
|
||||
|
||||
int kvm_set_routing_entry(struct kvm *kvm,
|
||||
|
@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
struct kvm_s390_tod_clock_ext {
|
||||
__u8 epoch_idx;
|
||||
__u64 tod;
|
||||
__u8 reserved[7];
|
||||
} __packed;
|
||||
|
||||
/* allow nested virtualization in KVM (if enabled by user space) */
|
||||
static int nested;
|
||||
module_param(nested, int, S_IRUGO);
|
||||
@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_tod_clock gtod;
|
||||
|
||||
if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
|
||||
if (test_kvm_facility(kvm, 139))
|
||||
kvm_s390_set_tod_clock_ext(kvm, >od);
|
||||
else if (gtod.epoch_idx == 0)
|
||||
kvm_s390_set_tod_clock(kvm, gtod.tod);
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
|
||||
gtod.epoch_idx, gtod.tod);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u8 gtod_high;
|
||||
@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return -EINVAL;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_TOD_EXT:
|
||||
ret = kvm_s390_set_tod_ext(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_TOD_HIGH:
|
||||
ret = kvm_s390_set_tod_high(kvm, attr);
|
||||
break;
|
||||
@ -922,6 +951,43 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
|
||||
struct kvm_s390_vm_tod_clock *gtod)
|
||||
{
|
||||
struct kvm_s390_tod_clock_ext htod;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
get_tod_clock_ext((char *)&htod);
|
||||
|
||||
gtod->tod = htod.tod + kvm->arch.epoch;
|
||||
gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
|
||||
|
||||
if (gtod->tod < htod.tod)
|
||||
gtod->epoch_idx += 1;
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
struct kvm_s390_vm_tod_clock gtod;
|
||||
|
||||
memset(>od, 0, sizeof(gtod));
|
||||
|
||||
if (test_kvm_facility(kvm, 139))
|
||||
kvm_s390_get_tod_clock_ext(kvm, >od);
|
||||
else
|
||||
gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
|
||||
|
||||
if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
|
||||
return -EFAULT;
|
||||
|
||||
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
|
||||
gtod.epoch_idx, gtod.tod);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
u8 gtod_high = 0;
|
||||
@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return -EINVAL;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_S390_VM_TOD_EXT:
|
||||
ret = kvm_s390_get_tod_ext(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_TOD_HIGH:
|
||||
ret = kvm_s390_get_tod_high(kvm, attr);
|
||||
break;
|
||||
@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
|
||||
if (r < 0)
|
||||
pgstev = 0;
|
||||
/* save the value */
|
||||
res[i++] = (pgstev >> 24) & 0x3;
|
||||
res[i++] = (pgstev >> 24) & 0x43;
|
||||
/*
|
||||
* if the next bit is too far away, stop.
|
||||
* if we reached the previous "next", find the next one
|
||||
@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
|
||||
|
||||
pgstev = bits[i];
|
||||
pgstev = pgstev << 24;
|
||||
mask &= _PGSTE_GPS_USAGE_MASK;
|
||||
mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
|
||||
set_pgste_bits(kvm->mm, hva, mask, pgstev);
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
|
||||
S390_ARCH_FAC_LIST_SIZE_BYTE);
|
||||
|
||||
/* we are always in czam mode - even on pre z14 machines */
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 138);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 138);
|
||||
/* we emulate STHYI in kvm */
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 74);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 74);
|
||||
if (MACHINE_HAS_TLB_GUEST) {
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 147);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 147);
|
||||
}
|
||||
|
||||
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
|
||||
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
|
||||
@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.sie_block->eca |= ECA_VX;
|
||||
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
|
||||
}
|
||||
if (test_kvm_facility(vcpu->kvm, 139))
|
||||
vcpu->arch.sie_block->ecd |= ECD_MEF;
|
||||
|
||||
vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
|
||||
| SDNXC;
|
||||
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
|
||||
@ -2447,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
return kvm_s390_vcpu_has_irq(vcpu, 0);
|
||||
}
|
||||
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
|
||||
}
|
||||
|
||||
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
|
||||
@ -2855,6 +2940,35 @@ retry:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
|
||||
const struct kvm_s390_vm_tod_clock *gtod)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_s390_tod_clock_ext htod;
|
||||
int i;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
preempt_disable();
|
||||
|
||||
get_tod_clock_ext((char *)&htod);
|
||||
|
||||
kvm->arch.epoch = gtod->tod - htod.tod;
|
||||
kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
|
||||
|
||||
if (kvm->arch.epoch > gtod->tod)
|
||||
kvm->arch.epdx -= 1;
|
||||
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
|
||||
vcpu->arch.sie_block->epdx = kvm->arch.epdx;
|
||||
}
|
||||
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
preempt_enable();
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
|
||||
int handle_sthyi(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* implemented in kvm-s390.c */
|
||||
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
|
||||
const struct kvm_s390_vm_tod_clock *gtod);
|
||||
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
|
||||
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
|
||||
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
|
@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
|
||||
if (pgstev & _PGSTE_GPS_ZERO)
|
||||
res |= 1;
|
||||
}
|
||||
if (pgstev & _PGSTE_GPS_NODAT)
|
||||
res |= 0x20;
|
||||
vcpu->run->s.regs.gprs[r1] = res;
|
||||
/*
|
||||
* It is possible that all the normal 511 slots were full, in which case
|
||||
@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
/* Check for invalid operation request code */
|
||||
orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
|
||||
if (orc > ESSA_MAX)
|
||||
/* ORCs 0-6 are always valid */
|
||||
if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT
|
||||
: ESSA_SET_STABLE_IF_RESIDENT))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
if (likely(!vcpu->kvm->arch.migration_state)) {
|
||||
|
@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
|
||||
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
|
||||
u64 *status_reg)
|
||||
{
|
||||
int rc;
|
||||
unsigned int i;
|
||||
struct kvm_vcpu *v;
|
||||
bool all_stopped = true;
|
||||
|
||||
switch (parameter & 0xff) {
|
||||
case 0:
|
||||
rc = SIGP_CC_NOT_OPERATIONAL;
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
kvm_for_each_vcpu(i, v, vcpu->kvm) {
|
||||
v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
|
||||
kvm_clear_async_pf_completion_queue(v);
|
||||
}
|
||||
|
||||
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
|
||||
break;
|
||||
default:
|
||||
rc = -EOPNOTSUPP;
|
||||
kvm_for_each_vcpu(i, v, vcpu->kvm) {
|
||||
if (v == vcpu)
|
||||
continue;
|
||||
if (!is_vcpu_stopped(v))
|
||||
all_stopped = false;
|
||||
}
|
||||
return rc;
|
||||
|
||||
*status_reg &= 0xffffffff00000000UL;
|
||||
|
||||
/* Reject set arch order, with czam we're always in z/Arch mode. */
|
||||
*status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
|
||||
SIGP_STATUS_INCORRECT_STATE);
|
||||
return SIGP_CC_STATUS_STORED;
|
||||
}
|
||||
|
||||
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
|
||||
@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
|
||||
switch (order_code) {
|
||||
case SIGP_SET_ARCHITECTURE:
|
||||
vcpu->stat.instruction_sigp_arch++;
|
||||
rc = __sigp_set_arch(vcpu, parameter);
|
||||
rc = __sigp_set_arch(vcpu, parameter,
|
||||
&vcpu->run->s.regs.gprs[r1]);
|
||||
break;
|
||||
default:
|
||||
rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
|
||||
|
@ -436,14 +436,6 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
|
||||
if (addr & ~PAGE_MASK)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
/*
|
||||
* If the page has not yet been faulted in, we want to do that
|
||||
* now and not after all the expensive calculations.
|
||||
*/
|
||||
r = write_guest(vcpu, addr, reg2, &cc, 1);
|
||||
if (r)
|
||||
return kvm_s390_inject_prog_cond(vcpu, r);
|
||||
|
||||
sctns = (void *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!sctns)
|
||||
return -ENOMEM;
|
||||
|
@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
scb_s->eca |= scb_o->eca & ECA_IB;
|
||||
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
|
||||
scb_s->eca |= scb_o->eca & ECA_CEI;
|
||||
/* Epoch Extension */
|
||||
if (test_kvm_facility(vcpu->kvm, 139))
|
||||
scb_s->ecd |= scb_o->ecd & ECD_MEF;
|
||||
|
||||
prepare_ibc(vcpu, vsie_page);
|
||||
rc = shadow_crycb(vcpu, vsie_page);
|
||||
@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
|
||||
struct mcck_volatile_info *mcck_info;
|
||||
struct sie_page *sie_page;
|
||||
int rc;
|
||||
|
||||
handle_last_fault(vcpu, vsie_page);
|
||||
@ -831,9 +832,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
|
||||
if (rc == -EINTR) {
|
||||
VCPU_EVENT(vcpu, 3, "%s", "machine check");
|
||||
sie_page = container_of(scb_s, struct sie_page, sie_block);
|
||||
mcck_info = &sie_page->mcck_info;
|
||||
kvm_s390_reinject_machine_check(vcpu, mcck_info);
|
||||
kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
|
||||
*/
|
||||
preempt_disable();
|
||||
scb_s->epoch += vcpu->kvm->arch.epoch;
|
||||
|
||||
if (scb_s->ecd & ECD_MEF) {
|
||||
scb_s->epdx += vcpu->kvm->arch.epdx;
|
||||
if (scb_s->epoch < vcpu->kvm->arch.epoch)
|
||||
scb_s->epdx += 1;
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
@ -919,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
||||
case ESSA_GET_STATE:
|
||||
break;
|
||||
case ESSA_SET_STABLE:
|
||||
pgstev &= ~_PGSTE_GPS_USAGE_MASK;
|
||||
pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
|
||||
pgstev |= _PGSTE_GPS_USAGE_STABLE;
|
||||
break;
|
||||
case ESSA_SET_UNUSED:
|
||||
@ -965,6 +965,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
||||
pgstev |= _PGSTE_GPS_USAGE_STABLE;
|
||||
}
|
||||
break;
|
||||
case ESSA_SET_STABLE_NODAT:
|
||||
pgstev &= ~_PGSTE_GPS_USAGE_MASK;
|
||||
pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
|
||||
break;
|
||||
default:
|
||||
/* we should never get here! */
|
||||
break;
|
||||
|
@ -83,6 +83,7 @@ static struct facility_def facility_defs[] = {
|
||||
78, /* enhanced-DAT 2 */
|
||||
130, /* instruction-execution-protection */
|
||||
131, /* enhanced-SOP 2 and side-effect */
|
||||
139, /* multiple epoch facility */
|
||||
146, /* msa extension 8 */
|
||||
-1 /* END */
|
||||
}
|
||||
|
@ -288,6 +288,7 @@
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
|
||||
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
|
||||
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
||||
|
@ -219,8 +219,8 @@ struct x86_emulate_ops {
|
||||
struct x86_instruction_info *info,
|
||||
enum x86_intercept_stage stage);
|
||||
|
||||
void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
|
||||
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
|
||||
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
|
||||
u32 *ecx, u32 *edx, bool check_limit);
|
||||
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
|
||||
|
||||
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
|
||||
|
@ -79,15 +79,14 @@
|
||||
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
|
||||
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
|
||||
|
||||
#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
|
||||
#define CR3_PCID_INVD BIT_64(63)
|
||||
#define CR4_RESERVED_BITS \
|
||||
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
|
||||
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
|
||||
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
|
||||
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
|
||||
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
|
||||
| X86_CR4_PKE))
|
||||
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
|
||||
| X86_CR4_SMAP | X86_CR4_PKE))
|
||||
|
||||
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
|
||||
|
||||
@ -204,7 +203,6 @@ enum {
|
||||
#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
|
||||
|
||||
#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
|
||||
PFERR_USER_MASK | \
|
||||
PFERR_WRITE_MASK | \
|
||||
PFERR_PRESENT_MASK)
|
||||
|
||||
@ -317,15 +315,17 @@ struct kvm_pio_request {
|
||||
int size;
|
||||
};
|
||||
|
||||
#define PT64_ROOT_MAX_LEVEL 5
|
||||
|
||||
struct rsvd_bits_validate {
|
||||
u64 rsvd_bits_mask[2][4];
|
||||
u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
|
||||
u64 bad_mt_xwr;
|
||||
};
|
||||
|
||||
/*
|
||||
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
|
||||
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
|
||||
* mode.
|
||||
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
|
||||
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
|
||||
* current mmu mode.
|
||||
*/
|
||||
struct kvm_mmu {
|
||||
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
|
||||
@ -548,8 +548,8 @@ struct kvm_vcpu_arch {
|
||||
|
||||
struct kvm_queued_exception {
|
||||
bool pending;
|
||||
bool injected;
|
||||
bool has_error_code;
|
||||
bool reinject;
|
||||
u8 nr;
|
||||
u32 error_code;
|
||||
u8 nested_apf;
|
||||
@ -687,8 +687,12 @@ struct kvm_vcpu_arch {
|
||||
int pending_ioapic_eoi;
|
||||
int pending_external_vector;
|
||||
|
||||
/* GPA available (AMD only) */
|
||||
/* GPA available */
|
||||
bool gpa_available;
|
||||
gpa_t gpa_val;
|
||||
|
||||
/* be preempted when it's in kernel-mode(cpl=0) */
|
||||
bool preempted_in_kernel;
|
||||
};
|
||||
|
||||
struct kvm_lpage_info {
|
||||
@ -979,7 +983,7 @@ struct kvm_x86_ops {
|
||||
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
|
||||
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
|
||||
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
|
||||
int (*get_tdp_level)(void);
|
||||
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
|
||||
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
|
||||
int (*get_lpage_level)(void);
|
||||
bool (*rdtscp_supported)(void);
|
||||
@ -1297,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
|
||||
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
||||
}
|
||||
|
||||
static inline u64 get_canonical(u64 la)
|
||||
{
|
||||
return ((int64_t)la << 16) >> 16;
|
||||
}
|
||||
|
||||
static inline bool is_noncanonical_address(u64 la)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return get_canonical(la) != la;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define TSS_IOPB_BASE_OFFSET 0x66
|
||||
#define TSS_BASE_SIZE 0x68
|
||||
#define TSS_IOPB_SIZE (65536 / 8)
|
||||
|
@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
||||
#define V_IRQ_SHIFT 8
|
||||
#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
|
||||
|
||||
#define V_GIF_SHIFT 9
|
||||
#define V_GIF_MASK (1 << V_GIF_SHIFT)
|
||||
|
||||
#define V_INTR_PRIO_SHIFT 16
|
||||
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
|
||||
|
||||
@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
||||
#define V_INTR_MASKING_SHIFT 24
|
||||
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
|
||||
|
||||
#define V_GIF_ENABLE_SHIFT 25
|
||||
#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
|
||||
|
||||
#define AVIC_ENABLE_SHIFT 31
|
||||
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
|
||||
|
||||
|
@ -72,6 +72,7 @@
|
||||
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
|
||||
#define SECONDARY_EXEC_RDRAND 0x00000800
|
||||
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
|
||||
#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
|
||||
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
|
||||
#define SECONDARY_EXEC_RDSEED 0x00010000
|
||||
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
|
||||
@ -114,6 +115,10 @@
|
||||
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
|
||||
#define VMX_MISC_ACTIVITY_HLT 0x00000040
|
||||
|
||||
/* VMFUNC functions */
|
||||
#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
|
||||
#define VMFUNC_EPTP_ENTRIES 512
|
||||
|
||||
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
|
||||
{
|
||||
return vmx_basic & GENMASK_ULL(30, 0);
|
||||
@ -187,6 +192,8 @@ enum vmcs_field {
|
||||
APIC_ACCESS_ADDR_HIGH = 0x00002015,
|
||||
POSTED_INTR_DESC_ADDR = 0x00002016,
|
||||
POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
|
||||
VM_FUNCTION_CONTROL = 0x00002018,
|
||||
VM_FUNCTION_CONTROL_HIGH = 0x00002019,
|
||||
EPT_POINTER = 0x0000201a,
|
||||
EPT_POINTER_HIGH = 0x0000201b,
|
||||
EOI_EXIT_BITMAP0 = 0x0000201c,
|
||||
@ -197,6 +204,8 @@ enum vmcs_field {
|
||||
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
|
||||
EOI_EXIT_BITMAP3 = 0x00002022,
|
||||
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
|
||||
EPTP_LIST_ADDRESS = 0x00002024,
|
||||
EPTP_LIST_ADDRESS_HIGH = 0x00002025,
|
||||
VMREAD_BITMAP = 0x00002026,
|
||||
VMWRITE_BITMAP = 0x00002028,
|
||||
XSS_EXIT_BITMAP = 0x0000202C,
|
||||
@ -444,6 +453,7 @@ enum vmcs_field {
|
||||
|
||||
#define VMX_EPT_EXECUTE_ONLY_BIT (1ull)
|
||||
#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6)
|
||||
#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7)
|
||||
#define VMX_EPTP_UC_BIT (1ull << 8)
|
||||
#define VMX_EPTP_WB_BIT (1ull << 14)
|
||||
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
|
||||
@ -459,12 +469,14 @@ enum vmcs_field {
|
||||
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
|
||||
#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
|
||||
|
||||
#define VMX_EPT_DEFAULT_GAW 3
|
||||
#define VMX_EPT_MAX_GAW 0x4
|
||||
#define VMX_EPT_MT_EPTE_SHIFT 3
|
||||
#define VMX_EPT_GAW_EPTP_SHIFT 3
|
||||
#define VMX_EPT_AD_ENABLE_BIT (1ull << 6)
|
||||
#define VMX_EPT_DEFAULT_MT 0x6ull
|
||||
#define VMX_EPTP_PWL_MASK 0x38ull
|
||||
#define VMX_EPTP_PWL_4 0x18ull
|
||||
#define VMX_EPTP_PWL_5 0x20ull
|
||||
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
|
||||
#define VMX_EPTP_MT_MASK 0x7ull
|
||||
#define VMX_EPTP_MT_WB 0x6ull
|
||||
#define VMX_EPTP_MT_UC 0x0ull
|
||||
#define VMX_EPT_READABLE_MASK 0x1ull
|
||||
#define VMX_EPT_WRITABLE_MASK 0x2ull
|
||||
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
|
||||
|
@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
|
||||
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
|
||||
|
||||
/*
|
||||
* The existing code assumes virtual address is 48-bit in the canonical
|
||||
* address checks; exit if it is ever changed.
|
||||
* The existing code assumes virtual address is 48-bit or 57-bit in the
|
||||
* canonical address checks; exit if it is ever changed.
|
||||
*/
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
|
||||
if (best && ((best->eax & 0xff00) >> 8) != 48 &&
|
||||
((best->eax & 0xff00) >> 8) != 0)
|
||||
return -EINVAL;
|
||||
if (best) {
|
||||
int vaddr_bits = (best->eax & 0xff00) >> 8;
|
||||
|
||||
if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Update physical-address width */
|
||||
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
|
||||
kvm_pmu_refresh(vcpu);
|
||||
return 0;
|
||||
@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
|
||||
/* cpuid 7.0.ecx*/
|
||||
const u32 kvm_cpuid_7_0_ecx_x86_features =
|
||||
F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
|
||||
F(AVX512VBMI) | F(LA57) | F(PKU) |
|
||||
0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
|
||||
|
||||
/* cpuid 7.0.edx*/
|
||||
const u32 kvm_cpuid_7_0_edx_x86_features =
|
||||
@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
|
||||
return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
|
||||
}
|
||||
|
||||
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
|
||||
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
|
||||
u32 *ecx, u32 *edx, bool check_limit)
|
||||
{
|
||||
u32 function = *eax, index = *ecx;
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
bool entry_found = true;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, function, index);
|
||||
|
||||
if (!best)
|
||||
best = check_cpuid_limit(vcpu, function, index);
|
||||
if (!best) {
|
||||
entry_found = false;
|
||||
if (!check_limit)
|
||||
goto out;
|
||||
|
||||
best = check_cpuid_limit(vcpu, function, index);
|
||||
}
|
||||
|
||||
out:
|
||||
if (best) {
|
||||
*eax = best->eax;
|
||||
*ebx = best->ebx;
|
||||
@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
|
||||
*edx = best->edx;
|
||||
} else
|
||||
*eax = *ebx = *ecx = *edx = 0;
|
||||
trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
|
||||
trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found);
|
||||
return entry_found;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_cpuid);
|
||||
|
||||
@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
|
||||
|
||||
eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
|
||||
ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
|
||||
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
|
||||
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include "x86.h"
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
|
||||
bool kvm_mpx_supported(void);
|
||||
@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
|
||||
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
|
||||
struct kvm_cpuid2 *cpuid,
|
||||
struct kvm_cpuid_entry2 __user *entries);
|
||||
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
|
||||
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
|
||||
u32 *ecx, u32 *edx, bool check_limit);
|
||||
|
||||
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
|
||||
|
||||
@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
|
||||
return vcpu->arch.maxphyaddr;
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
struct cpuid_reg {
|
||||
u32 function;
|
||||
u32 index;
|
||||
int reg;
|
||||
};
|
||||
|
||||
if (!static_cpu_has(X86_FEATURE_XSAVE))
|
||||
static const struct cpuid_reg reverse_cpuid[] = {
|
||||
[CPUID_1_EDX] = { 1, 0, CPUID_EDX},
|
||||
[CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX},
|
||||
[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
|
||||
[CPUID_1_ECX] = { 1, 0, CPUID_ECX},
|
||||
[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
|
||||
[CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
|
||||
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
|
||||
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
|
||||
[CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
|
||||
[CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX},
|
||||
[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
|
||||
[CPUID_6_EAX] = { 6, 0, CPUID_EAX},
|
||||
[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
|
||||
[CPUID_7_ECX] = { 7, 0, CPUID_ECX},
|
||||
[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
|
||||
};
|
||||
|
||||
static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
|
||||
{
|
||||
unsigned x86_leaf = x86_feature / 32;
|
||||
|
||||
BUILD_BUG_ON(!__builtin_constant_p(x86_leaf));
|
||||
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
|
||||
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
|
||||
|
||||
return reverse_cpuid[x86_leaf];
|
||||
}
|
||||
|
||||
static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
|
||||
if (!entry)
|
||||
return NULL;
|
||||
|
||||
switch (cpuid.reg) {
|
||||
case CPUID_EAX:
|
||||
return &entry->eax;
|
||||
case CPUID_EBX:
|
||||
return &entry->ebx;
|
||||
case CPUID_ECX:
|
||||
return &entry->ecx;
|
||||
case CPUID_EDX:
|
||||
return &entry->edx;
|
||||
default:
|
||||
BUILD_BUG();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature)
|
||||
{
|
||||
int *reg;
|
||||
|
||||
if (x86_feature == X86_FEATURE_XSAVE &&
|
||||
!static_cpu_has(X86_FEATURE_XSAVE))
|
||||
return false;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
|
||||
reg = guest_cpuid_get_register(vcpu, x86_feature);
|
||||
if (!reg)
|
||||
return false;
|
||||
|
||||
return *reg & bit(x86_feature);
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
|
||||
static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
int *reg;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
return best && (best->edx & bit(X86_FEATURE_MTRR));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_SMEP));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_SMAP));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ecx & bit(X86_FEATURE_PKU));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->edx & bit(X86_FEATURE_LM));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->ecx & bit(X86_FEATURE_OSVW));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
return best && (best->ecx & bit(X86_FEATURE_PCID));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
return best && (best->ecx & bit(X86_FEATURE_X2APIC));
|
||||
reg = guest_cpuid_get_register(vcpu, x86_feature);
|
||||
if (reg)
|
||||
*reg &= ~bit(x86_feature);
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
|
||||
@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
|
||||
return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->edx & bit(X86_FEATURE_GBPAGES));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_RTM));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 7, 0);
|
||||
return best && (best->ebx & bit(X86_FEATURE_MPX));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
return best && (best->edx & bit(X86_FEATURE_RDTSCP));
|
||||
}
|
||||
|
||||
/*
|
||||
* NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
|
||||
*/
|
||||
#define BIT_NRIPS 3
|
||||
|
||||
static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
|
||||
|
||||
/*
|
||||
* NRIPS is a scattered cpuid feature, so we can't use
|
||||
* X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit
|
||||
* position 8, not 3).
|
||||
*/
|
||||
return best && (best->edx & bit(BIT_NRIPS));
|
||||
}
|
||||
#undef BIT_NRIPS
|
||||
|
||||
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include "x86.h"
|
||||
#include "tss.h"
|
||||
#include "mmu.h"
|
||||
|
||||
/*
|
||||
* Operand types
|
||||
@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
|
||||
ulong la;
|
||||
u32 lim;
|
||||
u16 sel;
|
||||
u8 va_bits;
|
||||
|
||||
la = seg_base(ctxt, addr.seg) + addr.ea;
|
||||
*max_size = 0;
|
||||
switch (mode) {
|
||||
case X86EMUL_MODE_PROT64:
|
||||
*linear = la;
|
||||
if (is_noncanonical_address(la))
|
||||
va_bits = ctxt_virt_addr_bits(ctxt);
|
||||
if (get_canonical(la, va_bits) != la)
|
||||
goto bad;
|
||||
|
||||
*max_size = min_t(u64, ~0u, (1ull << 48) - la);
|
||||
*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
|
||||
if (size > *max_size)
|
||||
goto bad;
|
||||
break;
|
||||
@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
||||
sizeof(base3), &ctxt->exception);
|
||||
if (ret != X86EMUL_CONTINUE)
|
||||
return ret;
|
||||
if (is_noncanonical_address(get_desc_base(&seg_desc) |
|
||||
((u64)base3 << 32)))
|
||||
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
|
||||
((u64)base3 << 32), ctxt))
|
||||
return emulate_gp(ctxt, 0);
|
||||
}
|
||||
load:
|
||||
@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
|
||||
|
||||
eax = 0x80000001;
|
||||
ecx = 0;
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
|
||||
return edx & bit(X86_FEATURE_LM);
|
||||
}
|
||||
|
||||
@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
eax = ecx = 0;
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
|
||||
return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
|
||||
&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
|
||||
&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
|
||||
@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
|
||||
|
||||
eax = 0x00000000;
|
||||
ecx = 0x00000000;
|
||||
ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
|
||||
/*
|
||||
* Intel ("GenuineIntel")
|
||||
* remark: Intel CPUs only support "syscall" in 64bit
|
||||
@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
|
||||
ss_sel = cs_sel + 8;
|
||||
cs.d = 0;
|
||||
cs.l = 1;
|
||||
if (is_noncanonical_address(rcx) ||
|
||||
is_noncanonical_address(rdx))
|
||||
if (emul_is_noncanonical_address(rcx, ctxt) ||
|
||||
emul_is_noncanonical_address(rdx, ctxt))
|
||||
return emulate_gp(ctxt, 0);
|
||||
break;
|
||||
}
|
||||
@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
|
||||
/*
|
||||
* Check MOVBE is set in the guest-visible CPUID leaf.
|
||||
*/
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
|
||||
if (!(ecx & FFL(MOVBE)))
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
|
||||
if (rc != X86EMUL_CONTINUE)
|
||||
return rc;
|
||||
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
|
||||
is_noncanonical_address(desc_ptr.address))
|
||||
emul_is_noncanonical_address(desc_ptr.address, ctxt))
|
||||
return emulate_gp(ctxt, 0);
|
||||
if (lgdt)
|
||||
ctxt->ops->set_gdt(ctxt, &desc_ptr);
|
||||
@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
|
||||
|
||||
eax = reg_read(ctxt, VCPU_REGS_RAX);
|
||||
ecx = reg_read(ctxt, VCPU_REGS_RCX);
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
|
||||
*reg_write(ctxt, VCPU_REGS_RAX) = eax;
|
||||
*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
|
||||
*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
|
||||
@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u32 eax = 1, ebx, ecx = 0, edx;
|
||||
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
|
||||
if (!(edx & FFL(FXSR)))
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
|
||||
u64 rsvd = 0;
|
||||
|
||||
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
|
||||
if (efer & EFER_LMA)
|
||||
rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
|
||||
if (efer & EFER_LMA) {
|
||||
u64 maxphyaddr;
|
||||
u32 eax = 0x80000008;
|
||||
|
||||
if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
|
||||
NULL, false))
|
||||
maxphyaddr = eax & 0xff;
|
||||
else
|
||||
maxphyaddr = 36;
|
||||
rsvd = rsvd_bits(maxphyaddr, 62);
|
||||
}
|
||||
|
||||
if (new_val & rsvd)
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
|
||||
pdata);
|
||||
}
|
||||
case HV_X64_MSR_TSC_FREQUENCY:
|
||||
data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
|
||||
break;
|
||||
case HV_X64_MSR_APIC_FREQUENCY:
|
||||
data = APIC_BUS_FREQUENCY;
|
||||
break;
|
||||
default:
|
||||
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
|
||||
return 1;
|
||||
@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
|
||||
|
||||
switch (code) {
|
||||
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
|
||||
kvm_vcpu_on_spin(vcpu);
|
||||
kvm_vcpu_on_spin(vcpu, true);
|
||||
break;
|
||||
case HVCALL_POST_MESSAGE:
|
||||
case HVCALL_SIGNAL_EVENT:
|
||||
|
@ -4,7 +4,7 @@
|
||||
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
|
||||
#define KVM_POSSIBLE_CR4_GUEST_BITS \
|
||||
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
|
||||
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
|
||||
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
|
||||
|
||||
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
|
||||
enum kvm_reg reg)
|
||||
|
@ -54,8 +54,6 @@
|
||||
#define PRIu64 "u"
|
||||
#define PRIo64 "o"
|
||||
|
||||
#define APIC_BUS_CYCLE_NS 1
|
||||
|
||||
/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
|
||||
#define apic_debug(fmt, arg...)
|
||||
|
||||
|
@ -12,6 +12,9 @@
|
||||
#define KVM_APIC_SHORT_MASK 0xc0000
|
||||
#define KVM_APIC_DEST_MASK 0x800
|
||||
|
||||
#define APIC_BUS_CYCLE_NS 1
|
||||
#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
|
||||
|
||||
struct kvm_timer {
|
||||
struct hrtimer timer;
|
||||
s64 period; /* unit: ns */
|
||||
|
@ -2169,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
}
|
||||
|
||||
struct mmu_page_path {
|
||||
struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
|
||||
unsigned int idx[PT64_ROOT_LEVEL];
|
||||
struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL];
|
||||
unsigned int idx[PT64_ROOT_MAX_LEVEL];
|
||||
};
|
||||
|
||||
#define for_each_sp(pvec, sp, parents, i) \
|
||||
@ -2385,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
|
||||
iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
|
||||
iterator->level = vcpu->arch.mmu.shadow_root_level;
|
||||
|
||||
if (iterator->level == PT64_ROOT_LEVEL &&
|
||||
vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL &&
|
||||
if (iterator->level == PT64_ROOT_4LEVEL &&
|
||||
vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
|
||||
!vcpu->arch.mmu.direct_map)
|
||||
--iterator->level;
|
||||
|
||||
@ -2610,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
|
||||
|
||||
sp = list_last_entry(&kvm->arch.active_mmu_pages,
|
||||
struct kvm_mmu_page, link);
|
||||
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
|
||||
|
||||
return true;
|
||||
return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3262,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
|
||||
|
||||
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
|
||||
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
|
||||
static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
|
||||
static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
|
||||
|
||||
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
|
||||
gfn_t gfn, bool prefault)
|
||||
@ -3302,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
make_mmu_pages_available(vcpu);
|
||||
if (make_mmu_pages_available(vcpu) < 0)
|
||||
goto out_unlock;
|
||||
if (likely(!force_pt_level))
|
||||
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
|
||||
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
|
||||
@ -3326,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
|
||||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return;
|
||||
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
|
||||
(vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
|
||||
if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
|
||||
(vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
|
||||
vcpu->arch.mmu.direct_map)) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
@ -3379,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
struct kvm_mmu_page *sp;
|
||||
unsigned i;
|
||||
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
|
||||
if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
make_mmu_pages_available(vcpu);
|
||||
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL);
|
||||
if(make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, 0, 0,
|
||||
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
|
||||
++sp->root_count;
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
|
||||
@ -3392,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
MMU_WARN_ON(VALID_PAGE(root));
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
make_mmu_pages_available(vcpu);
|
||||
if (make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
|
||||
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
|
||||
root = __pa(sp->spt);
|
||||
@ -3423,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
* Do we shadow a long mode page table? If so we need to
|
||||
* write-protect the guests page table root.
|
||||
*/
|
||||
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
|
||||
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
MMU_WARN_ON(VALID_PAGE(root));
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
make_mmu_pages_available(vcpu);
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
|
||||
0, ACC_ALL);
|
||||
if (make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
|
||||
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
|
||||
root = __pa(sp->spt);
|
||||
++sp->root_count;
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
@ -3445,7 +3454,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
* the shadow page table may be a PAE or a long mode page table.
|
||||
*/
|
||||
pm_mask = PT_PRESENT_MASK;
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL)
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
|
||||
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -3463,7 +3472,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
make_mmu_pages_available(vcpu);
|
||||
if (make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
|
||||
0, ACC_ALL);
|
||||
root = __pa(sp->spt);
|
||||
@ -3478,7 +3490,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
* If we shadow a 32 bit page table with a long mode page
|
||||
* table we enter this path.
|
||||
*/
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
|
||||
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
|
||||
if (vcpu->arch.mmu.lm_root == NULL) {
|
||||
/*
|
||||
* The additional page necessary for this is only
|
||||
@ -3523,7 +3535,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
|
||||
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
|
||||
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
sp = page_header(root);
|
||||
mmu_sync_children(vcpu, sp);
|
||||
@ -3588,6 +3600,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
|
||||
|
||||
static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
{
|
||||
/*
|
||||
* A nested guest cannot use the MMIO cache if it is using nested
|
||||
* page tables, because cr2 is a nGPA while the cache stores GPAs.
|
||||
*/
|
||||
if (mmu_is_nested(vcpu))
|
||||
return false;
|
||||
|
||||
if (direct)
|
||||
return vcpu_match_mmio_gpa(vcpu, addr);
|
||||
|
||||
@ -3599,7 +3618,7 @@ static bool
|
||||
walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
|
||||
{
|
||||
struct kvm_shadow_walk_iterator iterator;
|
||||
u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
|
||||
u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
|
||||
int root, leaf;
|
||||
bool reserved = false;
|
||||
|
||||
@ -3640,7 +3659,23 @@ exit:
|
||||
return reserved;
|
||||
}
|
||||
|
||||
int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
/*
|
||||
* Return values of handle_mmio_page_fault:
|
||||
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
|
||||
* directly.
|
||||
* RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
|
||||
* fault path update the mmio spte.
|
||||
* RET_MMIO_PF_RETRY: let CPU fault again on the address.
|
||||
* RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
|
||||
*/
|
||||
enum {
|
||||
RET_MMIO_PF_EMULATE = 1,
|
||||
RET_MMIO_PF_INVALID = 2,
|
||||
RET_MMIO_PF_RETRY = 0,
|
||||
RET_MMIO_PF_BUG = -1
|
||||
};
|
||||
|
||||
static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||
{
|
||||
u64 spte;
|
||||
bool reserved;
|
||||
@ -3872,7 +3907,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
make_mmu_pages_available(vcpu);
|
||||
if (make_mmu_pages_available(vcpu) < 0)
|
||||
goto out_unlock;
|
||||
if (likely(!force_pt_level))
|
||||
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
|
||||
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
|
||||
@ -4025,7 +4061,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
rsvd_check->rsvd_bits_mask[1][0] =
|
||||
rsvd_check->rsvd_bits_mask[0][0];
|
||||
break;
|
||||
case PT64_ROOT_LEVEL:
|
||||
case PT64_ROOT_5LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd |
|
||||
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[1][4] =
|
||||
rsvd_check->rsvd_bits_mask[0][4];
|
||||
case PT64_ROOT_4LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
|
||||
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
@ -4055,7 +4097,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
|
||||
cpuid_maxphyaddr(vcpu), context->root_level,
|
||||
context->nx, guest_cpuid_has_gbpages(vcpu),
|
||||
context->nx,
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
|
||||
is_pse(vcpu), guest_cpuid_is_amd(vcpu));
|
||||
}
|
||||
|
||||
@ -4065,6 +4108,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
||||
{
|
||||
u64 bad_mt_xwr;
|
||||
|
||||
rsvd_check->rsvd_bits_mask[0][4] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][3] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][2] =
|
||||
@ -4074,6 +4119,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
||||
rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
|
||||
|
||||
/* large page */
|
||||
rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
|
||||
rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
|
||||
rsvd_check->rsvd_bits_mask[1][2] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
|
||||
@ -4120,8 +4166,8 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
|
||||
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
|
||||
boot_cpu_data.x86_phys_bits,
|
||||
context->shadow_root_level, uses_nx,
|
||||
guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
|
||||
true);
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
|
||||
is_pse(vcpu), true);
|
||||
|
||||
if (!shadow_me_mask)
|
||||
return;
|
||||
@ -4185,66 +4231,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
|
||||
boot_cpu_data.x86_phys_bits, execonly);
|
||||
}
|
||||
|
||||
#define BYTE_MASK(access) \
|
||||
((1 & (access) ? 2 : 0) | \
|
||||
(2 & (access) ? 4 : 0) | \
|
||||
(3 & (access) ? 8 : 0) | \
|
||||
(4 & (access) ? 16 : 0) | \
|
||||
(5 & (access) ? 32 : 0) | \
|
||||
(6 & (access) ? 64 : 0) | \
|
||||
(7 & (access) ? 128 : 0))
|
||||
|
||||
|
||||
static void update_permission_bitmask(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *mmu, bool ept)
|
||||
{
|
||||
unsigned bit, byte, pfec;
|
||||
u8 map;
|
||||
bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
|
||||
unsigned byte;
|
||||
|
||||
const u8 x = BYTE_MASK(ACC_EXEC_MASK);
|
||||
const u8 w = BYTE_MASK(ACC_WRITE_MASK);
|
||||
const u8 u = BYTE_MASK(ACC_USER_MASK);
|
||||
|
||||
bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
|
||||
bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
|
||||
bool cr0_wp = is_write_protection(vcpu);
|
||||
|
||||
cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
|
||||
cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
|
||||
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
|
||||
pfec = byte << 1;
|
||||
map = 0;
|
||||
wf = pfec & PFERR_WRITE_MASK;
|
||||
uf = pfec & PFERR_USER_MASK;
|
||||
ff = pfec & PFERR_FETCH_MASK;
|
||||
unsigned pfec = byte << 1;
|
||||
|
||||
/*
|
||||
* PFERR_RSVD_MASK bit is set in PFEC if the access is not
|
||||
* subject to SMAP restrictions, and cleared otherwise. The
|
||||
* bit is only meaningful if the SMAP bit is set in CR4.
|
||||
* Each "*f" variable has a 1 bit for each UWX value
|
||||
* that causes a fault with the given PFEC.
|
||||
*/
|
||||
smapf = !(pfec & PFERR_RSVD_MASK);
|
||||
for (bit = 0; bit < 8; ++bit) {
|
||||
x = bit & ACC_EXEC_MASK;
|
||||
w = bit & ACC_WRITE_MASK;
|
||||
u = bit & ACC_USER_MASK;
|
||||
|
||||
if (!ept) {
|
||||
/* Not really needed: !nx will cause pte.nx to fault */
|
||||
x |= !mmu->nx;
|
||||
/* Allow supervisor writes if !cr0.wp */
|
||||
w |= !is_write_protection(vcpu) && !uf;
|
||||
/* Disallow supervisor fetches of user code if cr4.smep */
|
||||
x &= !(cr4_smep && u && !uf);
|
||||
/* Faults from writes to non-writable pages */
|
||||
u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
|
||||
/* Faults from user mode accesses to supervisor pages */
|
||||
u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
|
||||
/* Faults from fetches of non-executable pages*/
|
||||
u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
|
||||
/* Faults from kernel mode fetches of user pages */
|
||||
u8 smepf = 0;
|
||||
/* Faults from kernel mode accesses of user pages */
|
||||
u8 smapf = 0;
|
||||
|
||||
/*
|
||||
* SMAP:kernel-mode data accesses from user-mode
|
||||
* mappings should fault. A fault is considered
|
||||
* as a SMAP violation if all of the following
|
||||
* conditions are ture:
|
||||
* - X86_CR4_SMAP is set in CR4
|
||||
* - A user page is accessed
|
||||
* - Page fault in kernel mode
|
||||
* - if CPL = 3 or X86_EFLAGS_AC is clear
|
||||
*
|
||||
* Here, we cover the first three conditions.
|
||||
* The fourth is computed dynamically in
|
||||
* permission_fault() and is in smapf.
|
||||
*
|
||||
* Also, SMAP does not affect instruction
|
||||
* fetches, add the !ff check here to make it
|
||||
* clearer.
|
||||
*/
|
||||
smap = cr4_smap && u && !uf && !ff;
|
||||
}
|
||||
if (!ept) {
|
||||
/* Faults from kernel mode accesses to user pages */
|
||||
u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
|
||||
|
||||
fault = (ff && !x) || (uf && !u) || (wf && !w) ||
|
||||
(smapf && smap);
|
||||
map |= fault << bit;
|
||||
/* Not really needed: !nx will cause pte.nx to fault */
|
||||
if (!mmu->nx)
|
||||
ff = 0;
|
||||
|
||||
/* Allow supervisor writes if !cr0.wp */
|
||||
if (!cr0_wp)
|
||||
wf = (pfec & PFERR_USER_MASK) ? wf : 0;
|
||||
|
||||
/* Disallow supervisor fetches of user code if cr4.smep */
|
||||
if (cr4_smep)
|
||||
smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0;
|
||||
|
||||
/*
|
||||
* SMAP:kernel-mode data accesses from user-mode
|
||||
* mappings should fault. A fault is considered
|
||||
* as a SMAP violation if all of the following
|
||||
* conditions are ture:
|
||||
* - X86_CR4_SMAP is set in CR4
|
||||
* - A user page is accessed
|
||||
* - The access is not a fetch
|
||||
* - Page fault in kernel mode
|
||||
* - if CPL = 3 or X86_EFLAGS_AC is clear
|
||||
*
|
||||
* Here, we cover the first three conditions.
|
||||
* The fourth is computed dynamically in permission_fault();
|
||||
* PFERR_RSVD_MASK bit will be set in PFEC if the access is
|
||||
* *not* subject to SMAP restrictions.
|
||||
*/
|
||||
if (cr4_smap)
|
||||
smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
|
||||
}
|
||||
mmu->permissions[byte] = map;
|
||||
|
||||
mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4358,7 +4423,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
|
||||
static void paging64_init_context(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context)
|
||||
{
|
||||
paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
|
||||
int root_level = is_la57_mode(vcpu) ?
|
||||
PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
|
||||
paging64_init_context_common(vcpu, context, root_level);
|
||||
}
|
||||
|
||||
static void paging32_init_context(struct kvm_vcpu *vcpu,
|
||||
@ -4399,7 +4467,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->sync_page = nonpaging_sync_page;
|
||||
context->invlpg = nonpaging_invlpg;
|
||||
context->update_pte = nonpaging_update_pte;
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
context->direct_map = true;
|
||||
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
|
||||
@ -4413,7 +4481,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->root_level = 0;
|
||||
} else if (is_long_mode(vcpu)) {
|
||||
context->nx = is_nx(vcpu);
|
||||
context->root_level = PT64_ROOT_LEVEL;
|
||||
context->root_level = is_la57_mode(vcpu) ?
|
||||
PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, context);
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
} else if (is_pae(vcpu)) {
|
||||
@ -4470,7 +4539,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
||||
|
||||
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
|
||||
|
||||
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
|
||||
context->shadow_root_level = PT64_ROOT_4LEVEL;
|
||||
|
||||
context->nx = true;
|
||||
context->ept_ad = accessed_dirty;
|
||||
@ -4479,7 +4548,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
||||
context->sync_page = ept_sync_page;
|
||||
context->invlpg = ept_invlpg;
|
||||
context->update_pte = ept_update_pte;
|
||||
context->root_level = context->shadow_root_level;
|
||||
context->root_level = PT64_ROOT_4LEVEL;
|
||||
context->root_hpa = INVALID_PAGE;
|
||||
context->direct_map = false;
|
||||
context->base_role.ad_disabled = !accessed_dirty;
|
||||
@ -4524,7 +4593,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
||||
g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
|
||||
} else if (is_long_mode(vcpu)) {
|
||||
g_context->nx = is_nx(vcpu);
|
||||
g_context->root_level = PT64_ROOT_LEVEL;
|
||||
g_context->root_level = is_la57_mode(vcpu) ?
|
||||
PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
|
||||
reset_rsvds_bits_mask(vcpu, g_context);
|
||||
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
|
||||
} else if (is_pae(vcpu)) {
|
||||
@ -4814,12 +4884,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
|
||||
|
||||
static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
|
||||
static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
LIST_HEAD(invalid_list);
|
||||
|
||||
if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
|
||||
if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
|
||||
@ -4828,6 +4898,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
|
||||
++vcpu->kvm->stat.mmu_recycled;
|
||||
}
|
||||
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
|
||||
|
||||
if (!kvm_mmu_available_pages(vcpu->kvm))
|
||||
return -ENOSPC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
|
||||
@ -4835,7 +4909,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
|
||||
{
|
||||
int r, emulation_type = EMULTYPE_RETRY;
|
||||
enum emulation_result er;
|
||||
bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
|
||||
bool direct = vcpu->arch.mmu.direct_map;
|
||||
|
||||
/* With shadow page tables, fault_address contains a GVA or nGPA. */
|
||||
if (vcpu->arch.mmu.direct_map) {
|
||||
vcpu->arch.gpa_available = true;
|
||||
vcpu->arch.gpa_val = cr2;
|
||||
}
|
||||
|
||||
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||
r = handle_mmio_page_fault(vcpu, cr2, direct);
|
||||
@ -4847,6 +4927,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
|
||||
return 1;
|
||||
if (r < 0)
|
||||
return r;
|
||||
/* Must be RET_MMIO_PF_INVALID. */
|
||||
}
|
||||
|
||||
r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
|
||||
@ -4862,11 +4943,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
|
||||
* This can occur when using nested virtualization with nested
|
||||
* paging in both guests. If true, we simply unprotect the page
|
||||
* and resume the guest.
|
||||
*
|
||||
* Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used
|
||||
* in PFERR_NEXT_GUEST_PAGE)
|
||||
*/
|
||||
if (error_code == PFERR_NESTED_GUEST_PAGE) {
|
||||
if (vcpu->arch.mmu.direct_map &&
|
||||
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
|
||||
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
|
||||
return 1;
|
||||
}
|
||||
|
@ -37,7 +37,8 @@
|
||||
#define PT32_DIR_PSE36_MASK \
|
||||
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
|
||||
|
||||
#define PT64_ROOT_LEVEL 4
|
||||
#define PT64_ROOT_5LEVEL 5
|
||||
#define PT64_ROOT_4LEVEL 4
|
||||
#define PT32_ROOT_LEVEL 2
|
||||
#define PT32E_ROOT_LEVEL 3
|
||||
|
||||
@ -48,6 +49,9 @@
|
||||
|
||||
static inline u64 rsvd_bits(int s, int e)
|
||||
{
|
||||
if (e < s)
|
||||
return 0;
|
||||
|
||||
return ((1ULL << (e - s + 1)) - 1) << s;
|
||||
}
|
||||
|
||||
@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
|
||||
void
|
||||
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
|
||||
|
||||
/*
|
||||
* Return values of handle_mmio_page_fault:
|
||||
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
|
||||
* directly.
|
||||
* RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
|
||||
* fault path update the mmio spte.
|
||||
* RET_MMIO_PF_RETRY: let CPU fault again on the address.
|
||||
* RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
|
||||
*/
|
||||
enum {
|
||||
RET_MMIO_PF_EMULATE = 1,
|
||||
RET_MMIO_PF_INVALID = 2,
|
||||
RET_MMIO_PF_RETRY = 0,
|
||||
RET_MMIO_PF_BUG = -1
|
||||
};
|
||||
|
||||
int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
|
||||
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
|
||||
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
||||
bool accessed_dirty);
|
||||
|
@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
|
||||
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
|
||||
return;
|
||||
|
||||
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
|
||||
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
|
||||
hpa_t root = vcpu->arch.mmu.root_hpa;
|
||||
|
||||
sp = page_header(root);
|
||||
__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
|
||||
__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
|
||||
* enable MTRRs and it is obviously undesirable to run the
|
||||
* guest entirely with UC memory and we use WB.
|
||||
*/
|
||||
if (guest_cpuid_has_mtrr(vcpu))
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
|
||||
return MTRR_TYPE_UNCACHABLE;
|
||||
else
|
||||
return MTRR_TYPE_WRBACK;
|
||||
|
@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
||||
&map_writable))
|
||||
return 0;
|
||||
|
||||
if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
|
||||
walker.gfn, pfn, walker.pte_access, &r))
|
||||
if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
|
||||
return r;
|
||||
|
||||
/*
|
||||
@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
||||
goto out_unlock;
|
||||
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
|
||||
make_mmu_pages_available(vcpu);
|
||||
if (make_mmu_pages_available(vcpu) < 0)
|
||||
goto out_unlock;
|
||||
if (!force_pt_level)
|
||||
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
|
||||
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
|
||||
|
@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO);
|
||||
static int vls = true;
|
||||
module_param(vls, int, 0444);
|
||||
|
||||
/* AVIC VM ID bit masks and lock */
|
||||
static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
|
||||
static DEFINE_SPINLOCK(avic_vm_id_lock);
|
||||
/* enable/disable Virtual GIF */
|
||||
static int vgif = true;
|
||||
module_param(vgif, int, 0444);
|
||||
|
||||
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
|
||||
@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
|
||||
recalc_intercepts(svm);
|
||||
}
|
||||
|
||||
static inline bool vgif_enabled(struct vcpu_svm *svm)
|
||||
{
|
||||
return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
|
||||
}
|
||||
|
||||
static inline void enable_gif(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->vcpu.arch.hflags |= HF_GIF_MASK;
|
||||
if (vgif_enabled(svm))
|
||||
svm->vmcb->control.int_ctl |= V_GIF_MASK;
|
||||
else
|
||||
svm->vcpu.arch.hflags |= HF_GIF_MASK;
|
||||
}
|
||||
|
||||
static inline void disable_gif(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
|
||||
if (vgif_enabled(svm))
|
||||
svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
|
||||
else
|
||||
svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
|
||||
}
|
||||
|
||||
static inline bool gif_set(struct vcpu_svm *svm)
|
||||
{
|
||||
return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
|
||||
if (vgif_enabled(svm))
|
||||
return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
|
||||
else
|
||||
return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
|
||||
}
|
||||
|
||||
static unsigned long iopm_base;
|
||||
@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid)
|
||||
asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
|
||||
}
|
||||
|
||||
static int get_npt_level(void)
|
||||
static int get_npt_level(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PT64_ROOT_LEVEL;
|
||||
return PT64_ROOT_4LEVEL;
|
||||
#else
|
||||
return PT32E_ROOT_LEVEL;
|
||||
#endif
|
||||
@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
unsigned nr = vcpu->arch.exception.nr;
|
||||
bool has_error_code = vcpu->arch.exception.has_error_code;
|
||||
bool reinject = vcpu->arch.exception.reinject;
|
||||
bool reinject = vcpu->arch.exception.injected;
|
||||
u32 error_code = vcpu->arch.exception.error_code;
|
||||
|
||||
/*
|
||||
@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
|
||||
static void disable_nmi_singlestep(struct vcpu_svm *svm)
|
||||
{
|
||||
svm->nmi_singlestep = false;
|
||||
|
||||
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
|
||||
/* Clear our flags if they were not set by the guest */
|
||||
if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
|
||||
@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
|
||||
*/
|
||||
#define SVM_VM_DATA_HASH_BITS 8
|
||||
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
|
||||
static u32 next_vm_id = 0;
|
||||
static bool next_vm_id_wrapped = 0;
|
||||
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
|
||||
|
||||
/* Note:
|
||||
@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void)
|
||||
}
|
||||
}
|
||||
|
||||
if (vgif) {
|
||||
if (!boot_cpu_has(X86_FEATURE_VGIF))
|
||||
vgif = false;
|
||||
else
|
||||
pr_info("Virtual GIF supported\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
}
|
||||
|
||||
if (vgif) {
|
||||
clr_intercept(svm, INTERCEPT_STGI);
|
||||
clr_intercept(svm, INTERCEPT_CLGI);
|
||||
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
|
||||
}
|
||||
|
||||
mark_all_dirty(svm->vmcb);
|
||||
|
||||
enable_gif(svm);
|
||||
@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int avic_get_next_vm_id(void)
|
||||
{
|
||||
int id;
|
||||
|
||||
spin_lock(&avic_vm_id_lock);
|
||||
|
||||
/* AVIC VM ID is one-based. */
|
||||
id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
|
||||
if (id <= AVIC_VM_ID_MASK)
|
||||
__set_bit(id, avic_vm_id_bitmap);
|
||||
else
|
||||
id = -EAGAIN;
|
||||
|
||||
spin_unlock(&avic_vm_id_lock);
|
||||
return id;
|
||||
}
|
||||
|
||||
static inline int avic_free_vm_id(int id)
|
||||
{
|
||||
if (id <= 0 || id > AVIC_VM_ID_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&avic_vm_id_lock);
|
||||
__clear_bit(id, avic_vm_id_bitmap);
|
||||
spin_unlock(&avic_vm_id_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void avic_vm_destroy(struct kvm *kvm)
|
||||
{
|
||||
unsigned long flags;
|
||||
@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm)
|
||||
if (!avic)
|
||||
return;
|
||||
|
||||
avic_free_vm_id(vm_data->avic_vm_id);
|
||||
|
||||
if (vm_data->avic_logical_id_table_page)
|
||||
__free_page(vm_data->avic_logical_id_table_page);
|
||||
if (vm_data->avic_physical_id_table_page)
|
||||
@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm)
|
||||
static int avic_vm_init(struct kvm *kvm)
|
||||
{
|
||||
unsigned long flags;
|
||||
int vm_id, err = -ENOMEM;
|
||||
int err = -ENOMEM;
|
||||
struct kvm_arch *vm_data = &kvm->arch;
|
||||
struct page *p_page;
|
||||
struct page *l_page;
|
||||
struct kvm_arch *ka;
|
||||
u32 vm_id;
|
||||
|
||||
if (!avic)
|
||||
return 0;
|
||||
|
||||
vm_id = avic_get_next_vm_id();
|
||||
if (vm_id < 0)
|
||||
return vm_id;
|
||||
vm_data->avic_vm_id = (u32)vm_id;
|
||||
|
||||
/* Allocating physical APIC ID table (4KB) */
|
||||
p_page = alloc_page(GFP_KERNEL);
|
||||
if (!p_page)
|
||||
@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm)
|
||||
clear_page(page_address(l_page));
|
||||
|
||||
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
|
||||
again:
|
||||
vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
|
||||
if (vm_id == 0) { /* id is 1-based, zero is not okay */
|
||||
next_vm_id_wrapped = 1;
|
||||
goto again;
|
||||
}
|
||||
/* Is it still in use? Only possible if wrapped at least once */
|
||||
if (next_vm_id_wrapped) {
|
||||
hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
|
||||
struct kvm *k2 = container_of(ka, struct kvm, arch);
|
||||
struct kvm_arch *vd2 = &k2->arch;
|
||||
if (vd2->avic_vm_id == vm_id)
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
vm_data->avic_vm_id = vm_id;
|
||||
hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
|
||||
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
|
||||
|
||||
@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
}
|
||||
init_vmcb(svm);
|
||||
|
||||
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
|
||||
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
|
||||
@ -2384,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
|
||||
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
|
||||
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
|
||||
vcpu->arch.mmu.shadow_root_level = get_npt_level();
|
||||
vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
|
||||
reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
|
||||
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
|
||||
}
|
||||
@ -3147,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm)
|
||||
if (nested_svm_check_permissions(svm))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* If VGIF is enabled, the STGI intercept is only added to
|
||||
* detect the opening of the NMI window; remove it now.
|
||||
*/
|
||||
if (vgif_enabled(svm))
|
||||
clr_intercept(svm, INTERCEPT_STGI);
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
|
||||
ret = kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
|
||||
@ -3744,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
|
||||
|
||||
static int pause_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
kvm_vcpu_on_spin(&(svm->vcpu));
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
bool in_kernel = (svm_get_cpl(vcpu) == 0);
|
||||
|
||||
kvm_vcpu_on_spin(vcpu, in_kernel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -4228,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
|
||||
|
||||
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
|
||||
|
||||
vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
|
||||
|
||||
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
|
||||
vcpu->arch.cr0 = svm->vmcb->save.cr0;
|
||||
if (npt_enabled)
|
||||
@ -4682,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
* In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
|
||||
* 1, because that's a separate STGI/VMRUN intercept. The next time we
|
||||
* get that intercept, this function will be called again though and
|
||||
* we'll get the vintr intercept.
|
||||
* we'll get the vintr intercept. However, if the vGIF feature is
|
||||
* enabled, the STGI interception will not occur. Enable the irq
|
||||
* window under the assumption that the hardware will set the GIF.
|
||||
*/
|
||||
if (gif_set(svm) && nested_svm_intr(svm)) {
|
||||
if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
|
||||
svm_set_vintr(svm);
|
||||
svm_inject_irq(svm, 0x0);
|
||||
}
|
||||
@ -4698,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
== HF_NMI_MASK)
|
||||
return; /* IRET will cause a vm exit */
|
||||
|
||||
if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
|
||||
if (!gif_set(svm)) {
|
||||
if (vgif_enabled(svm))
|
||||
set_intercept(svm, INTERCEPT_STGI);
|
||||
return; /* STGI will cause a vm exit */
|
||||
}
|
||||
|
||||
if (svm->nested.exit_required)
|
||||
return; /* we're not going to run the guest yet */
|
||||
@ -5071,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
|
||||
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
|
||||
/* Update nrips enabled cache */
|
||||
svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
|
||||
svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
|
||||
|
||||
if (!kvm_vcpu_apicv_active(vcpu))
|
||||
return;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
if (entry)
|
||||
entry->ecx &= ~bit(X86_FEATURE_X2APIC);
|
||||
guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
|
||||
}
|
||||
|
||||
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
||||
|
@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio,
|
||||
*/
|
||||
TRACE_EVENT(kvm_cpuid,
|
||||
TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
|
||||
unsigned long rcx, unsigned long rdx),
|
||||
TP_ARGS(function, rax, rbx, rcx, rdx),
|
||||
unsigned long rcx, unsigned long rdx, bool found),
|
||||
TP_ARGS(function, rax, rbx, rcx, rdx, found),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, function )
|
||||
@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid,
|
||||
__field( unsigned long, rbx )
|
||||
__field( unsigned long, rcx )
|
||||
__field( unsigned long, rdx )
|
||||
__field( bool, found )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid,
|
||||
__entry->rbx = rbx;
|
||||
__entry->rcx = rcx;
|
||||
__entry->rdx = rdx;
|
||||
__entry->found = found;
|
||||
),
|
||||
|
||||
TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
|
||||
TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s",
|
||||
__entry->function, __entry->rax,
|
||||
__entry->rbx, __entry->rcx, __entry->rdx)
|
||||
__entry->rbx, __entry->rcx, __entry->rdx,
|
||||
__entry->found ? "found" : "not found")
|
||||
);
|
||||
|
||||
#define AREG(x) { APIC_##x, "APIC_" #x }
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -311,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
|
||||
u64 new_state = msr_info->data &
|
||||
(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
|
||||
u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
|
||||
0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
|
||||
u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
|
||||
(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
|
||||
|
||||
if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
|
||||
return 1;
|
||||
if (!msr_info->host_initiated &&
|
||||
((msr_info->data & reserved_bits) != 0 ||
|
||||
new_state == X2APIC_ENABLE ||
|
||||
(new_state == MSR_IA32_APICBASE_ENABLE &&
|
||||
((new_state == MSR_IA32_APICBASE_ENABLE &&
|
||||
old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
|
||||
(new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
|
||||
old_state == 0)))
|
||||
@ -390,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
|
||||
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
if (!vcpu->arch.exception.pending) {
|
||||
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
|
||||
queue:
|
||||
if (has_error && !is_protmode(vcpu))
|
||||
has_error = false;
|
||||
vcpu->arch.exception.pending = true;
|
||||
if (reinject) {
|
||||
/*
|
||||
* On vmentry, vcpu->arch.exception.pending is only
|
||||
* true if an event injection was blocked by
|
||||
* nested_run_pending. In that case, however,
|
||||
* vcpu_enter_guest requests an immediate exit,
|
||||
* and the guest shouldn't proceed far enough to
|
||||
* need reinjection.
|
||||
*/
|
||||
WARN_ON_ONCE(vcpu->arch.exception.pending);
|
||||
vcpu->arch.exception.injected = true;
|
||||
} else {
|
||||
vcpu->arch.exception.pending = true;
|
||||
vcpu->arch.exception.injected = false;
|
||||
}
|
||||
vcpu->arch.exception.has_error_code = has_error;
|
||||
vcpu->arch.exception.nr = nr;
|
||||
vcpu->arch.exception.error_code = error_code;
|
||||
vcpu->arch.exception.reinject = reinject;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -413,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
|
||||
class2 = exception_class(nr);
|
||||
if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
|
||||
|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
|
||||
/* generate double fault per SDM Table 5-5 */
|
||||
/*
|
||||
* Generate double fault per SDM Table 5-5. Set
|
||||
* exception.pending = true so that the double fault
|
||||
* can trigger a nested vmexit.
|
||||
*/
|
||||
vcpu->arch.exception.pending = true;
|
||||
vcpu->arch.exception.injected = false;
|
||||
vcpu->arch.exception.has_error_code = true;
|
||||
vcpu->arch.exception.nr = DF_VECTOR;
|
||||
vcpu->arch.exception.error_code = 0;
|
||||
@ -755,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
if (cr4 & CR4_RESERVED_BITS)
|
||||
return 1;
|
||||
|
||||
if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
|
||||
return 1;
|
||||
|
||||
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
|
||||
return 1;
|
||||
|
||||
if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
|
||||
return 1;
|
||||
|
||||
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
|
||||
return 1;
|
||||
|
||||
if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
|
||||
return 1;
|
||||
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
|
||||
return 1;
|
||||
|
||||
if (is_long_mode(vcpu)) {
|
||||
@ -780,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
return 1;
|
||||
|
||||
if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
|
||||
if (!guest_cpuid_has_pcid(vcpu))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
|
||||
return 1;
|
||||
|
||||
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
|
||||
@ -814,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_long_mode(vcpu)) {
|
||||
if (cr3 & CR3_L_MODE_RESERVED_BITS)
|
||||
return 1;
|
||||
} else if (is_pae(vcpu) && is_paging(vcpu) &&
|
||||
if (is_long_mode(vcpu) &&
|
||||
(cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
|
||||
return 1;
|
||||
else if (is_pae(vcpu) && is_paging(vcpu) &&
|
||||
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
|
||||
return 1;
|
||||
|
||||
@ -884,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 fixed = DR6_FIXED_1;
|
||||
|
||||
if (!guest_cpuid_has_rtm(vcpu))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
|
||||
fixed |= DR6_RTM;
|
||||
return fixed;
|
||||
}
|
||||
@ -994,6 +1015,7 @@ static u32 emulated_msrs[] = {
|
||||
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
|
||||
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
|
||||
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
|
||||
HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
|
||||
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
|
||||
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
|
||||
HV_X64_MSR_RESET,
|
||||
@ -1022,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
if (efer & efer_reserved_bits)
|
||||
return false;
|
||||
|
||||
if (efer & EFER_FFXSR) {
|
||||
struct kvm_cpuid_entry2 *feat;
|
||||
|
||||
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
|
||||
if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (efer & EFER_SVME) {
|
||||
struct kvm_cpuid_entry2 *feat;
|
||||
|
||||
feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
|
||||
if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
|
||||
if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1084,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
case MSR_KERNEL_GS_BASE:
|
||||
case MSR_CSTAR:
|
||||
case MSR_LSTAR:
|
||||
if (is_noncanonical_address(msr->data))
|
||||
if (is_noncanonical_address(msr->data, vcpu))
|
||||
return 1;
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
@ -1101,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
* value, and that something deterministic happens if the guest
|
||||
* invokes 64-bit SYSENTER.
|
||||
*/
|
||||
msr->data = get_canonical(msr->data);
|
||||
msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
|
||||
}
|
||||
return kvm_x86_ops->set_msr(vcpu, msr);
|
||||
}
|
||||
@ -1534,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
|
||||
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
|
||||
|
||||
if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
|
||||
if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
|
||||
update_ia32_tsc_adjust_msr(vcpu, offset);
|
||||
|
||||
kvm_vcpu_write_tsc_offset(vcpu, offset);
|
||||
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
|
||||
|
||||
@ -2185,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
kvm_set_lapic_tscdeadline_msr(vcpu, data);
|
||||
break;
|
||||
case MSR_IA32_TSC_ADJUST:
|
||||
if (guest_cpuid_has_tsc_adjust(vcpu)) {
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
|
||||
if (!msr_info->host_initiated) {
|
||||
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
|
||||
adjust_tsc_offset_guest(vcpu, adj);
|
||||
@ -2307,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
|
||||
break;
|
||||
case MSR_AMD64_OSVW_ID_LENGTH:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
|
||||
return 1;
|
||||
vcpu->arch.osvw.length = data;
|
||||
break;
|
||||
case MSR_AMD64_OSVW_STATUS:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
|
||||
return 1;
|
||||
vcpu->arch.osvw.status = data;
|
||||
break;
|
||||
@ -2537,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
msr_info->data = 0xbe702111;
|
||||
break;
|
||||
case MSR_AMD64_OSVW_ID_LENGTH:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
|
||||
return 1;
|
||||
msr_info->data = vcpu->arch.osvw.length;
|
||||
break;
|
||||
case MSR_AMD64_OSVW_STATUS:
|
||||
if (!guest_cpuid_has_osvw(vcpu))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
|
||||
return 1;
|
||||
msr_info->data = vcpu->arch.osvw.status;
|
||||
break;
|
||||
@ -2882,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int idx;
|
||||
|
||||
if (vcpu->preempted)
|
||||
vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
|
||||
|
||||
/*
|
||||
* Disable page faults because we're in atomic context here.
|
||||
* kvm_write_guest_offset_cached() would call might_fault()
|
||||
@ -3074,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events)
|
||||
{
|
||||
process_nmi(vcpu);
|
||||
/*
|
||||
* FIXME: pass injected and pending separately. This is only
|
||||
* needed for nested virtualization, whose state cannot be
|
||||
* migrated yet. For now we can combine them.
|
||||
*/
|
||||
events->exception.injected =
|
||||
vcpu->arch.exception.pending &&
|
||||
(vcpu->arch.exception.pending ||
|
||||
vcpu->arch.exception.injected) &&
|
||||
!kvm_exception_is_soft(vcpu->arch.exception.nr);
|
||||
events->exception.nr = vcpu->arch.exception.nr;
|
||||
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
|
||||
@ -3130,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
|
||||
process_nmi(vcpu);
|
||||
vcpu->arch.exception.injected = false;
|
||||
vcpu->arch.exception.pending = events->exception.injected;
|
||||
vcpu->arch.exception.nr = events->exception.nr;
|
||||
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
|
||||
@ -4671,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
|
||||
*/
|
||||
if (vcpu->arch.gpa_available &&
|
||||
emulator_can_use_gpa(ctxt) &&
|
||||
vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
|
||||
(addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
|
||||
gpa = exception->address;
|
||||
goto mmio;
|
||||
(addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
|
||||
gpa = vcpu->arch.gpa_val;
|
||||
ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
|
||||
} else {
|
||||
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
|
||||
if (ret < 0)
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
}
|
||||
|
||||
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
|
||||
|
||||
if (ret < 0)
|
||||
return X86EMUL_PROPAGATE_FAULT;
|
||||
|
||||
/* For APIC access vmexit */
|
||||
if (ret)
|
||||
goto mmio;
|
||||
|
||||
if (ops->read_write_emulate(vcpu, gpa, val, bytes))
|
||||
if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
|
||||
return X86EMUL_CONTINUE;
|
||||
|
||||
mmio:
|
||||
/*
|
||||
* Is this MMIO handled locally?
|
||||
*/
|
||||
@ -5227,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
|
||||
return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
|
||||
}
|
||||
|
||||
static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
|
||||
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
|
||||
static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
|
||||
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
|
||||
{
|
||||
kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
|
||||
return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
|
||||
}
|
||||
|
||||
static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
|
||||
@ -6362,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
|
||||
int r;
|
||||
|
||||
/* try to reinject previous events if any */
|
||||
if (vcpu->arch.exception.injected) {
|
||||
kvm_x86_ops->queue_exception(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Exceptions must be injected immediately, or the exception
|
||||
* frame will have the address of the NMI or interrupt handler.
|
||||
*/
|
||||
if (!vcpu->arch.exception.pending) {
|
||||
if (vcpu->arch.nmi_injected) {
|
||||
kvm_x86_ops->set_nmi(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vcpu->arch.interrupt.pending) {
|
||||
kvm_x86_ops->set_irq(vcpu);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
|
||||
r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
|
||||
if (r != 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* try to inject new event if pending */
|
||||
if (vcpu->arch.exception.pending) {
|
||||
trace_kvm_inj_exception(vcpu->arch.exception.nr,
|
||||
vcpu->arch.exception.has_error_code,
|
||||
vcpu->arch.exception.error_code);
|
||||
|
||||
vcpu->arch.exception.pending = false;
|
||||
vcpu->arch.exception.injected = true;
|
||||
|
||||
if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
|
||||
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
|
||||
X86_EFLAGS_RF);
|
||||
@ -6378,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
|
||||
}
|
||||
|
||||
kvm_x86_ops->queue_exception(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vcpu->arch.nmi_injected) {
|
||||
kvm_x86_ops->set_nmi(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vcpu->arch.interrupt.pending) {
|
||||
kvm_x86_ops->set_irq(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
|
||||
r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
|
||||
if (r != 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* try to inject new event if pending */
|
||||
if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
|
||||
} else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
|
||||
vcpu->arch.smi_pending = false;
|
||||
enter_smm(vcpu);
|
||||
} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
|
||||
@ -6615,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
|
||||
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
|
||||
vcpu->arch.hflags |= HF_SMM_MASK;
|
||||
memset(buf, 0, 512);
|
||||
if (guest_cpuid_has_longmode(vcpu))
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
|
||||
enter_smm_save_state_64(vcpu, buf);
|
||||
else
|
||||
enter_smm_save_state_32(vcpu, buf);
|
||||
@ -6667,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
|
||||
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
|
||||
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
|
||||
|
||||
if (guest_cpuid_has_longmode(vcpu))
|
||||
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
|
||||
kvm_x86_ops->set_efer(vcpu, 0);
|
||||
|
||||
kvm_update_cpuid(vcpu);
|
||||
@ -6774,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
|
||||
vcpu->mmio_needed = 0;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
@ -6862,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
kvm_x86_ops->enable_nmi_window(vcpu);
|
||||
if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
|
||||
kvm_x86_ops->enable_irq_window(vcpu);
|
||||
WARN_ON(vcpu->arch.exception.pending);
|
||||
}
|
||||
|
||||
if (kvm_lapic_enabled(vcpu)) {
|
||||
@ -7004,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
if (vcpu->arch.apic_attention)
|
||||
kvm_lapic_sync_from_vapic(vcpu);
|
||||
|
||||
vcpu->arch.gpa_available = false;
|
||||
r = kvm_x86_ops->handle_exit(vcpu);
|
||||
return r;
|
||||
|
||||
@ -7422,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
int pending_vec, max_bits, idx;
|
||||
struct desc_ptr dt;
|
||||
|
||||
if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
|
||||
(sregs->cr4 & X86_CR4_OSXSAVE))
|
||||
return -EINVAL;
|
||||
|
||||
apic_base_msr.data = sregs->apic_base;
|
||||
apic_base_msr.host_initiated = true;
|
||||
if (kvm_set_apic_base(vcpu, &apic_base_msr))
|
||||
return -EINVAL;
|
||||
|
||||
dt.size = sregs->idt.limit;
|
||||
@ -7441,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
|
||||
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
|
||||
kvm_x86_ops->set_efer(vcpu, sregs->efer);
|
||||
apic_base_msr.data = sregs->apic_base;
|
||||
apic_base_msr.host_initiated = true;
|
||||
kvm_set_apic_base(vcpu, &apic_base_msr);
|
||||
|
||||
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
|
||||
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
|
||||
@ -7734,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
vcpu->arch.nmi_injected = false;
|
||||
kvm_clear_interrupt_queue(vcpu);
|
||||
kvm_clear_exception_queue(vcpu);
|
||||
vcpu->arch.exception.pending = false;
|
||||
|
||||
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
|
||||
kvm_update_dr0123(vcpu);
|
||||
@ -7993,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
kvm_pmu_init(vcpu);
|
||||
|
||||
vcpu->arch.pending_external_vector = -1;
|
||||
vcpu->arch.preempted_in_kernel = false;
|
||||
|
||||
kvm_hv_vcpu_init(vcpu);
|
||||
|
||||
@ -8440,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|
||||
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
|
||||
}
|
||||
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.preempted_in_kernel;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.exception.pending = false;
|
||||
vcpu->arch.exception.injected = false;
|
||||
}
|
||||
|
||||
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
|
||||
@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
|
||||
|
||||
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
|
||||
return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
|
||||
vcpu->arch.nmi_injected;
|
||||
}
|
||||
|
||||
@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
|
||||
return cs_l;
|
||||
}
|
||||
|
||||
static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return (vcpu->arch.efer & EFER_LMA) &&
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
|
||||
@ -87,10 +97,48 @@ static inline u32 bit(int bitno)
|
||||
return 1 << (bitno & 31);
|
||||
}
|
||||
|
||||
static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
|
||||
}
|
||||
|
||||
static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
|
||||
}
|
||||
|
||||
static inline u64 get_canonical(u64 la, u8 vaddr_bits)
|
||||
{
|
||||
return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
|
||||
}
|
||||
|
||||
static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool emul_is_noncanonical_address(u64 la,
|
||||
struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
|
||||
gva_t gva, gfn_t gfn, unsigned access)
|
||||
{
|
||||
vcpu->arch.mmio_gva = gva & PAGE_MASK;
|
||||
/*
|
||||
* If this is a shadow nested page table, the "GVA" is
|
||||
* actually a nGPA.
|
||||
*/
|
||||
vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
|
||||
vcpu->arch.access = access;
|
||||
vcpu->arch.mmio_gfn = gfn;
|
||||
vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
|
||||
|
@ -720,7 +720,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
|
||||
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
|
||||
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
|
||||
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
|
||||
@ -800,6 +800,7 @@ int kvm_arch_hardware_setup(void);
|
||||
void kvm_arch_hardware_unsetup(void);
|
||||
void kvm_arch_check_processor_compat(void *rtn);
|
||||
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
|
||||
|
||||
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
|
||||
@ -985,6 +986,12 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
|
||||
return (hpa_t)pfn << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu,
|
||||
gpa_t gpa)
|
||||
{
|
||||
return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa));
|
||||
}
|
||||
|
||||
static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
|
||||
{
|
||||
unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
|
||||
|
@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
|
||||
struct kvm_ppc_smmu_info {
|
||||
__u64 flags;
|
||||
__u32 slb_size;
|
||||
__u32 pad;
|
||||
__u16 data_keys; /* # storage keys supported for data */
|
||||
__u16 instr_keys; /* # storage keys supported for instructions */
|
||||
struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
|
||||
};
|
||||
|
||||
|
@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
|
||||
&& !v->arch.power_off && !v->arch.pause);
|
||||
}
|
||||
|
||||
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu_mode_priv(vcpu);
|
||||
}
|
||||
|
||||
/* Just ensure a guest exit from a particular CPU */
|
||||
static void exit_vm_noop(void *info)
|
||||
{
|
||||
|
@ -1454,25 +1454,6 @@ out:
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
}
|
||||
|
||||
static bool is_abort_sea(unsigned long fault_status)
|
||||
{
|
||||
switch (fault_status) {
|
||||
case FSC_SEA:
|
||||
case FSC_SEA_TTW0:
|
||||
case FSC_SEA_TTW1:
|
||||
case FSC_SEA_TTW2:
|
||||
case FSC_SEA_TTW3:
|
||||
case FSC_SECC:
|
||||
case FSC_SECC_TTW0:
|
||||
case FSC_SECC_TTW1:
|
||||
case FSC_SECC_TTW2:
|
||||
case FSC_SECC_TTW3:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_handle_guest_abort - handles all 2nd stage aborts
|
||||
* @vcpu: the VCPU pointer
|
||||
@ -1498,20 +1479,21 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
|
||||
|
||||
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
|
||||
/*
|
||||
* The host kernel will handle the synchronous external abort. There
|
||||
* is no need to pass the error into the guest.
|
||||
*/
|
||||
if (is_abort_sea(fault_status)) {
|
||||
/* Synchronous External Abort? */
|
||||
if (kvm_vcpu_dabt_isextabt(vcpu)) {
|
||||
/*
|
||||
* For RAS the host kernel may handle this abort.
|
||||
* There is no need to pass the error into the guest.
|
||||
*/
|
||||
if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
|
||||
return 1;
|
||||
}
|
||||
|
||||
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
|
||||
kvm_inject_vabt(vcpu);
|
||||
return 1;
|
||||
if (unlikely(!is_iabt)) {
|
||||
kvm_inject_vabt(vcpu);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
|
||||
|
@ -234,7 +234,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct seq_operations vgic_debug_seq_ops = {
|
||||
static const struct seq_operations vgic_debug_seq_ops = {
|
||||
.start = vgic_debug_start,
|
||||
.next = vgic_debug_next,
|
||||
.stop = vgic_debug_stop,
|
||||
@ -255,7 +255,7 @@ static int debug_open(struct inode *inode, struct file *file)
|
||||
return ret;
|
||||
};
|
||||
|
||||
static struct file_operations vgic_debug_fops = {
|
||||
static const struct file_operations vgic_debug_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = debug_open,
|
||||
.read = seq_read,
|
||||
|
@ -144,7 +144,6 @@ struct its_ite {
|
||||
|
||||
struct vgic_irq *irq;
|
||||
struct its_collection *collection;
|
||||
u32 lpi;
|
||||
u32 event_id;
|
||||
};
|
||||
|
||||
@ -813,7 +812,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
|
||||
/* Must be called with its_lock mutex held */
|
||||
static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
|
||||
struct its_collection *collection,
|
||||
u32 lpi_id, u32 event_id)
|
||||
u32 event_id)
|
||||
{
|
||||
struct its_ite *ite;
|
||||
|
||||
@ -823,7 +822,6 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
|
||||
|
||||
ite->event_id = event_id;
|
||||
ite->collection = collection;
|
||||
ite->lpi = lpi_id;
|
||||
|
||||
list_add_tail(&ite->ite_list, &device->itt_head);
|
||||
return ite;
|
||||
@ -873,7 +871,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
|
||||
new_coll = collection;
|
||||
}
|
||||
|
||||
ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id);
|
||||
ite = vgic_its_alloc_ite(device, collection, event_id);
|
||||
if (IS_ERR(ite)) {
|
||||
if (new_coll)
|
||||
vgic_its_free_collection(its, coll_id);
|
||||
@ -1848,7 +1846,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
|
||||
|
||||
next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
|
||||
val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
|
||||
((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) |
|
||||
((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
|
||||
ite->collection->collection_id;
|
||||
val = cpu_to_le64(val);
|
||||
return kvm_write_guest(kvm, gpa, &val, ite_esz);
|
||||
@ -1895,7 +1893,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
|
||||
if (!collection)
|
||||
return -EINVAL;
|
||||
|
||||
ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id);
|
||||
ite = vgic_its_alloc_ite(dev, collection, event_id);
|
||||
if (IS_ERR(ite))
|
||||
return PTR_ERR(ite);
|
||||
|
||||
|
@ -303,6 +303,51 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
}
|
||||
|
||||
static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
int n; /* which APRn is this */
|
||||
|
||||
n = (addr >> 2) & 0x3;
|
||||
|
||||
if (kvm_vgic_global_state.type == VGIC_V2) {
|
||||
/* GICv2 hardware systems support max. 32 groups */
|
||||
if (n != 0)
|
||||
return 0;
|
||||
return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr;
|
||||
} else {
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
if (n > vgic_v3_max_apr_idx(vcpu))
|
||||
return 0;
|
||||
/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
|
||||
return vgicv3->vgic_ap1r[n];
|
||||
}
|
||||
}
|
||||
|
||||
static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
int n; /* which APRn is this */
|
||||
|
||||
n = (addr >> 2) & 0x3;
|
||||
|
||||
if (kvm_vgic_global_state.type == VGIC_V2) {
|
||||
/* GICv2 hardware systems support max. 32 groups */
|
||||
if (n != 0)
|
||||
return;
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val;
|
||||
} else {
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
if (n > vgic_v3_max_apr_idx(vcpu))
|
||||
return;
|
||||
/* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
|
||||
vgicv3->vgic_ap1r[n] = val;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct vgic_register_region vgic_v2_dist_registers[] = {
|
||||
REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
|
||||
vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
|
||||
@ -364,7 +409,7 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = {
|
||||
vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_wi, 16,
|
||||
vgic_mmio_read_apr, vgic_mmio_write_apr, 16,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
|
||||
vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
|
||||
|
@ -220,4 +220,20 @@ int vgic_debug_destroy(struct kvm *kvm);
|
||||
bool lock_all_vcpus(struct kvm *kvm);
|
||||
void unlock_all_vcpus(struct kvm *kvm);
|
||||
|
||||
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
|
||||
|
||||
/*
|
||||
* num_pri_bits are initialized with HW supported values.
|
||||
* We can rely safely on num_pri_bits even if VM has not
|
||||
* restored ICC_CTLR_EL1 before restoring APnR registers.
|
||||
*/
|
||||
switch (cpu_if->num_pri_bits) {
|
||||
case 7: return 3;
|
||||
case 6: return 1;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1609,7 +1609,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||
struct page **pages, int nr_pages)
|
||||
{
|
||||
unsigned long addr;
|
||||
gfn_t entry;
|
||||
gfn_t entry = 0;
|
||||
|
||||
addr = gfn_to_hva_many(slot, gfn, &entry);
|
||||
if (kvm_is_error_hva(addr))
|
||||
@ -1928,6 +1928,7 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
|
||||
* verify that the entire region is valid here.
|
||||
*/
|
||||
while (start_gfn <= end_gfn) {
|
||||
nr_pages_avail = 0;
|
||||
ghc->memslot = __gfn_to_memslot(slots, start_gfn);
|
||||
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
|
||||
&nr_pages_avail);
|
||||
@ -2275,7 +2276,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
}
|
||||
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
||||
void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
|
||||
{
|
||||
struct kvm *kvm = me->kvm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
@ -2306,6 +2307,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
||||
continue;
|
||||
if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
|
||||
continue;
|
||||
if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
|
||||
continue;
|
||||
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
|
||||
continue;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user