mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
Two larger x86 series:
* Redo incorrect fix for SEV/SMAP erratum * Windows 11 Hyper-V workaround Other x86 changes: * Various x86 cleanups * Re-enable access_tracking_perf_test * Fix for #GP handling on SVM * Fix for CPUID leaf 0Dh in KVM_GET_SUPPORTED_CPUID * Fix for ICEBP in interrupt shadow * Avoid false-positive RCU splat * Enable Enlightened MSR-Bitmap support for real ARM: * Correctly update the shadow register on exception injection when running in nVHE mode * Correctly use the mm_ops indirection when performing cache invalidation from the page-table walker * Restrict the vgic-v3 workaround for SEIS to the two known broken implementations Generic code changes: * Dead code cleanup There will be another pull request for ARM fixes next week, but those patches need a bit more soak time. -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmHz5eIUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroNv4wgAopj0Zlutrrtw3KT4/XnmSdMPgN0j jQNzysSLTO5wGQCEogycjYXkGUDFu1Gdi+K91QAyjeKja20pIhPLeS2CBDRJyOc5 73K7sxqz51JnQiVFzkTuA+qzn+lXaJ9LUXtdg8BnQMSKyt2AJOqE8uT10kcYOD5q mW4V3QUA0QpVKN0cYHv/G/zvBwQGGSLZetFbuAzwH2EDTpIi1aio5ZN1r0AoH18L 2x5kYPpqmnoBvo2cB4b7SNmxv3ZPQ5K+wta0uwZ4pO+UuYiRd84RPr5lErywJC3w nci0eC0DoXrC6h+35UItqM8RqAGv6LADbDnr1RGojmfogSD0OtbX8y3hjw== =iKnI -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "Two larger x86 series: - Redo incorrect fix for SEV/SMAP erratum - Windows 11 Hyper-V workaround Other x86 changes: - Various x86 cleanups - Re-enable access_tracking_perf_test - Fix for #GP handling on SVM - Fix for CPUID leaf 0Dh in KVM_GET_SUPPORTED_CPUID - Fix for ICEBP in interrupt shadow - Avoid false-positive RCU splat - Enable Enlightened MSR-Bitmap support for real ARM: - Correctly update the shadow register on exception injection when running in nVHE mode - Correctly use the mm_ops indirection when performing cache invalidation from the page-table walker - Restrict the vgic-v3 
workaround for SEIS to the two known broken implementations Generic code changes: - Dead code cleanup" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (43 commits) KVM: eventfd: Fix false positive RCU usage warning KVM: nVMX: Allow VMREAD when Enlightened VMCS is in use KVM: nVMX: Implement evmcs_field_offset() suitable for handle_vmread() KVM: nVMX: Rename vmcs_to_field_offset{,_table} KVM: nVMX: eVMCS: Filter out VM_EXIT_SAVE_VMX_PREEMPTION_TIMER KVM: nVMX: Also filter MSR_IA32_VMX_TRUE_PINBASED_CTLS when eVMCS selftests: kvm: check dynamic bits against KVM_X86_XCOMP_GUEST_SUPP KVM: x86: add system attribute to retrieve full set of supported xsave states KVM: x86: Add a helper to retrieve userspace address from kvm_device_attr selftests: kvm: move vm_xsave_req_perm call to amx_test KVM: x86: Sync the states size with the XCR0/IA32_XSS at, any time KVM: x86: Update vCPU's runtime CPUID on write to MSR_IA32_XSS KVM: x86: Keep MSR_IA32_XSS unchanged for INIT KVM: x86: Free kvm_cpuid_entry2 array on post-KVM_RUN KVM_SET_CPUID{,2} KVM: nVMX: WARN on any attempt to allocate shadow VMCS for vmcs02 KVM: selftests: Don't skip L2's VMCALL in SMM test for SVM guest KVM: x86: Check .flags in kvm_cpuid_check_equal() too KVM: x86: Forcibly leave nested virt when SMM state is toggled KVM: SVM: drop unnecessary code in svm_hv_vmcb_dirty_nested_enlightenments() KVM: SVM: hyper-v: Enable Enlightened MSR-Bitmap support for real ...
This commit is contained in:
commit
3cd7cd8a62
@ -3268,6 +3268,7 @@ number.
|
||||
|
||||
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
|
||||
KVM_CAP_VCPU_ATTRIBUTES for vcpu device,
|
||||
KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set)
|
||||
:Type: device ioctl, vm ioctl, vcpu ioctl
|
||||
:Parameters: struct kvm_device_attr
|
||||
:Returns: 0 on success, -1 on error
|
||||
@ -3302,7 +3303,8 @@ transferred is defined by the particular attribute.
|
||||
------------------------
|
||||
|
||||
:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
|
||||
KVM_CAP_VCPU_ATTRIBUTES for vcpu device
|
||||
KVM_CAP_VCPU_ATTRIBUTES for vcpu device,
|
||||
KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device
|
||||
:Type: device ioctl, vm ioctl, vcpu ioctl
|
||||
:Parameters: struct kvm_device_attr
|
||||
:Returns: 0 on success, -1 on error
|
||||
|
@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
|
||||
|
||||
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
write_sysreg_el1(val, SYS_SPSR);
|
||||
if (has_vhe())
|
||||
write_sysreg_el1(val, SYS_SPSR);
|
||||
else
|
||||
__vcpu_sys_reg(vcpu, SPSR_EL1) = val;
|
||||
}
|
||||
|
||||
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
|
||||
|
@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
*/
|
||||
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
|
||||
|
||||
if (need_flush) {
|
||||
kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
|
||||
|
||||
dcache_clean_inval_poc((unsigned long)pte_follow,
|
||||
(unsigned long)pte_follow +
|
||||
kvm_granule_size(level));
|
||||
}
|
||||
if (need_flush && mm_ops->dcache_clean_inval_poc)
|
||||
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
|
||||
kvm_granule_size(level));
|
||||
|
||||
if (childp)
|
||||
mm_ops->put_page(childp);
|
||||
@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
struct kvm_pgtable *pgt = arg;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
|
||||
kvm_pte_t pte = *ptep;
|
||||
kvm_pte_t *pte_follow;
|
||||
|
||||
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
|
||||
return 0;
|
||||
|
||||
pte_follow = kvm_pte_follow(pte, mm_ops);
|
||||
dcache_clean_inval_poc((unsigned long)pte_follow,
|
||||
(unsigned long)pte_follow +
|
||||
kvm_granule_size(level));
|
||||
if (mm_ops->dcache_clean_inval_poc)
|
||||
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
|
||||
kvm_granule_size(level));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
|
||||
/* IDbits */
|
||||
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
|
||||
/* SEIS */
|
||||
if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
|
||||
val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
|
||||
/* A3V */
|
||||
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
|
||||
/* EOImode */
|
||||
|
@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf)
|
||||
}
|
||||
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
|
||||
|
||||
static const struct midr_range broken_seis[] = {
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
|
||||
{},
|
||||
};
|
||||
|
||||
static bool vgic_v3_broken_seis(void)
|
||||
{
|
||||
return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
|
||||
is_midr_in_range_list(read_cpuid_id(), broken_seis));
|
||||
}
|
||||
|
||||
/**
|
||||
* vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
|
||||
* @info: pointer to the GIC description
|
||||
@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
|
||||
group1_trap = true;
|
||||
}
|
||||
|
||||
if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
|
||||
kvm_info("GICv3 with locally generated SEI\n");
|
||||
if (vgic_v3_broken_seis()) {
|
||||
kvm_info("GICv3 with broken locally generated SEI\n");
|
||||
|
||||
kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
|
||||
group0_trap = true;
|
||||
group1_trap = true;
|
||||
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
|
||||
|
@ -1483,7 +1483,8 @@ struct kvm_x86_ops {
|
||||
|
||||
int (*get_msr_feature)(struct kvm_msr_entry *entry);
|
||||
|
||||
bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
|
||||
bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len);
|
||||
|
||||
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
|
||||
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
|
||||
@ -1496,6 +1497,7 @@ struct kvm_x86_ops {
|
||||
};
|
||||
|
||||
struct kvm_x86_nested_ops {
|
||||
void (*leave_nested)(struct kvm_vcpu *vcpu);
|
||||
int (*check_events)(struct kvm_vcpu *vcpu);
|
||||
bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
|
||||
void (*triple_fault)(struct kvm_vcpu *vcpu);
|
||||
@ -1861,7 +1863,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v);
|
||||
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
|
||||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
|
||||
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
|
||||
unsigned long ipi_bitmap_high, u32 min,
|
||||
|
@ -452,6 +452,9 @@ struct kvm_sync_regs {
|
||||
|
||||
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
|
||||
|
||||
/* attributes for system fd (group 0) */
|
||||
#define KVM_X86_XCOMP_GUEST_SUPP 0
|
||||
|
||||
struct kvm_vmx_nested_state_data {
|
||||
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
|
@ -133,6 +133,7 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
|
||||
orig = &vcpu->arch.cpuid_entries[i];
|
||||
if (e2[i].function != orig->function ||
|
||||
e2[i].index != orig->index ||
|
||||
e2[i].flags != orig->flags ||
|
||||
e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
|
||||
e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
|
||||
return -EINVAL;
|
||||
@ -196,10 +197,26 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.pv_cpuid.features = best->eax;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate guest's supported XCR0 taking into account guest CPUID data and
|
||||
* supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
|
||||
*/
|
||||
static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = cpuid_entry2_find(entries, nent, 0xd, 0);
|
||||
if (!best)
|
||||
return 0;
|
||||
|
||||
return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
|
||||
}
|
||||
|
||||
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
|
||||
int nent)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
|
||||
|
||||
best = cpuid_entry2_find(entries, nent, 1, 0);
|
||||
if (best) {
|
||||
@ -238,6 +255,21 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
|
||||
vcpu->arch.ia32_misc_enable_msr &
|
||||
MSR_IA32_MISC_ENABLE_MWAIT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
|
||||
* the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
|
||||
* requested XCR0 value. The enclave's XFRM must be a subset of XCR0
|
||||
* at the time of EENTER, thus adjust the allowed XFRM by the guest's
|
||||
* supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
|
||||
* '1' even on CPUs that don't support XSAVE.
|
||||
*/
|
||||
best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
|
||||
if (best) {
|
||||
best->ecx &= guest_supported_xcr0 & 0xffffffff;
|
||||
best->edx &= guest_supported_xcr0 >> 32;
|
||||
best->ecx |= XFEATURE_MASK_FPSSE;
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
|
||||
@ -261,27 +293,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
kvm_apic_set_version(vcpu);
|
||||
}
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
|
||||
if (!best)
|
||||
vcpu->arch.guest_supported_xcr0 = 0;
|
||||
else
|
||||
vcpu->arch.guest_supported_xcr0 =
|
||||
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
|
||||
|
||||
/*
|
||||
* Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
|
||||
* the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
|
||||
* requested XCR0 value. The enclave's XFRM must be a subset of XCR0
|
||||
* at the time of EENTER, thus adjust the allowed XFRM by the guest's
|
||||
* supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
|
||||
* '1' even on CPUs that don't support XSAVE.
|
||||
*/
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1);
|
||||
if (best) {
|
||||
best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff;
|
||||
best->edx &= vcpu->arch.guest_supported_xcr0 >> 32;
|
||||
best->ecx |= XFEATURE_MASK_FPSSE;
|
||||
}
|
||||
vcpu->arch.guest_supported_xcr0 =
|
||||
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
|
||||
|
||||
kvm_update_pv_runtime(vcpu);
|
||||
|
||||
@ -346,8 +359,14 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
|
||||
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
|
||||
* whether the supplied CPUID data is equal to what's already set.
|
||||
*/
|
||||
if (vcpu->arch.last_vmentry_cpu != -1)
|
||||
return kvm_cpuid_check_equal(vcpu, e2, nent);
|
||||
if (vcpu->arch.last_vmentry_cpu != -1) {
|
||||
r = kvm_cpuid_check_equal(vcpu, e2, nent);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
kvfree(e2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = kvm_check_cpuid(vcpu, e2, nent);
|
||||
if (r)
|
||||
@ -887,13 +906,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
}
|
||||
break;
|
||||
case 0xd: {
|
||||
u64 guest_perm = xstate_get_guest_group_perm();
|
||||
u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();
|
||||
u64 permitted_xss = supported_xss;
|
||||
|
||||
entry->eax &= supported_xcr0 & guest_perm;
|
||||
entry->ebx = xstate_required_size(supported_xcr0, false);
|
||||
entry->eax &= permitted_xcr0;
|
||||
entry->ebx = xstate_required_size(permitted_xcr0, false);
|
||||
entry->ecx = entry->ebx;
|
||||
entry->edx &= (supported_xcr0 & guest_perm) >> 32;
|
||||
if (!supported_xcr0)
|
||||
entry->edx &= permitted_xcr0 >> 32;
|
||||
if (!permitted_xcr0)
|
||||
break;
|
||||
|
||||
entry = do_host_cpuid(array, function, 1);
|
||||
@ -902,20 +922,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
|
||||
cpuid_entry_override(entry, CPUID_D_1_EAX);
|
||||
if (entry->eax & (F(XSAVES)|F(XSAVEC)))
|
||||
entry->ebx = xstate_required_size(supported_xcr0 | supported_xss,
|
||||
entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss,
|
||||
true);
|
||||
else {
|
||||
WARN_ON_ONCE(supported_xss != 0);
|
||||
WARN_ON_ONCE(permitted_xss != 0);
|
||||
entry->ebx = 0;
|
||||
}
|
||||
entry->ecx &= supported_xss;
|
||||
entry->edx &= supported_xss >> 32;
|
||||
entry->ecx &= permitted_xss;
|
||||
entry->edx &= permitted_xss >> 32;
|
||||
|
||||
for (i = 2; i < 64; ++i) {
|
||||
bool s_state;
|
||||
if (supported_xcr0 & BIT_ULL(i))
|
||||
if (permitted_xcr0 & BIT_ULL(i))
|
||||
s_state = false;
|
||||
else if (supported_xss & BIT_ULL(i))
|
||||
else if (permitted_xss & BIT_ULL(i))
|
||||
s_state = true;
|
||||
else
|
||||
continue;
|
||||
@ -929,7 +949,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
* invalid sub-leafs. Only valid sub-leafs should
|
||||
* reach this point, and they should have a non-zero
|
||||
* save state size. Furthermore, check whether the
|
||||
* processor agrees with supported_xcr0/supported_xss
|
||||
* processor agrees with permitted_xcr0/permitted_xss
|
||||
* on whether this is an XCR0- or IA32_XSS-managed area.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) {
|
||||
|
@ -2629,7 +2629,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
|
||||
kvm_apic_set_version(vcpu);
|
||||
|
||||
apic_update_ppr(apic);
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
cancel_apic_timer(apic);
|
||||
apic->lapic_timer.expired_tscdeadline = 0;
|
||||
apic_update_lvtt(apic);
|
||||
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
|
||||
|
@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm)
|
||||
/*
|
||||
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
|
||||
*/
|
||||
void svm_leave_nested(struct vcpu_svm *svm)
|
||||
void svm_leave_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (is_guest_mode(vcpu)) {
|
||||
svm->nested.nested_run_pending = 0;
|
||||
@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
|
||||
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
|
||||
svm_leave_nested(svm);
|
||||
svm_leave_nested(vcpu);
|
||||
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
|
||||
return 0;
|
||||
}
|
||||
@ -1478,7 +1478,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
|
||||
*/
|
||||
|
||||
if (is_guest_mode(vcpu))
|
||||
svm_leave_nested(svm);
|
||||
svm_leave_nested(vcpu);
|
||||
else
|
||||
svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
|
||||
|
||||
@ -1532,6 +1532,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
struct kvm_x86_nested_ops svm_nested_ops = {
|
||||
.leave_nested = svm_leave_nested,
|
||||
.check_events = svm_check_nested_events,
|
||||
.triple_fault = nested_svm_triple_fault,
|
||||
.get_nested_state_pages = svm_get_nested_state_pages,
|
||||
|
@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void)
|
||||
if (!sev_enabled || !npt_enabled)
|
||||
goto out;
|
||||
|
||||
/* Does the CPU support SEV? */
|
||||
if (!boot_cpu_has(X86_FEATURE_SEV))
|
||||
/*
|
||||
* SEV must obviously be supported in hardware. Sanity check that the
|
||||
* CPU supports decode assists, which is mandatory for SEV guests to
|
||||
* support instruction emulation.
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_SEV) ||
|
||||
WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
|
||||
goto out;
|
||||
|
||||
/* Retrieve SEV CPUID information */
|
||||
|
@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
|
||||
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
|
||||
if (!(efer & EFER_SVME)) {
|
||||
svm_leave_nested(svm);
|
||||
svm_leave_nested(vcpu);
|
||||
svm_set_gif(svm, true);
|
||||
/* #GP intercept is still needed for vmware backdoor */
|
||||
if (!enable_vmware_backdoor)
|
||||
@ -312,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (svm_gp_erratum_intercept)
|
||||
/*
|
||||
* Never intercept #GP for SEV guests, KVM can't
|
||||
* decrypt guest memory to work around the erratum.
|
||||
*/
|
||||
if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
|
||||
set_exception_intercept(svm, GP_VECTOR);
|
||||
}
|
||||
}
|
||||
@ -1010,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
|
||||
* Guest access to VMware backdoor ports could legitimately
|
||||
* trigger #GP because of TSS I/O permission bitmap.
|
||||
* We intercept those #GP and allow access to them anyway
|
||||
* as VMware does.
|
||||
* as VMware does. Don't intercept #GP for SEV guests as KVM can't
|
||||
* decrypt guest memory to decode the faulting instruction.
|
||||
*/
|
||||
if (enable_vmware_backdoor)
|
||||
if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
|
||||
set_exception_intercept(svm, GP_VECTOR);
|
||||
|
||||
svm_set_intercept(svm, INTERCEPT_INTR);
|
||||
@ -2091,10 +2096,6 @@ static int gp_interception(struct kvm_vcpu *vcpu)
|
||||
if (error_code)
|
||||
goto reinject;
|
||||
|
||||
/* All SVM instructions expect page aligned RAX */
|
||||
if (svm->vmcb->save.rax & ~PAGE_MASK)
|
||||
goto reinject;
|
||||
|
||||
/* Decode the instruction for usage later */
|
||||
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
|
||||
goto reinject;
|
||||
@ -2112,8 +2113,13 @@ static int gp_interception(struct kvm_vcpu *vcpu)
|
||||
if (!is_guest_mode(vcpu))
|
||||
return kvm_emulate_instruction(vcpu,
|
||||
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
|
||||
} else
|
||||
} else {
|
||||
/* All SVM instructions expect page aligned RAX */
|
||||
if (svm->vmcb->save.rax & ~PAGE_MASK)
|
||||
goto reinject;
|
||||
|
||||
return emulate_svm_instr(vcpu, opcode);
|
||||
}
|
||||
|
||||
reinject:
|
||||
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
||||
@ -4252,79 +4258,140 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
|
||||
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len)
|
||||
{
|
||||
bool smep, smap, is_user;
|
||||
unsigned long cr4;
|
||||
u64 error_code;
|
||||
|
||||
/* Emulation is always possible when KVM has access to all guest state. */
|
||||
if (!sev_guest(vcpu->kvm))
|
||||
return true;
|
||||
|
||||
/* #UD and #GP should never be intercepted for SEV guests. */
|
||||
WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD |
|
||||
EMULTYPE_TRAP_UD_FORCED |
|
||||
EMULTYPE_VMWARE_GP));
|
||||
|
||||
/*
|
||||
* When the guest is an SEV-ES guest, emulation is not possible.
|
||||
* Emulation is impossible for SEV-ES guests as KVM doesn't have access
|
||||
* to guest register state.
|
||||
*/
|
||||
if (sev_es_guest(vcpu->kvm))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
|
||||
*
|
||||
* Errata:
|
||||
* When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
|
||||
* possible that CPU microcode implementing DecodeAssist will fail
|
||||
* to read bytes of instruction which caused #NPF. In this case,
|
||||
* GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
|
||||
* return 0 instead of the correct guest instruction bytes.
|
||||
*
|
||||
* This happens because CPU microcode reading instruction bytes
|
||||
* uses a special opcode which attempts to read data using CPL=0
|
||||
* privileges. The microcode reads CS:RIP and if it hits a SMAP
|
||||
* fault, it gives up and returns no instruction bytes.
|
||||
*
|
||||
* Detection:
|
||||
* We reach here in case CPU supports DecodeAssist, raised #NPF and
|
||||
* returned 0 in GuestIntrBytes field of the VMCB.
|
||||
* First, errata can only be triggered in case vCPU CR4.SMAP=1.
|
||||
* Second, if vCPU CR4.SMEP=1, errata could only be triggered
|
||||
* in case vCPU CPL==3 (Because otherwise guest would have triggered
|
||||
* a SMEP fault instead of #NPF).
|
||||
* Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
|
||||
* As most guests enable SMAP if they have also enabled SMEP, use above
|
||||
* logic in order to attempt minimize false-positive of detecting errata
|
||||
* while still preserving all cases semantic correctness.
|
||||
*
|
||||
* Workaround:
|
||||
* To determine what instruction the guest was executing, the hypervisor
|
||||
* will have to decode the instruction at the instruction pointer.
|
||||
*
|
||||
* In non SEV guest, hypervisor will be able to read the guest
|
||||
* memory to decode the instruction pointer when insn_len is zero
|
||||
* so we return true to indicate that decoding is possible.
|
||||
*
|
||||
* But in the SEV guest, the guest memory is encrypted with the
|
||||
* guest specific key and hypervisor will not be able to decode the
|
||||
* instruction pointer so we will not able to workaround it. Lets
|
||||
* print the error and request to kill the guest.
|
||||
* Emulation is possible if the instruction is already decoded, e.g.
|
||||
* when completing I/O after returning from userspace.
|
||||
*/
|
||||
if (likely(!insn || insn_len))
|
||||
if (emul_type & EMULTYPE_NO_DECODE)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* If RIP is invalid, go ahead with emulation which will cause an
|
||||
* internal error exit.
|
||||
* Emulation is possible for SEV guests if and only if a prefilled
|
||||
* buffer containing the bytes of the intercepted instruction is
|
||||
* available. SEV guest memory is encrypted with a guest specific key
|
||||
* and cannot be decrypted by KVM, i.e. KVM would read ciphertext and
|
||||
* decode garbage.
|
||||
*
|
||||
* Inject #UD if KVM reached this point without an instruction buffer.
|
||||
* In practice, this path should never be hit by a well-behaved guest,
|
||||
* e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
|
||||
* is still theoretically reachable, e.g. via unaccelerated fault-like
|
||||
* AVIC access, and needs to be handled by KVM to avoid putting the
|
||||
* guest into an infinite loop. Injecting #UD is somewhat arbitrary,
|
||||
* but it's the least awful option given lack of insight into the guest.
|
||||
*/
|
||||
if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
|
||||
if (unlikely(!insn)) {
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Emulate for SEV guests if the insn buffer is not empty. The buffer
|
||||
* will be empty if the DecodeAssist microcode cannot fetch bytes for
|
||||
* the faulting instruction because the code fetch itself faulted, e.g.
|
||||
* the guest attempted to fetch from emulated MMIO or a guest page
|
||||
* table used to translate CS:RIP resides in emulated MMIO.
|
||||
*/
|
||||
if (likely(insn_len))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
|
||||
*
|
||||
* Errata:
|
||||
* When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is
|
||||
* possible that CPU microcode implementing DecodeAssist will fail to
|
||||
* read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly
|
||||
* be '0'. This happens because microcode reads CS:RIP using a _data_
|
||||
* load uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode
|
||||
* gives up and does not fill the instruction bytes buffer.
|
||||
*
|
||||
* As above, KVM reaches this point iff the VM is an SEV guest, the CPU
|
||||
* supports DecodeAssist, a #NPF was raised, KVM's page fault handler
|
||||
* triggered emulation (e.g. for MMIO), and the CPU returned 0 in the
|
||||
* GuestIntrBytes field of the VMCB.
|
||||
*
|
||||
* This does _not_ mean that the erratum has been encountered, as the
|
||||
* DecodeAssist will also fail if the load for CS:RIP hits a legitimate
|
||||
* #PF, e.g. if the guest attempted to execute from emulated MMIO and
|
||||
* encountered a reserved/not-present #PF.
|
||||
*
|
||||
* To hit the erratum, the following conditions must be true:
|
||||
* 1. CR4.SMAP=1 (obviously).
|
||||
* 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot
|
||||
* have been hit as the guest would have encountered a SMEP
|
||||
* violation #PF, not a #NPF.
|
||||
* 3. The #NPF is not due to a code fetch, in which case failure to
|
||||
* retrieve the instruction bytes is legitimate (see above).
|
||||
*
|
||||
* In addition, don't apply the erratum workaround if the #NPF occurred
|
||||
* while translating guest page tables (see below).
|
||||
*/
|
||||
error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
|
||||
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
|
||||
goto resume_guest;
|
||||
|
||||
cr4 = kvm_read_cr4(vcpu);
|
||||
smep = cr4 & X86_CR4_SMEP;
|
||||
smap = cr4 & X86_CR4_SMAP;
|
||||
is_user = svm_get_cpl(vcpu) == 3;
|
||||
if (smap && (!smep || is_user)) {
|
||||
if (!sev_guest(vcpu->kvm))
|
||||
return true;
|
||||
|
||||
pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
|
||||
/*
|
||||
* If the fault occurred in userspace, arbitrarily inject #GP
|
||||
* to avoid killing the guest and to hopefully avoid confusing
|
||||
* the guest kernel too much, e.g. injecting #PF would not be
|
||||
* coherent with respect to the guest's page tables. Request
|
||||
* triple fault if the fault occurred in the kernel as there's
|
||||
* no fault that KVM can inject without confusing the guest.
|
||||
* In practice, the triple fault is moot as no sane SEV kernel
|
||||
* will execute from user memory while also running with SMAP=1.
|
||||
*/
|
||||
if (is_user)
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
else
|
||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||
}
|
||||
|
||||
resume_guest:
|
||||
/*
|
||||
* If the erratum was not hit, simply resume the guest and let it fault
|
||||
* again. While awful, e.g. the vCPU may get stuck in an infinite loop
|
||||
* if the fault is at CPL=0, it's the lesser of all evils. Exiting to
|
||||
* userspace will kill the guest, and letting the emulator read garbage
|
||||
* will yield random behavior and potentially corrupt the guest.
|
||||
*
|
||||
* Simply resuming the guest is technically not a violation of the SEV
|
||||
* architecture. AMD's APM states that all code fetches and page table
|
||||
* accesses for SEV guests are encrypted, regardless of the C-Bit. The
|
||||
* APM also states that encrypted accesses to MMIO are "ignored", but
|
||||
* doesn't explicitly define "ignored", i.e. doing nothing and letting
|
||||
* the guest spin is technically "ignoring" the access.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -304,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
|
||||
& ~VMCB_ALWAYS_DIRTY_MASK;
|
||||
}
|
||||
|
||||
static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
|
||||
{
|
||||
return (vmcb->control.clean & (1 << bit));
|
||||
}
|
||||
|
||||
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
|
||||
{
|
||||
vmcb->control.clean &= ~(1 << bit);
|
||||
@ -525,7 +520,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
|
||||
|
||||
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
|
||||
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
|
||||
void svm_leave_nested(struct vcpu_svm *svm);
|
||||
void svm_leave_nested(struct kvm_vcpu *vcpu);
|
||||
void svm_free_nested(struct vcpu_svm *svm);
|
||||
int svm_allocate_nested(struct vcpu_svm *svm);
|
||||
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
|
||||
|
@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
|
||||
if (npt_enabled &&
|
||||
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
|
||||
hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
|
||||
|
||||
if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)
|
||||
hve->hv_enlightenments_control.msr_bitmap = 1;
|
||||
}
|
||||
|
||||
static inline void svm_hv_hardware_setup(void)
|
||||
@ -83,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
|
||||
struct hv_enlightenments *hve =
|
||||
(struct hv_enlightenments *)vmcb->control.reserved_sw;
|
||||
|
||||
/*
|
||||
* vmcb can be NULL if called during early vcpu init.
|
||||
* And its okay not to mark vmcb dirty during vcpu init
|
||||
* as we mark it dirty unconditionally towards end of vcpu
|
||||
* init phase.
|
||||
*/
|
||||
if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
|
||||
hve->hv_enlightenments_control.msr_bitmap)
|
||||
if (hve->hv_enlightenments_control.msr_bitmap)
|
||||
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
|
||||
}
|
||||
|
||||
|
@ -54,7 +54,6 @@ struct nested_vmx_msrs {
|
||||
|
||||
struct vmcs_config {
|
||||
int size;
|
||||
int order;
|
||||
u32 basic_cap;
|
||||
u32 revision_id;
|
||||
u32 pin_based_exec_ctrl;
|
||||
|
@ -12,8 +12,6 @@
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(enable_evmcs);
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
|
||||
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
|
||||
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
|
||||
{EVMCS1_OFFSET(name), clean_field}
|
||||
@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
|
||||
};
|
||||
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
|
||||
{
|
||||
vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
|
||||
@ -362,6 +361,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
|
||||
case MSR_IA32_VMX_PROCBASED_CTLS2:
|
||||
ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
|
||||
break;
|
||||
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
|
||||
case MSR_IA32_VMX_PINBASED_CTLS:
|
||||
ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
|
||||
break;
|
||||
|
@ -59,12 +59,12 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
|
||||
SECONDARY_EXEC_SHADOW_VMCS | \
|
||||
SECONDARY_EXEC_TSC_SCALING | \
|
||||
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
|
||||
#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
|
||||
#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
|
||||
(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
|
||||
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
|
||||
#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
|
||||
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
|
||||
struct evmcs_field {
|
||||
u16 offset;
|
||||
u16 clean_field;
|
||||
@ -73,26 +73,56 @@ struct evmcs_field {
|
||||
extern const struct evmcs_field vmcs_field_to_evmcs_1[];
|
||||
extern const unsigned int nr_evmcs_1_fields;
|
||||
|
||||
static __always_inline int get_evmcs_offset(unsigned long field,
|
||||
u16 *clean_field)
|
||||
static __always_inline int evmcs_field_offset(unsigned long field,
|
||||
u16 *clean_field)
|
||||
{
|
||||
unsigned int index = ROL16(field, 6);
|
||||
const struct evmcs_field *evmcs_field;
|
||||
|
||||
if (unlikely(index >= nr_evmcs_1_fields)) {
|
||||
WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
|
||||
field);
|
||||
if (unlikely(index >= nr_evmcs_1_fields))
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
evmcs_field = &vmcs_field_to_evmcs_1[index];
|
||||
|
||||
/*
|
||||
* Use offset=0 to detect holes in eVMCS. This offset belongs to
|
||||
* 'revision_id' but this field has no encoding and is supposed to
|
||||
* be accessed directly.
|
||||
*/
|
||||
if (unlikely(!evmcs_field->offset))
|
||||
return -ENOENT;
|
||||
|
||||
if (clean_field)
|
||||
*clean_field = evmcs_field->clean_field;
|
||||
|
||||
return evmcs_field->offset;
|
||||
}
|
||||
|
||||
static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
|
||||
unsigned long field, u16 offset)
|
||||
{
|
||||
/*
|
||||
* vmcs12_read_any() doesn't care whether the supplied structure
|
||||
* is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes
|
||||
* the exact offset of the required field, use it for convenience
|
||||
* here.
|
||||
*/
|
||||
return vmcs12_read_any((void *)evmcs, field, offset);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
|
||||
static __always_inline int get_evmcs_offset(unsigned long field,
|
||||
u16 *clean_field)
|
||||
{
|
||||
int offset = evmcs_field_offset(field, clean_field);
|
||||
|
||||
WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n",
|
||||
field);
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static __always_inline void evmcs_write64(unsigned long field, u64 value)
|
||||
{
|
||||
u16 clean_field;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
#include "cpuid.h"
|
||||
#include "evmcs.h"
|
||||
#include "hyperv.h"
|
||||
#include "mmu.h"
|
||||
#include "nested.h"
|
||||
@ -4851,18 +4852,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
|
||||
struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
|
||||
|
||||
/*
|
||||
* We should allocate a shadow vmcs for vmcs01 only when L1
|
||||
* executes VMXON and free it when L1 executes VMXOFF.
|
||||
* As it is invalid to execute VMXON twice, we shouldn't reach
|
||||
* here when vmcs01 already have an allocated shadow vmcs.
|
||||
* KVM allocates a shadow VMCS only when L1 executes VMXON and frees it
|
||||
* when L1 executes VMXOFF or the vCPU is forced out of nested
|
||||
* operation. VMXON faults if the CPU is already post-VMXON, so it
|
||||
* should be impossible to already have an allocated shadow VMCS. KVM
|
||||
* doesn't support virtualization of VMCS shadowing, so vmcs01 should
|
||||
* always be the loaded VMCS.
|
||||
*/
|
||||
WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
|
||||
if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs))
|
||||
return loaded_vmcs->shadow_vmcs;
|
||||
|
||||
loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
|
||||
if (loaded_vmcs->shadow_vmcs)
|
||||
vmcs_clear(loaded_vmcs->shadow_vmcs);
|
||||
|
||||
if (!loaded_vmcs->shadow_vmcs) {
|
||||
loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
|
||||
if (loaded_vmcs->shadow_vmcs)
|
||||
vmcs_clear(loaded_vmcs->shadow_vmcs);
|
||||
}
|
||||
return loaded_vmcs->shadow_vmcs;
|
||||
}
|
||||
|
||||
@ -5099,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
|
||||
if (!nested_vmx_check_permission(vcpu))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
|
||||
* any VMREAD sets the ALU flags for VMfailInvalid.
|
||||
*/
|
||||
if (vmx->nested.current_vmptr == INVALID_GPA ||
|
||||
(is_guest_mode(vcpu) &&
|
||||
get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
|
||||
return nested_vmx_failInvalid(vcpu);
|
||||
|
||||
/* Decode instruction info and find the field to read */
|
||||
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
|
||||
|
||||
offset = vmcs_field_to_offset(field);
|
||||
if (offset < 0)
|
||||
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
||||
if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
|
||||
/*
|
||||
* In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
|
||||
* any VMREAD sets the ALU flags for VMfailInvalid.
|
||||
*/
|
||||
if (vmx->nested.current_vmptr == INVALID_GPA ||
|
||||
(is_guest_mode(vcpu) &&
|
||||
get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
|
||||
return nested_vmx_failInvalid(vcpu);
|
||||
|
||||
if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
|
||||
copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
||||
offset = get_vmcs12_field_offset(field);
|
||||
if (offset < 0)
|
||||
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
||||
|
||||
/* Read the field, zero-extended to a u64 value */
|
||||
value = vmcs12_read_any(vmcs12, field, offset);
|
||||
if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
|
||||
copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
||||
|
||||
/* Read the field, zero-extended to a u64 value */
|
||||
value = vmcs12_read_any(vmcs12, field, offset);
|
||||
} else {
|
||||
/*
|
||||
* Hyper-V TLFS (as of 6.0b) explicitly states, that while an
|
||||
* enlightened VMCS is active VMREAD/VMWRITE instructions are
|
||||
* unsupported. Unfortunately, certain versions of Windows 11
|
||||
* don't comply with this requirement which is not enforced in
|
||||
* genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a
|
||||
* workaround, as misbehaving guests will panic on VM-Fail.
|
||||
* Note, enlightened VMCS is incompatible with shadow VMCS so
|
||||
* all VMREADs from L2 should go to L1.
|
||||
*/
|
||||
if (WARN_ON_ONCE(is_guest_mode(vcpu)))
|
||||
return nested_vmx_failInvalid(vcpu);
|
||||
|
||||
offset = evmcs_field_offset(field, NULL);
|
||||
if (offset < 0)
|
||||
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
||||
|
||||
/* Read the field, zero-extended to a u64 value */
|
||||
value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now copy part of this value to register or memory, as requested.
|
||||
@ -5214,7 +5239,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
|
||||
|
||||
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
|
||||
|
||||
offset = vmcs_field_to_offset(field);
|
||||
offset = get_vmcs12_field_offset(field);
|
||||
if (offset < 0)
|
||||
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
||||
|
||||
@ -6462,7 +6487,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
|
||||
max_idx = 0;
|
||||
for (i = 0; i < nr_vmcs12_fields; i++) {
|
||||
/* The vmcs12 table is very, very sparsely populated. */
|
||||
if (!vmcs_field_to_offset_table[i])
|
||||
if (!vmcs12_field_offsets[i])
|
||||
continue;
|
||||
|
||||
idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
|
||||
@ -6771,6 +6796,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
|
||||
}
|
||||
|
||||
struct kvm_x86_nested_ops vmx_nested_ops = {
|
||||
.leave_nested = vmx_leave_nested,
|
||||
.check_events = vmx_check_nested_events,
|
||||
.hv_timer_pending = nested_vmx_preemption_timer_pending,
|
||||
.triple_fault = nested_vmx_triple_fault,
|
||||
|
@ -8,7 +8,7 @@
|
||||
FIELD(number, name), \
|
||||
[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
|
||||
|
||||
const unsigned short vmcs_field_to_offset_table[] = {
|
||||
const unsigned short vmcs12_field_offsets[] = {
|
||||
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
|
||||
FIELD(POSTED_INTR_NV, posted_intr_nv),
|
||||
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
|
||||
@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = {
|
||||
FIELD(HOST_RSP, host_rsp),
|
||||
FIELD(HOST_RIP, host_rip),
|
||||
};
|
||||
const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table);
|
||||
const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
|
||||
|
@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void)
|
||||
CHECK_OFFSET(guest_pml_index, 996);
|
||||
}
|
||||
|
||||
extern const unsigned short vmcs_field_to_offset_table[];
|
||||
extern const unsigned short vmcs12_field_offsets[];
|
||||
extern const unsigned int nr_vmcs12_fields;
|
||||
|
||||
static inline short vmcs_field_to_offset(unsigned long field)
|
||||
static inline short get_vmcs12_field_offset(unsigned long field)
|
||||
{
|
||||
unsigned short offset;
|
||||
unsigned int index;
|
||||
@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field)
|
||||
return -ENOENT;
|
||||
|
||||
index = array_index_nospec(index, nr_vmcs12_fields);
|
||||
offset = vmcs_field_to_offset_table[index];
|
||||
offset = vmcs12_field_offsets[index];
|
||||
if (offset == 0)
|
||||
return -ENOENT;
|
||||
return offset;
|
||||
|
@ -1487,11 +1487,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
|
||||
static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len)
|
||||
{
|
||||
/*
|
||||
* Emulation of instructions in SGX enclaves is impossible as RIP does
|
||||
* not point tthe failing instruction, and even if it did, the code
|
||||
* not point at the failing instruction, and even if it did, the code
|
||||
* stream is inaccessible. Inject #UD instead of exiting to userspace
|
||||
* so that guest userspace can't DoS the guest simply by triggering
|
||||
* emulation (enclaves are CPL3 only).
|
||||
@ -2603,7 +2604,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
return -EIO;
|
||||
|
||||
vmcs_conf->size = vmx_msr_high & 0x1fff;
|
||||
vmcs_conf->order = get_order(vmcs_conf->size);
|
||||
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
|
||||
|
||||
vmcs_conf->revision_id = vmx_msr_low;
|
||||
@ -2628,7 +2628,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
|
||||
struct page *pages;
|
||||
struct vmcs *vmcs;
|
||||
|
||||
pages = __alloc_pages_node(node, flags, vmcs_config.order);
|
||||
pages = __alloc_pages_node(node, flags, 0);
|
||||
if (!pages)
|
||||
return NULL;
|
||||
vmcs = page_address(pages);
|
||||
@ -2647,7 +2647,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
|
||||
|
||||
void free_vmcs(struct vmcs *vmcs)
|
||||
{
|
||||
free_pages((unsigned long)vmcs, vmcs_config.order);
|
||||
free_page((unsigned long)vmcs);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4094,10 +4094,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
|
||||
vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
|
||||
|
||||
/*
|
||||
* If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites
|
||||
* HOST_IA32_SYSENTER_ESP.
|
||||
* SYSENTER is used for 32-bit system calls on either 32-bit or
|
||||
* 64-bit kernels. It is always zero If neither is allowed, otherwise
|
||||
* vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may
|
||||
* have already done so!).
|
||||
*/
|
||||
vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
|
||||
if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
|
||||
vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
|
||||
|
||||
rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
|
||||
vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
|
||||
|
||||
@ -4901,8 +4905,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
dr6 = vmx_get_exit_qual(vcpu);
|
||||
if (!(vcpu->guest_debug &
|
||||
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
|
||||
/*
|
||||
* If the #DB was due to ICEBP, a.k.a. INT1, skip the
|
||||
* instruction. ICEBP generates a trap-like #DB, but
|
||||
* despite its interception control being tied to #DB,
|
||||
* is an instruction intercept, i.e. the VM-Exit occurs
|
||||
* on the ICEBP itself. Note, skipping ICEBP also
|
||||
* clears STI and MOVSS blocking.
|
||||
*
|
||||
* For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
|
||||
* if single-step is enabled in RFLAGS and STI or MOVSS
|
||||
* blocking is active, as the CPU doesn't set the bit
|
||||
* on VM-Exit due to #DB interception. VM-Entry has a
|
||||
* consistency check that a single-step #DB is pending
|
||||
* in this scenario as the previous instruction cannot
|
||||
* have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
|
||||
* don't modify RFLAGS), therefore the one instruction
|
||||
* delay when activating single-step breakpoints must
|
||||
* have already expired. Note, the CPU sets/clears BS
|
||||
* as appropriate for all other VM-Exits types.
|
||||
*/
|
||||
if (is_icebp(intr_info))
|
||||
WARN_ON(!skip_emulated_instruction(vcpu));
|
||||
else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
|
||||
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
|
||||
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
|
||||
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
|
||||
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
|
||||
|
||||
kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
|
||||
return 1;
|
||||
@ -5397,7 +5426,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
gpa_t gpa;
|
||||
|
||||
if (!vmx_can_emulate_instruction(vcpu, NULL, 0))
|
||||
if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
|
@ -3535,6 +3535,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
if (data & ~supported_xss)
|
||||
return 1;
|
||||
vcpu->arch.ia32_xss = data;
|
||||
kvm_update_cpuid_runtime(vcpu);
|
||||
break;
|
||||
case MSR_SMI_COUNT:
|
||||
if (!msr_info->host_initiated)
|
||||
@ -4229,6 +4230,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_SREGS2:
|
||||
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
|
||||
case KVM_CAP_VCPU_ATTRIBUTES:
|
||||
case KVM_CAP_SYS_ATTRIBUTES:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_EXIT_HYPERCALL:
|
||||
@ -4331,7 +4333,49 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
break;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
|
||||
{
|
||||
void __user *uaddr = (void __user*)(unsigned long)attr->addr;
|
||||
|
||||
if ((u64)(unsigned long)uaddr != attr->addr)
|
||||
return ERR_PTR(-EFAULT);
|
||||
return uaddr;
|
||||
}
|
||||
|
||||
static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 __user *uaddr = kvm_get_attr_addr(attr);
|
||||
|
||||
if (attr->group)
|
||||
return -ENXIO;
|
||||
|
||||
if (IS_ERR(uaddr))
|
||||
return PTR_ERR(uaddr);
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_X86_XCOMP_GUEST_SUPP:
|
||||
if (put_user(supported_xcr0, uaddr))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
default:
|
||||
return -ENXIO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
|
||||
{
|
||||
if (attr->group)
|
||||
return -ENXIO;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_X86_XCOMP_GUEST_SUPP:
|
||||
return 0;
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
}
|
||||
|
||||
long kvm_arch_dev_ioctl(struct file *filp,
|
||||
@ -4422,6 +4466,22 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
case KVM_GET_SUPPORTED_HV_CPUID:
|
||||
r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
|
||||
break;
|
||||
case KVM_GET_DEVICE_ATTR: {
|
||||
struct kvm_device_attr attr;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
|
||||
break;
|
||||
r = kvm_x86_dev_get_attr(&attr);
|
||||
break;
|
||||
}
|
||||
case KVM_HAS_DEVICE_ATTR: {
|
||||
struct kvm_device_attr attr;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
|
||||
break;
|
||||
r = kvm_x86_dev_has_attr(&attr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
@ -4860,8 +4920,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.apic->sipi_vector = events->sipi_vector;
|
||||
|
||||
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
|
||||
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
|
||||
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
|
||||
kvm_x86_ops.nested_ops->leave_nested(vcpu);
|
||||
kvm_smm_changed(vcpu, events->smi.smm);
|
||||
}
|
||||
|
||||
vcpu->arch.smi_pending = events->smi.pending;
|
||||
|
||||
@ -5022,11 +5084,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
|
||||
static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
|
||||
u64 __user *uaddr = kvm_get_attr_addr(attr);
|
||||
int r;
|
||||
|
||||
if ((u64)(unsigned long)uaddr != attr->addr)
|
||||
return -EFAULT;
|
||||
if (IS_ERR(uaddr))
|
||||
return PTR_ERR(uaddr);
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_VCPU_TSC_OFFSET:
|
||||
@ -5045,12 +5107,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
|
||||
static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
|
||||
u64 __user *uaddr = kvm_get_attr_addr(attr);
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
int r;
|
||||
|
||||
if ((u64)(unsigned long)uaddr != attr->addr)
|
||||
return -EFAULT;
|
||||
if (IS_ERR(uaddr))
|
||||
return PTR_ERR(uaddr);
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_VCPU_TSC_OFFSET: {
|
||||
@ -6810,6 +6872,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
|
||||
|
||||
static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len)
|
||||
{
|
||||
return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type,
|
||||
insn, insn_len);
|
||||
}
|
||||
|
||||
int handle_ud(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
|
||||
@ -6817,7 +6886,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
|
||||
char sig[5]; /* ud2; .ascii "kvm" */
|
||||
struct x86_exception e;
|
||||
|
||||
if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
|
||||
if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
|
||||
return 1;
|
||||
|
||||
if (force_emulation_prefix &&
|
||||
@ -8193,7 +8262,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
bool writeback = true;
|
||||
bool write_fault_to_spt;
|
||||
|
||||
if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
|
||||
if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
|
||||
return 1;
|
||||
|
||||
vcpu->arch.l1tf_flush_l1d = true;
|
||||
@ -9706,7 +9775,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
|
||||
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
|
||||
}
|
||||
|
||||
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
|
||||
static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
return;
|
||||
@ -11209,7 +11278,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
|
||||
vcpu->arch.msr_misc_features_enables = 0;
|
||||
|
||||
vcpu->arch.xcr0 = XFEATURE_MASK_FP;
|
||||
__kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
|
||||
__kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
|
||||
}
|
||||
|
||||
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
|
||||
@ -11226,8 +11296,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
|
||||
|
||||
vcpu->arch.ia32_xss = 0;
|
||||
|
||||
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
|
||||
|
||||
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
|
||||
|
@ -316,10 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
|
||||
"\tnotq %0\n"
|
||||
"\t" LOCK_PREFIX "andq %0, %2\n"
|
||||
"2:\n"
|
||||
"\t.section .fixup,\"ax\"\n"
|
||||
"3:\tjmp\t2b\n"
|
||||
"\t.previous\n"
|
||||
_ASM_EXTABLE_UA(1b, 3b)
|
||||
_ASM_EXTABLE_UA(1b, 2b)
|
||||
: "=r" (evtchn_pending_sel),
|
||||
"+m" (vi->evtchn_pending_sel),
|
||||
"+m" (v->arch.xen.evtchn_pending_sel)
|
||||
@ -335,10 +332,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
|
||||
"\tnotl %0\n"
|
||||
"\t" LOCK_PREFIX "andl %0, %2\n"
|
||||
"2:\n"
|
||||
"\t.section .fixup,\"ax\"\n"
|
||||
"3:\tjmp\t2b\n"
|
||||
"\t.previous\n"
|
||||
_ASM_EXTABLE_UA(1b, 3b)
|
||||
_ASM_EXTABLE_UA(1b, 2b)
|
||||
: "=r" (evtchn_pending_sel32),
|
||||
"+m" (vi->evtchn_pending_sel),
|
||||
"+m" (v->arch.xen.evtchn_pending_sel)
|
||||
|
@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
|
||||
#define KVM_CAP_VM_GPA_BITS 207
|
||||
#define KVM_CAP_XSAVE2 208
|
||||
#define KVM_CAP_SYS_ATTRIBUTES 209
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -452,6 +452,9 @@ struct kvm_sync_regs {
|
||||
|
||||
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
|
||||
|
||||
/* attributes for system fd (group 0) */
|
||||
#define KVM_X86_XCOMP_GUEST_SUPP 0
|
||||
|
||||
struct kvm_vmx_nested_state_data {
|
||||
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
|
@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
|
||||
#define KVM_CAP_VM_GPA_BITS 207
|
||||
#define KVM_CAP_XSAVE2 208
|
||||
#define KVM_CAP_SYS_ATTRIBUTES 209
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -85,6 +85,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
|
||||
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
|
||||
TEST_GEN_PROGS_x86_64 += demand_paging_test
|
||||
TEST_GEN_PROGS_x86_64 += dirty_log_test
|
||||
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
|
||||
|
@ -345,7 +345,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
|
||||
* guest_code - The vCPU's entry point
|
||||
*/
|
||||
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
|
||||
void vm_xsave_req_perm(void);
|
||||
|
||||
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
|
||||
|
||||
|
@ -458,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
|
||||
struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
|
||||
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
|
||||
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
|
||||
void vm_xsave_req_perm(int bit);
|
||||
|
||||
enum x86_page_size {
|
||||
X86_PAGE_SIZE_4K = 0,
|
||||
|
@ -393,13 +393,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
|
||||
struct kvm_vm *vm;
|
||||
int i;
|
||||
|
||||
#ifdef __x86_64__
|
||||
/*
|
||||
* Permission needs to be requested before KVM_SET_CPUID2.
|
||||
*/
|
||||
vm_xsave_req_perm();
|
||||
#endif
|
||||
|
||||
/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
|
||||
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
|
||||
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
|
||||
|
@ -665,16 +665,31 @@ static bool is_xfd_supported(void)
|
||||
return !!(eax & CPUID_XFD_BIT);
|
||||
}
|
||||
|
||||
void vm_xsave_req_perm(void)
|
||||
void vm_xsave_req_perm(int bit)
|
||||
{
|
||||
unsigned long bitmask;
|
||||
int kvm_fd;
|
||||
u64 bitmask;
|
||||
long rc;
|
||||
struct kvm_device_attr attr = {
|
||||
.group = 0,
|
||||
.attr = KVM_X86_XCOMP_GUEST_SUPP,
|
||||
.addr = (unsigned long) &bitmask
|
||||
};
|
||||
|
||||
kvm_fd = open_kvm_dev_path_or_exit();
|
||||
rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
|
||||
close(kvm_fd);
|
||||
if (rc == -1 && (errno == ENXIO || errno == EINVAL))
|
||||
exit(KSFT_SKIP);
|
||||
TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
|
||||
if (!(bitmask & (1ULL << bit)))
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
if (!is_xfd_supported())
|
||||
return;
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
|
||||
|
||||
rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
|
||||
XSTATE_XTILE_DATA_BIT);
|
||||
/*
|
||||
* The older kernel version(<5.15) can't support
|
||||
* ARCH_REQ_XCOMP_GUEST_PERM and directly return.
|
||||
@ -684,7 +699,7 @@ void vm_xsave_req_perm(void)
|
||||
|
||||
rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
|
||||
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
|
||||
TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK,
|
||||
TEST_ASSERT(bitmask & (1ULL << bit),
|
||||
"prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
|
||||
bitmask);
|
||||
}
|
||||
|
@ -329,6 +329,8 @@ int main(int argc, char *argv[])
|
||||
u32 amx_offset;
|
||||
int stage, ret;
|
||||
|
||||
vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT);
|
||||
|
||||
/* Create VM */
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_code);
|
||||
|
||||
|
@ -105,7 +105,6 @@ static void guest_code(void *arg)
|
||||
|
||||
if (cpu_has_svm()) {
|
||||
run_guest(svm->vmcb, svm->vmcb_gpa);
|
||||
svm->vmcb->save.rip += 3;
|
||||
run_guest(svm->vmcb, svm->vmcb_gpa);
|
||||
} else {
|
||||
vmlaunch();
|
||||
|
@ -463,8 +463,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link, srcu_read_lock_held(&kvm->irq_srcu))
|
||||
if (kian->gsi == gsi) {
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
return true;
|
||||
@ -480,8 +480,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
|
||||
{
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
|
||||
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link, srcu_read_lock_held(&kvm->irq_srcu))
|
||||
if (kian->gsi == gsi)
|
||||
kian->irq_acked(kian);
|
||||
}
|
||||
|
@ -2248,7 +2248,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
|
||||
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
|
||||
|
||||
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
|
||||
{
|
||||
@ -2463,9 +2462,8 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn)
|
||||
}
|
||||
|
||||
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
|
||||
unsigned long addr, bool *async,
|
||||
bool write_fault, bool *writable,
|
||||
kvm_pfn_t *p_pfn)
|
||||
unsigned long addr, bool write_fault,
|
||||
bool *writable, kvm_pfn_t *p_pfn)
|
||||
{
|
||||
kvm_pfn_t pfn;
|
||||
pte_t *ptep;
|
||||
@ -2575,7 +2573,7 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
|
||||
if (vma == NULL)
|
||||
pfn = KVM_PFN_ERR_FAULT;
|
||||
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
|
||||
r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
|
||||
r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn);
|
||||
if (r == -EAGAIN)
|
||||
goto retry;
|
||||
if (r < 0)
|
||||
|
Loading…
Reference in New Issue
Block a user