mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
* Doc fixes
* selftests fixes * Add runstate information to the new Xen support * Allow compiling out the Xen interface * 32-bit PAE without EPT bugfix * NULL pointer dereference bugfix -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmA+lGcUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroMaMQf/Q8bQr5vVAeNk+1MyRmzNqFEbLqbe h50f4Wd2N+svZ6XinQH1vvuQm1WYj/g616Q3nCeYwCJyY34g5tf60XcuAMnVRIzw qc2IUvSAJ3faVElMrSA5thN3bkPzJpRrdIpQGBgOd+rT+eQkPSsJlTy34JJmvbmh xFGjoVj49tYEkFfpxEbtytW6QiYtPz/ai8SARRXbEUWO/pVzdkgK5XWshRhE9vpB GLCEXUngdPokJMblRMuK4YOSFQXXHobAJAgPwSzguDV41qezXaKOGYOLe7+V+0kH z607RnQc1wGgsLanT13okYMQr09/XCjpvFkZ9CK2bIJPsyWP+ihA/37hVQ== =1GNo -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM fixes from Paolo Bonzini: - Doc fixes - selftests fixes - Add runstate information to the new Xen support - Allow compiling out the Xen interface - 32-bit PAE without EPT bugfix - NULL pointer dereference bugfix * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: SVM: Clear the CR4 register on reset KVM: x86/xen: Add support for vCPU runstate information KVM: x86/xen: Fix return code when clearing vcpu_info and vcpu_time_info selftests: kvm: Mmap the entire vcpu mmap area KVM: Documentation: Fix index for KVM_CAP_PPC_DAWR1 KVM: x86: allow compiling out the Xen hypercall interface KVM: xen: flush deferred static key before checking it KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled KVM: x86: hyper-v: Fix Hyper-V context null-ptr-deref KVM: x86: remove misplaced comment on active_mmu_pages KVM: Documentation: rectify rst markup in kvm_run->flags Documentation: kvm: fix messy conversion from .txt to .rst
This commit is contained in:
commit
cee407c5cc
@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits.
|
||||
-EFAULT if struct kvm_reinject_control cannot be read,
|
||||
-EINVAL if the supplied shift or flags are invalid,
|
||||
-ENOMEM if unable to allocate the new HPT,
|
||||
-ENOSPC if there was a hash collision
|
||||
|
||||
::
|
||||
|
||||
struct kvm_ppc_rmmu_info {
|
||||
struct kvm_ppc_radix_geom {
|
||||
__u8 page_shift;
|
||||
__u8 level_bits[4];
|
||||
__u8 pad[3];
|
||||
} geometries[8];
|
||||
__u32 ap_encodings[8];
|
||||
};
|
||||
|
||||
The geometries[] field gives up to 8 supported geometries for the
|
||||
radix page table, in terms of the log base 2 of the smallest page
|
||||
size, and the number of bits indexed at each level of the tree, from
|
||||
the PTE level up to the PGD level in that order. Any unused entries
|
||||
will have 0 in the page_shift field.
|
||||
|
||||
The ap_encodings gives the supported page sizes and their AP field
|
||||
encodings, encoded with the AP value in the top 3 bits and the log
|
||||
base 2 of the page size in the bottom 6 bits.
|
||||
|
||||
4.102 KVM_PPC_RESIZE_HPT_PREPARE
|
||||
--------------------------------
|
||||
|
||||
:Capability: KVM_CAP_SPAPR_RESIZE_HPT
|
||||
:Architectures: powerpc
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_ppc_resize_hpt (in)
|
||||
:Returns: 0 on successful completion,
|
||||
>0 if a new HPT is being prepared, the value is an estimated
|
||||
number of milliseconds until preparation is complete,
|
||||
-EFAULT if struct kvm_reinject_control cannot be read,
|
||||
-EINVAL if the supplied shift or flags are invalid,when moving existing
|
||||
HPT entries to the new HPT,
|
||||
-EIO on other error conditions
|
||||
|
||||
Used to implement the PAPR extension for runtime resizing of a guest's
|
||||
Hashed Page Table (HPT). Specifically this starts, stops or monitors
|
||||
the preparation of a new potential HPT for the guest, essentially
|
||||
implementing the H_RESIZE_HPT_PREPARE hypercall.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_ppc_resize_hpt {
|
||||
__u64 flags;
|
||||
__u32 shift;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
If called with shift > 0 when there is no pending HPT for the guest,
|
||||
this begins preparation of a new pending HPT of size 2^(shift) bytes.
|
||||
It then returns a positive integer with the estimated number of
|
||||
@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until
|
||||
it returns <= 0. The first call will initiate preparation, subsequent
|
||||
ones will monitor preparation until it completes or fails.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_ppc_resize_hpt {
|
||||
__u64 flags;
|
||||
__u32 shift;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
4.103 KVM_PPC_RESIZE_HPT_COMMIT
|
||||
-------------------------------
|
||||
|
||||
@ -3956,6 +3919,14 @@ Hashed Page Table (HPT). Specifically this requests that the guest be
|
||||
transferred to working with the new HPT, essentially implementing the
|
||||
H_RESIZE_HPT_COMMIT hypercall.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_ppc_resize_hpt {
|
||||
__u64 flags;
|
||||
__u32 shift;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
|
||||
returned 0 with the same parameters. In other cases
|
||||
KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
|
||||
@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded.
|
||||
|
||||
On failure, the guest will still be operating on its previous HPT.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_ppc_resize_hpt {
|
||||
__u64 flags;
|
||||
__u32 shift;
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
|
||||
-----------------------------------
|
||||
|
||||
@ -4915,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above.
|
||||
union {
|
||||
__u64 gpa;
|
||||
__u64 pad[8];
|
||||
struct {
|
||||
__u64 state;
|
||||
__u64 state_entry_time;
|
||||
__u64 time_running;
|
||||
__u64 time_runnable;
|
||||
__u64 time_blocked;
|
||||
__u64 time_offline;
|
||||
} runstate;
|
||||
} u;
|
||||
};
|
||||
|
||||
@ -4927,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
|
||||
Sets the guest physical address of an additional pvclock structure
|
||||
for a given vCPU. This is typically used for guest vsyscall support.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
|
||||
Sets the guest physical address of the vcpu_runstate_info for a given
|
||||
vCPU. This is how a Xen guest tracks CPU state such as steal time.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
|
||||
Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
|
||||
the given vCPU from the .u.runstate.state member of the structure.
|
||||
KVM automatically accounts running and runnable time but blocked
|
||||
and offline states are only entered explicitly.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
|
||||
Sets all fields of the vCPU runstate data from the .u.runstate member
|
||||
of the structure, including the current runstate. The state_entry_time
|
||||
must equal the sum of the other four times.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
|
||||
This *adds* the contents of the .u.runstate members of the structure
|
||||
to the corresponding members of the given vCPU's runstate data, thus
|
||||
permitting atomic adjustments to the runstate times. The adjustment
|
||||
to the state_entry_time must equal the sum of the adjustments to the
|
||||
other four times. The state field must be set to -1, or to a valid
|
||||
runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
|
||||
or RUNSTATE_offline) to set the current accounted state as of the
|
||||
adjusted state_entry_time.
|
||||
|
||||
4.130 KVM_XEN_VCPU_GET_ATTR
|
||||
---------------------------
|
||||
|
||||
@ -4939,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
|
||||
Allows Xen vCPU attributes to be read. For the structure and types,
|
||||
see KVM_XEN_VCPU_SET_ATTR above.
|
||||
|
||||
The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
|
||||
with the KVM_XEN_VCPU_GET_ATTR ioctl.
|
||||
|
||||
5. The kvm_run structure
|
||||
========================
|
||||
|
||||
@ -5000,7 +4999,8 @@ local APIC is not used.
|
||||
__u16 flags;
|
||||
|
||||
More architecture-specific flags detailing state of the VCPU that may
|
||||
affect the device's behavior. Current defined flags:
|
||||
affect the device's behavior. Current defined flags::
|
||||
|
||||
/* x86, set if the VCPU is in system management mode */
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
/* x86, set if bus lock detected in VM */
|
||||
@ -6217,7 +6217,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
|
||||
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
|
||||
KVM_RUN_BUS_LOCK flag is used to distinguish between them.
|
||||
|
||||
7.22 KVM_CAP_PPC_DAWR1
|
||||
7.23 KVM_CAP_PPC_DAWR1
|
||||
----------------------
|
||||
|
||||
:Architectures: ppc
|
||||
@ -6702,6 +6702,7 @@ PVHVM guests. Valid flags are::
|
||||
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
|
||||
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
|
||||
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
|
||||
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
|
||||
ioctl is available, for the guest to set its hypercall page.
|
||||
@ -6716,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
|
||||
KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
|
||||
for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
|
||||
vcpu_info is set.
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
|
||||
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
|
||||
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
|
||||
|
@ -535,10 +535,16 @@ struct kvm_vcpu_hv {
|
||||
/* Xen HVM per vcpu emulation context */
|
||||
struct kvm_vcpu_xen {
|
||||
u64 hypercall_rip;
|
||||
u32 current_runstate;
|
||||
bool vcpu_info_set;
|
||||
bool vcpu_time_info_set;
|
||||
bool runstate_set;
|
||||
struct gfn_to_hva_cache vcpu_info_cache;
|
||||
struct gfn_to_hva_cache vcpu_time_info_cache;
|
||||
struct gfn_to_hva_cache runstate_cache;
|
||||
u64 last_steal;
|
||||
u64 runstate_entry_time;
|
||||
u64 runstate_times[4];
|
||||
};
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
@ -939,9 +945,6 @@ struct kvm_arch {
|
||||
unsigned int indirect_shadow_pages;
|
||||
u8 mmu_valid_gen;
|
||||
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
|
||||
/*
|
||||
* Hash table of struct kvm_mmu_page.
|
||||
*/
|
||||
struct list_head active_mmu_pages;
|
||||
struct list_head zapped_obsolete_pages;
|
||||
struct list_head lpage_disallowed_mmu_pages;
|
||||
|
@ -103,6 +103,15 @@ config KVM_AMD_SEV
|
||||
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
|
||||
with Encrypted State (SEV-ES) on AMD processors.
|
||||
|
||||
config KVM_XEN
|
||||
bool "Support for Xen hypercall interface"
|
||||
depends on KVM
|
||||
help
|
||||
Provides KVM support for hosting Xen HVM guests and
|
||||
passing Xen hypercalls to userspace.
|
||||
|
||||
If in doubt, say "N".
|
||||
|
||||
config KVM_MMU_AUDIT
|
||||
bool "Audit KVM MMU"
|
||||
depends on KVM && TRACEPOINTS
|
||||
|
@ -14,11 +14,12 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
|
||||
$(KVM)/dirty_ring.o
|
||||
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
|
||||
|
||||
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
|
||||
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
|
||||
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
|
||||
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
|
||||
mmu/spte.o
|
||||
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
|
||||
kvm-$(CONFIG_KVM_XEN) += xen.o
|
||||
|
||||
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
|
||||
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
|
||||
|
@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
|
||||
struct kvm_vcpu_hv_synic *synic;
|
||||
|
||||
vcpu = get_vcpu_by_vpidx(kvm, vpidx);
|
||||
if (!vcpu)
|
||||
if (!vcpu || !to_hv_vcpu(vcpu))
|
||||
return NULL;
|
||||
synic = to_hv_synic(vcpu);
|
||||
return (synic->active) ? synic : NULL;
|
||||
|
@ -81,15 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
|
||||
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* When using the EPT page-modification log, the GPAs in the log
|
||||
* would come from L2 rather than L1. Therefore, we need to rely
|
||||
* on write protection to record dirty pages. This also bypasses
|
||||
* PML, since writes now result in a vmexit. Note, this helper will
|
||||
* tag SPTEs as needing write-protection even if PML is disabled or
|
||||
* unsupported, but that's ok because the tag is consumed if and only
|
||||
* if PML is enabled. Omit the PML check to save a few uops.
|
||||
* When using the EPT page-modification log, the GPAs in the CPU dirty
|
||||
* log would come from L2 rather than L1. Therefore, we need to rely
|
||||
* on write protection to record dirty pages, which bypasses PML, since
|
||||
* writes now result in a vmexit. Note, the check on CPU dirty logging
|
||||
* being enabled is mandatory as the bits used to denote WP-only SPTEs
|
||||
* are reserved for NPT w/ PAE (32-bit KVM).
|
||||
*/
|
||||
return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
|
||||
return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
|
||||
kvm_x86_ops.cpu_dirty_log_size;
|
||||
}
|
||||
|
||||
bool is_nx_huge_page_enabled(void);
|
||||
|
@ -1200,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
|
||||
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
|
||||
|
||||
svm_set_cr4(&svm->vcpu, 0);
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
save->dr6 = 0xffff0ff0;
|
||||
kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
|
||||
|
@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
||||
struct kvm_host_map map;
|
||||
struct kvm_steal_time *st;
|
||||
|
||||
if (kvm_xen_msr_enabled(vcpu->kvm)) {
|
||||
kvm_xen_runstate_set_running(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||
return;
|
||||
|
||||
@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
|
||||
r = 1;
|
||||
break;
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
case KVM_CAP_XEN_HVM:
|
||||
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
|
||||
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
|
||||
KVM_XEN_HVM_CONFIG_SHARED_INFO;
|
||||
if (sched_info_on())
|
||||
r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
|
||||
break;
|
||||
#endif
|
||||
case KVM_CAP_SYNC_REGS:
|
||||
r = KVM_SYNC_X86_VALID_FIELDS;
|
||||
break;
|
||||
@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
if (vcpu->preempted && !vcpu->arch.guest_state_protected)
|
||||
vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
|
||||
|
||||
kvm_steal_time_set_preempted(vcpu);
|
||||
if (kvm_xen_msr_enabled(vcpu->kvm))
|
||||
kvm_xen_runstate_set_preempted(vcpu);
|
||||
else
|
||||
kvm_steal_time_set_preempted(vcpu);
|
||||
|
||||
static_call(kvm_x86_vcpu_put)(vcpu);
|
||||
vcpu->arch.last_host_tsc = rdtsc();
|
||||
/*
|
||||
@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
case KVM_GET_SUPPORTED_HV_CPUID:
|
||||
r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
|
||||
break;
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
case KVM_XEN_VCPU_GET_ATTR: {
|
||||
struct kvm_xen_vcpu_attr xva;
|
||||
|
||||
@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_xen_vcpu_set_attr(vcpu, &xva);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
@ -5654,6 +5669,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
kvm->arch.bsp_vcpu_id = arg;
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
case KVM_XEN_HVM_CONFIG: {
|
||||
struct kvm_xen_hvm_config xhc;
|
||||
r = -EFAULT;
|
||||
@ -5682,6 +5698,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = kvm_xen_hvm_set_attr(kvm, &xha);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case KVM_SET_CLOCK: {
|
||||
struct kvm_clock_data user_ns;
|
||||
u64 now_ns;
|
||||
@ -8040,7 +8057,10 @@ void kvm_arch_exit(void)
|
||||
kvm_mmu_module_exit();
|
||||
free_percpu(user_return_msrs);
|
||||
kmem_cache_destroy(x86_fpu_cache);
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
static_key_deferred_flush(&kvm_xen_enabled);
|
||||
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
|
||||
#endif
|
||||
}
|
||||
|
||||
static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
|
||||
|
@ -11,9 +11,11 @@
|
||||
#include "hyperv.h"
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/sched/stat.h>
|
||||
|
||||
#include <trace/events/kvm.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
||||
@ -61,6 +63,132 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
|
||||
{
|
||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||
u64 now = get_kvmclock_ns(v->kvm);
|
||||
u64 delta_ns = now - vx->runstate_entry_time;
|
||||
u64 run_delay = current->sched_info.run_delay;
|
||||
|
||||
if (unlikely(!vx->runstate_entry_time))
|
||||
vx->current_runstate = RUNSTATE_offline;
|
||||
|
||||
/*
|
||||
* Time waiting for the scheduler isn't "stolen" if the
|
||||
* vCPU wasn't running anyway.
|
||||
*/
|
||||
if (vx->current_runstate == RUNSTATE_running) {
|
||||
u64 steal_ns = run_delay - vx->last_steal;
|
||||
|
||||
delta_ns -= steal_ns;
|
||||
|
||||
vx->runstate_times[RUNSTATE_runnable] += steal_ns;
|
||||
}
|
||||
vx->last_steal = run_delay;
|
||||
|
||||
vx->runstate_times[vx->current_runstate] += delta_ns;
|
||||
vx->current_runstate = state;
|
||||
vx->runstate_entry_time = now;
|
||||
}
|
||||
|
||||
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
|
||||
{
|
||||
struct kvm_vcpu_xen *vx = &v->arch.xen;
|
||||
uint64_t state_entry_time;
|
||||
unsigned int offset;
|
||||
|
||||
kvm_xen_update_runstate(v, state);
|
||||
|
||||
if (!vx->runstate_set)
|
||||
return;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
|
||||
|
||||
offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* The only difference is alignment of uint64_t in 32-bit.
|
||||
* So the first field 'state' is accessed directly using
|
||||
* offsetof() (where its offset happens to be zero), while the
|
||||
* remaining fields which are all uint64_t, start at 'offset'
|
||||
* which we tweak here by adding 4.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, time) + 4);
|
||||
|
||||
if (v->kvm->arch.xen.long_mode)
|
||||
offset = offsetof(struct vcpu_runstate_info, state_entry_time);
|
||||
#endif
|
||||
/*
|
||||
* First write the updated state_entry_time at the appropriate
|
||||
* location determined by 'offset'.
|
||||
*/
|
||||
state_entry_time = vx->runstate_entry_time;
|
||||
state_entry_time |= XEN_RUNSTATE_UPDATE;
|
||||
|
||||
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
|
||||
sizeof(state_entry_time));
|
||||
BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
|
||||
sizeof(state_entry_time));
|
||||
|
||||
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
|
||||
&state_entry_time, offset,
|
||||
sizeof(state_entry_time)))
|
||||
return;
|
||||
smp_wmb();
|
||||
|
||||
/*
|
||||
* Next, write the new runstate. This is in the *same* place
|
||||
* for 32-bit and 64-bit guests, asserted here for paranoia.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, state));
|
||||
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
|
||||
sizeof(vx->current_runstate));
|
||||
BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
|
||||
sizeof(vx->current_runstate));
|
||||
|
||||
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
|
||||
&vx->current_runstate,
|
||||
offsetof(struct vcpu_runstate_info, state),
|
||||
sizeof(vx->current_runstate)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Write the actual runstate times immediately after the
|
||||
* runstate_entry_time.
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
|
||||
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
|
||||
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
|
||||
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
|
||||
sizeof(((struct compat_vcpu_runstate_info *)0)->time));
|
||||
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
|
||||
sizeof(vx->runstate_times));
|
||||
|
||||
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
|
||||
&vx->runstate_times[0],
|
||||
offset + sizeof(u64),
|
||||
sizeof(vx->runstate_times)))
|
||||
return;
|
||||
|
||||
smp_wmb();
|
||||
|
||||
/*
|
||||
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
|
||||
* runstate_entry_time field.
|
||||
*/
|
||||
|
||||
state_entry_time &= ~XEN_RUNSTATE_UPDATE;
|
||||
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
|
||||
&state_entry_time, offset,
|
||||
sizeof(state_entry_time)))
|
||||
return;
|
||||
}
|
||||
|
||||
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
|
||||
{
|
||||
u8 rc = 0;
|
||||
@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
/* No compat necessary here. */
|
||||
BUILD_BUG_ON(sizeof(struct vcpu_info) !=
|
||||
sizeof(struct compat_vcpu_info));
|
||||
BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
|
||||
offsetof(struct compat_vcpu_info, time));
|
||||
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
vcpu->arch.xen.vcpu_info_set = false;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
vcpu->arch.xen.vcpu_time_info_set = false;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
}
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (data->u.gpa == GPA_INVALID) {
|
||||
vcpu->arch.xen.runstate_set = false;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
|
||||
&vcpu->arch.xen.runstate_cache,
|
||||
data->u.gpa,
|
||||
sizeof(struct vcpu_runstate_info));
|
||||
if (!r) {
|
||||
vcpu->arch.xen.runstate_set = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (data->u.runstate.state > RUNSTATE_offline) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
kvm_xen_update_runstate(vcpu, data->u.runstate.state);
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (data->u.runstate.state > RUNSTATE_offline) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (data->u.runstate.state_entry_time !=
|
||||
(data->u.runstate.time_running +
|
||||
data->u.runstate.time_runnable +
|
||||
data->u.runstate.time_blocked +
|
||||
data->u.runstate.time_offline)) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (get_kvmclock_ns(vcpu->kvm) <
|
||||
data->u.runstate.state_entry_time) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
vcpu->arch.xen.current_runstate = data->u.runstate.state;
|
||||
vcpu->arch.xen.runstate_entry_time =
|
||||
data->u.runstate.state_entry_time;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_running] =
|
||||
data->u.runstate.time_running;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
|
||||
data->u.runstate.time_runnable;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
|
||||
data->u.runstate.time_blocked;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
|
||||
data->u.runstate.time_offline;
|
||||
vcpu->arch.xen.last_steal = current->sched_info.run_delay;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (data->u.runstate.state > RUNSTATE_offline &&
|
||||
data->u.runstate.state != (u64)-1) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
/* The adjustment must add up */
|
||||
if (data->u.runstate.state_entry_time !=
|
||||
(data->u.runstate.time_running +
|
||||
data->u.runstate.time_runnable +
|
||||
data->u.runstate.time_blocked +
|
||||
data->u.runstate.time_offline)) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (get_kvmclock_ns(vcpu->kvm) <
|
||||
(vcpu->arch.xen.runstate_entry_time +
|
||||
data->u.runstate.state_entry_time)) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
vcpu->arch.xen.runstate_entry_time +=
|
||||
data->u.runstate.state_entry_time;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
|
||||
data->u.runstate.time_running;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
|
||||
data->u.runstate.time_runnable;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
|
||||
data->u.runstate.time_blocked;
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
|
||||
data->u.runstate.time_offline;
|
||||
|
||||
if (data->u.runstate.state <= RUNSTATE_offline)
|
||||
kvm_xen_update_runstate(vcpu, data->u.runstate.state);
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
if (vcpu->arch.xen.runstate_set) {
|
||||
data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
|
||||
r = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
data->u.runstate.state = vcpu->arch.xen.current_runstate;
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
|
||||
if (!sched_info_on()) {
|
||||
r = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
data->u.runstate.state = vcpu->arch.xen.current_runstate;
|
||||
data->u.runstate.state_entry_time =
|
||||
vcpu->arch.xen.runstate_entry_time;
|
||||
data->u.runstate.time_running =
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_running];
|
||||
data->u.runstate.time_runnable =
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
|
||||
data->u.runstate.time_blocked =
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
|
||||
data->u.runstate.time_offline =
|
||||
vcpu->arch.xen.runstate_times[RUNSTATE_offline];
|
||||
r = 0;
|
||||
break;
|
||||
|
||||
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
#ifndef __ARCH_X86_KVM_XEN_H__
|
||||
#define __ARCH_X86_KVM_XEN_H__
|
||||
|
||||
#ifdef CONFIG_KVM_XEN
|
||||
#include <linux/jump_label_ratelimit.h>
|
||||
|
||||
extern struct static_key_false_deferred kvm_xen_enabled;
|
||||
@ -18,11 +19,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
|
||||
int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
|
||||
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
|
||||
int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
|
||||
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
|
||||
int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
|
||||
int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
|
||||
void kvm_xen_destroy_vm(struct kvm *kvm);
|
||||
|
||||
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
|
||||
{
|
||||
return static_branch_unlikely(&kvm_xen_enabled.key) &&
|
||||
kvm->arch.xen_hvm_config.msr;
|
||||
}
|
||||
|
||||
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
|
||||
{
|
||||
return static_branch_unlikely(&kvm_xen_enabled.key) &&
|
||||
@ -38,11 +44,59 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void kvm_xen_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* 32-bit compatibility definitions, also used natively in 32-bit build */
|
||||
#include <asm/pvclock-abi.h>
|
||||
#include <asm/xen/interface.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
|
||||
void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
|
||||
|
||||
static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
|
||||
}
|
||||
|
||||
static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* If the vCPU wasn't preempted but took a normal exit for
|
||||
* some reason (hypercalls, I/O, etc.), that is accounted as
|
||||
* still RUNSTATE_running, as the VMM is still operating on
|
||||
* behalf of the vCPU. Only if the VMM does actually block
|
||||
* does it need to enter RUNSTATE_blocked.
|
||||
*/
|
||||
if (vcpu->preempted)
|
||||
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
|
||||
}
|
||||
|
||||
/* 32-bit compatibility definitions, also used natively in 32-bit build */
|
||||
struct compat_arch_vcpu_info {
|
||||
unsigned int cr2;
|
||||
unsigned int pad[5];
|
||||
@ -75,4 +129,10 @@ struct compat_shared_info {
|
||||
struct compat_arch_shared_info arch;
|
||||
};
|
||||
|
||||
struct compat_vcpu_runstate_info {
|
||||
int state;
|
||||
uint64_t state_entry_time;
|
||||
uint64_t time[4];
|
||||
} __attribute__((packed));
|
||||
|
||||
#endif /* __ARCH_X86_KVM_XEN_H__ */
|
||||
|
@ -1154,6 +1154,7 @@ struct kvm_x86_mce {
|
||||
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
|
||||
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
|
||||
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
|
||||
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
|
||||
|
||||
struct kvm_xen_hvm_config {
|
||||
__u32 flags;
|
||||
@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr {
|
||||
union {
|
||||
__u64 gpa;
|
||||
__u64 pad[8];
|
||||
struct {
|
||||
__u64 state;
|
||||
__u64 state_entry_time;
|
||||
__u64 time_running;
|
||||
__u64 time_runnable;
|
||||
__u64 time_blocked;
|
||||
__u64 time_offline;
|
||||
} runstate;
|
||||
} u;
|
||||
};
|
||||
|
||||
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
|
||||
|
||||
/* Secure Encrypted Virtualization command */
|
||||
enum sev_cmd_id {
|
||||
|
@ -21,6 +21,8 @@
|
||||
#define KVM_UTIL_PGS_PER_HUGEPG 512
|
||||
#define KVM_UTIL_MIN_PFN 2
|
||||
|
||||
static int vcpu_mmap_sz(void);
|
||||
|
||||
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
|
||||
static void *align(void *x, size_t size)
|
||||
{
|
||||
@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
|
||||
vcpu->dirty_gfns = NULL;
|
||||
}
|
||||
|
||||
ret = munmap(vcpu->state, sizeof(*vcpu->state));
|
||||
ret = munmap(vcpu->state, vcpu_mmap_sz());
|
||||
TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
|
||||
"errno: %i", ret, errno);
|
||||
close(vcpu->fd);
|
||||
@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
|
||||
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
|
||||
vcpu_mmap_sz(), sizeof(*vcpu->state));
|
||||
vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
|
||||
vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
|
||||
TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
|
||||
"vcpu id: %u errno: %i", vcpuid, errno);
|
||||
|
@ -13,19 +13,27 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <sched.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#define VCPU_ID 5
|
||||
|
||||
#define SHINFO_REGION_GVA 0xc0000000ULL
|
||||
#define SHINFO_REGION_GPA 0xc0000000ULL
|
||||
#define SHINFO_REGION_SLOT 10
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
|
||||
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
|
||||
|
||||
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
|
||||
|
||||
static struct kvm_vm *vm;
|
||||
|
||||
#define XEN_HYPERCALL_MSR 0x40000000
|
||||
|
||||
#define MIN_STEAL_TIME 50000
|
||||
|
||||
struct pvclock_vcpu_time_info {
|
||||
u32 version;
|
||||
u32 pad0;
|
||||
@ -43,11 +51,67 @@ struct pvclock_wall_clock {
|
||||
u32 nsec;
|
||||
} __attribute__((__packed__));
|
||||
|
||||
/*
 * Test-local view of the runstate record that the guest reads at
 * RUNSTATE_VADDR and the host reads through RUNSTATE_ADDR.
 *
 * Not packed: the compiler's natural alignment decides where
 * state_entry_time lands after the 32-bit 'state' word.
 */
struct vcpu_runstate_info {
	uint32_t state;			/* current RUNSTATE_* value */
	uint64_t state_entry_time;	/* compared against the sum of time[] by the host */
	uint64_t time[4];		/* per-state counters, indexed by RUNSTATE_* */
};
|
||||
|
||||
/*
 * Runstate identifiers.  The values double as indices into
 * vcpu_runstate_info.time[] and as the stage numbers the guest passes to
 * GUEST_SYNC() for the "set state manually" part of the test.
 */
#define RUNSTATE_running	0
#define RUNSTATE_runnable	1
#define RUNSTATE_blocked	2
#define RUNSTATE_offline	3
|
||||
|
||||
static void guest_code(void)
|
||||
{
|
||||
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
|
||||
|
||||
/* Test having the host set runstates manually */
|
||||
GUEST_SYNC(RUNSTATE_runnable);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
|
||||
GUEST_ASSERT(rs->state == 0);
|
||||
|
||||
GUEST_SYNC(RUNSTATE_blocked);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
|
||||
GUEST_ASSERT(rs->state == 0);
|
||||
|
||||
GUEST_SYNC(RUNSTATE_offline);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
|
||||
GUEST_ASSERT(rs->state == 0);
|
||||
|
||||
/* Test runstate time adjust */
|
||||
GUEST_SYNC(4);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
|
||||
|
||||
/* Test runstate time set */
|
||||
GUEST_SYNC(5);
|
||||
GUEST_ASSERT(rs->state_entry_time >= 0x8000);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
|
||||
|
||||
/* sched_yield() should result in some 'runnable' time */
|
||||
GUEST_SYNC(6);
|
||||
GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
/*
 * Return the calling thread's accumulated scheduler run delay.
 *
 * The value is the second field of /proc/<tid>/schedstat, which the
 * kernel's scheduler-statistics documentation describes as the time the
 * task spent waiting on a runqueue.
 *
 * The statistic is mandatory for the caller, which spins until the value
 * grows.  If the file cannot be opened or does not contain two numbers,
 * this prints a diagnostic and terminates the process instead of
 * dereferencing a NULL stream or returning an uninitialised value.
 *
 * Return: the run delay as reported by the kernel (never reached on error).
 */
static long get_run_delay(void)
{
	char path[64];
	long val[2];
	FILE *fp;

	/* syscall() returns long; the cast keeps it matched to "%ld". */
	snprintf(path, sizeof(path), "/proc/%ld/schedstat",
		 (long)syscall(SYS_gettid));

	fp = fopen(path, "r");
	if (!fp) {
		perror(path);
		exit(EXIT_FAILURE);
	}

	/* Both fields must parse, otherwise val[1] would be garbage. */
	if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) != 2) {
		fprintf(stderr, "%s: unexpected schedstat format\n", path);
		fclose(fp);
		exit(EXIT_FAILURE);
	}
	fclose(fp);

	return val[1];
}
|
||||
|
||||
static int cmp_timespec(struct timespec *a, struct timespec *b)
|
||||
{
|
||||
if (a->tv_sec > b->tv_sec)
|
||||
@ -66,12 +130,14 @@ int main(int argc, char *argv[])
|
||||
{
|
||||
struct timespec min_ts, max_ts, vm_ts;
|
||||
|
||||
if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
|
||||
KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
|
||||
int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
|
||||
if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
|
||||
print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
|
||||
exit(KSFT_SKIP);
|
||||
}
|
||||
|
||||
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
|
||||
|
||||
clock_gettime(CLOCK_REALTIME, &min_ts);
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
|
||||
@ -80,6 +146,7 @@ int main(int argc, char *argv[])
|
||||
/* Map a region for the shared_info page */
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
|
||||
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
|
||||
|
||||
struct kvm_xen_hvm_config hvmc = {
|
||||
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
|
||||
@ -111,6 +178,17 @@ int main(int argc, char *argv[])
|
||||
};
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
|
||||
|
||||
if (do_runstate_tests) {
|
||||
struct kvm_xen_vcpu_attr st = {
|
||||
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
|
||||
.u.gpa = RUNSTATE_ADDR,
|
||||
};
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
|
||||
}
|
||||
|
||||
struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);;
|
||||
rs->state = 0x5a;
|
||||
|
||||
for (;;) {
|
||||
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
|
||||
struct ucall uc;
|
||||
@ -126,8 +204,56 @@ int main(int argc, char *argv[])
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s", (const char *)uc.args[0]);
|
||||
/* NOT REACHED */
|
||||
case UCALL_SYNC:
|
||||
case UCALL_SYNC: {
|
||||
struct kvm_xen_vcpu_attr rst;
|
||||
long rundelay;
|
||||
|
||||
/* If no runstate support, bail out early */
|
||||
if (!do_runstate_tests)
|
||||
goto done;
|
||||
|
||||
TEST_ASSERT(rs->state_entry_time == rs->time[0] +
|
||||
rs->time[1] + rs->time[2] + rs->time[3],
|
||||
"runstate times don't add up");
|
||||
|
||||
switch (uc.args[1]) {
|
||||
case RUNSTATE_running...RUNSTATE_offline:
|
||||
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
|
||||
rst.u.runstate.state = uc.args[1];
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
|
||||
break;
|
||||
case 4:
|
||||
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
|
||||
memset(&rst.u, 0, sizeof(rst.u));
|
||||
rst.u.runstate.state = (uint64_t)-1;
|
||||
rst.u.runstate.time_blocked =
|
||||
0x5a - rs->time[RUNSTATE_blocked];
|
||||
rst.u.runstate.time_offline =
|
||||
0x6b6b - rs->time[RUNSTATE_offline];
|
||||
rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
|
||||
rst.u.runstate.time_offline;
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
|
||||
break;
|
||||
|
||||
case 5:
|
||||
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
|
||||
memset(&rst.u, 0, sizeof(rst.u));
|
||||
rst.u.runstate.state = RUNSTATE_running;
|
||||
rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
|
||||
rst.u.runstate.time_blocked = 0x6b6b;
|
||||
rst.u.runstate.time_offline = 0x5a;
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
|
||||
break;
|
||||
case 6:
|
||||
/* Yield until scheduler delay exceeds target */
|
||||
rundelay = get_run_delay() + MIN_STEAL_TIME;
|
||||
do {
|
||||
sched_yield();
|
||||
} while (get_run_delay() < rundelay);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case UCALL_DONE:
|
||||
goto done;
|
||||
default:
|
||||
@ -162,6 +288,33 @@ int main(int argc, char *argv[])
|
||||
TEST_ASSERT(ti2->version && !(ti2->version & 1),
|
||||
"Bad time_info version %x", ti->version);
|
||||
|
||||
if (do_runstate_tests) {
|
||||
/*
|
||||
* Fetch runstate and check sanity. Strictly speaking in the
|
||||
* general case we might not expect the numbers to be identical
|
||||
* but in this case we know we aren't running the vCPU any more.
|
||||
*/
|
||||
struct kvm_xen_vcpu_attr rst = {
|
||||
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
|
||||
};
|
||||
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
|
||||
|
||||
TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
|
||||
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
|
||||
"State entry time mismatch");
|
||||
TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
|
||||
"Running time mismatch");
|
||||
TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
|
||||
"Runnable time mismatch");
|
||||
TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
|
||||
"Blocked time mismatch");
|
||||
TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
|
||||
"Offline time mismatch");
|
||||
|
||||
TEST_ASSERT(rs->state_entry_time == rs->time[0] +
|
||||
rs->time[1] + rs->time[2] + rs->time[3],
|
||||
"runstate times don't add up");
|
||||
}
|
||||
kvm_vm_free(vm);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user