mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
KVM: Steal time implementation
To implement steal time, we need the hypervisor to pass the guest information about how much time was spent running other processes outside the VM while the vcpu had meaningful work to do - halt time does not count. This information is acquired through the run_delay field of the delayacct/schedstats infrastructure, which counts time spent in a runqueue but not running. Steal time is per-cpu information, so the traditional MSR-based infrastructure is used. A new MSR, MSR_KVM_STEAL_TIME, holds the address of the memory area containing information about steal time. This patch contains the hypervisor part of the steal time infrastructure, and can be backported independently of the guest portion. [avi, yongjie: export delayacct_on, to avoid build failures in some configs] Signed-off-by: Glauber Costa <glommer@redhat.com> Tested-by: Eric B Munson <emunson@mgebm.net> CC: Rik van Riel <riel@redhat.com> CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> CC: Peter Zijlstra <peterz@infradead.org> CC: Anthony Liguori <aliguori@us.ibm.com> Signed-off-by: Yongjie Ren <yongjie.ren@intel.com> Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
parent
9ddabbe72e
commit
c9aaa8957f
@ -389,6 +389,15 @@ struct kvm_vcpu_arch {
|
|||||||
unsigned int hw_tsc_khz;
|
unsigned int hw_tsc_khz;
|
||||||
unsigned int time_offset;
|
unsigned int time_offset;
|
||||||
struct page *time_page;
|
struct page *time_page;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u64 msr_val;
|
||||||
|
u64 last_steal;
|
||||||
|
u64 accum_steal;
|
||||||
|
struct gfn_to_hva_cache stime;
|
||||||
|
struct kvm_steal_time steal;
|
||||||
|
} st;
|
||||||
|
|
||||||
u64 last_guest_tsc;
|
u64 last_guest_tsc;
|
||||||
u64 last_kernel_ns;
|
u64 last_kernel_ns;
|
||||||
u64 last_tsc_nsec;
|
u64 last_tsc_nsec;
|
||||||
|
@ -45,6 +45,10 @@ struct kvm_steal_time {
|
|||||||
__u32 pad[12];
|
__u32 pad[12];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define KVM_STEAL_ALIGNMENT_BITS 5
|
||||||
|
#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
|
||||||
|
#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
|
||||||
|
|
||||||
#define KVM_MAX_MMU_OP_BATCH 32
|
#define KVM_MAX_MMU_OP_BATCH 32
|
||||||
|
|
||||||
#define KVM_ASYNC_PF_ENABLED (1 << 0)
|
#define KVM_ASYNC_PF_ENABLED (1 << 0)
|
||||||
|
@ -31,6 +31,7 @@ config KVM
|
|||||||
select KVM_ASYNC_PF
|
select KVM_ASYNC_PF
|
||||||
select USER_RETURN_NOTIFIER
|
select USER_RETURN_NOTIFIER
|
||||||
select KVM_MMIO
|
select KVM_MMIO
|
||||||
|
select TASK_DELAY_ACCT
|
||||||
---help---
|
---help---
|
||||||
Support hosting fully virtualized guest machines using hardware
|
Support hosting fully virtualized guest machines using hardware
|
||||||
virtualization extensions. You will need a fairly recent
|
virtualization extensions. You will need a fairly recent
|
||||||
|
@ -808,12 +808,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
|
|||||||
* kvm-specific. Those are put in the beginning of the list.
|
* kvm-specific. Those are put in the beginning of the list.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define KVM_SAVE_MSRS_BEGIN 8
|
#define KVM_SAVE_MSRS_BEGIN 9
|
||||||
static u32 msrs_to_save[] = {
|
static u32 msrs_to_save[] = {
|
||||||
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
|
||||||
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
|
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
|
||||||
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
|
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
|
||||||
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
|
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
|
||||||
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
|
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
|
||||||
MSR_STAR,
|
MSR_STAR,
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
@ -1488,6 +1488,35 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
u64 delta;
|
||||||
|
|
||||||
|
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||||
|
return;
|
||||||
|
|
||||||
|
delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
|
||||||
|
vcpu->arch.st.last_steal = current->sched_info.run_delay;
|
||||||
|
vcpu->arch.st.accum_steal = delta;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void record_steal_time(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||||
|
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
|
||||||
|
return;
|
||||||
|
|
||||||
|
vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
|
||||||
|
vcpu->arch.st.steal.version += 2;
|
||||||
|
vcpu->arch.st.accum_steal = 0;
|
||||||
|
|
||||||
|
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
|
||||||
|
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
|
||||||
|
}
|
||||||
|
|
||||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||||
{
|
{
|
||||||
switch (msr) {
|
switch (msr) {
|
||||||
@ -1570,6 +1599,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
if (kvm_pv_enable_async_pf(vcpu, data))
|
if (kvm_pv_enable_async_pf(vcpu, data))
|
||||||
return 1;
|
return 1;
|
||||||
break;
|
break;
|
||||||
|
case MSR_KVM_STEAL_TIME:
|
||||||
|
|
||||||
|
if (unlikely(!sched_info_on()))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (data & KVM_STEAL_RESERVED_MASK)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
|
||||||
|
data & KVM_STEAL_VALID_BITS))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
vcpu->arch.st.msr_val = data;
|
||||||
|
|
||||||
|
if (!(data & KVM_MSR_ENABLED))
|
||||||
|
break;
|
||||||
|
|
||||||
|
vcpu->arch.st.last_steal = current->sched_info.run_delay;
|
||||||
|
|
||||||
|
preempt_disable();
|
||||||
|
accumulate_steal_time(vcpu);
|
||||||
|
preempt_enable();
|
||||||
|
|
||||||
|
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
case MSR_IA32_MCG_CTL:
|
case MSR_IA32_MCG_CTL:
|
||||||
case MSR_IA32_MCG_STATUS:
|
case MSR_IA32_MCG_STATUS:
|
||||||
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
|
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
|
||||||
@ -1855,6 +1911,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
|||||||
case MSR_KVM_ASYNC_PF_EN:
|
case MSR_KVM_ASYNC_PF_EN:
|
||||||
data = vcpu->arch.apf.msr_val;
|
data = vcpu->arch.apf.msr_val;
|
||||||
break;
|
break;
|
||||||
|
case MSR_KVM_STEAL_TIME:
|
||||||
|
data = vcpu->arch.st.msr_val;
|
||||||
|
break;
|
||||||
case MSR_IA32_P5_MC_ADDR:
|
case MSR_IA32_P5_MC_ADDR:
|
||||||
case MSR_IA32_P5_MC_TYPE:
|
case MSR_IA32_P5_MC_TYPE:
|
||||||
case MSR_IA32_MCG_CAP:
|
case MSR_IA32_MCG_CAP:
|
||||||
@ -2166,6 +2225,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||||||
kvm_migrate_timers(vcpu);
|
kvm_migrate_timers(vcpu);
|
||||||
vcpu->cpu = cpu;
|
vcpu->cpu = cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
accumulate_steal_time(vcpu);
|
||||||
|
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||||
@ -2487,6 +2549,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||||||
(1 << KVM_FEATURE_CLOCKSOURCE2) |
|
(1 << KVM_FEATURE_CLOCKSOURCE2) |
|
||||||
(1 << KVM_FEATURE_ASYNC_PF) |
|
(1 << KVM_FEATURE_ASYNC_PF) |
|
||||||
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
|
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
|
||||||
|
|
||||||
|
if (sched_info_on())
|
||||||
|
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
|
||||||
|
|
||||||
entry->ebx = 0;
|
entry->ebx = 0;
|
||||||
entry->ecx = 0;
|
entry->ecx = 0;
|
||||||
entry->edx = 0;
|
entry->edx = 0;
|
||||||
@ -5470,6 +5536,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||||||
r = 1;
|
r = 1;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
|
||||||
|
record_steal_time(vcpu);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
r = kvm_mmu_reload(vcpu);
|
r = kvm_mmu_reload(vcpu);
|
||||||
@ -6206,6 +6275,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||||
vcpu->arch.apf.msr_val = 0;
|
vcpu->arch.apf.msr_val = 0;
|
||||||
|
vcpu->arch.st.msr_val = 0;
|
||||||
|
|
||||||
kvmclock_reset(vcpu);
|
kvmclock_reset(vcpu);
|
||||||
|
|
||||||
|
@ -47,6 +47,7 @@
|
|||||||
#define KVM_REQ_DEACTIVATE_FPU 10
|
#define KVM_REQ_DEACTIVATE_FPU 10
|
||||||
#define KVM_REQ_EVENT 11
|
#define KVM_REQ_EVENT 11
|
||||||
#define KVM_REQ_APF_HALT 12
|
#define KVM_REQ_APF_HALT 12
|
||||||
|
#define KVM_REQ_STEAL_UPDATE 13
|
||||||
|
|
||||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||||
|
|
||||||
|
@ -19,8 +19,10 @@
|
|||||||
#include <linux/time.h>
|
#include <linux/time.h>
|
||||||
#include <linux/sysctl.h>
|
#include <linux/sysctl.h>
|
||||||
#include <linux/delayacct.h>
|
#include <linux/delayacct.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
|
int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
|
||||||
|
EXPORT_SYMBOL_GPL(delayacct_on);
|
||||||
struct kmem_cache *delayacct_cache;
|
struct kmem_cache *delayacct_cache;
|
||||||
|
|
||||||
static int __init delayacct_setup_disable(char *str)
|
static int __init delayacct_setup_disable(char *str)
|
||||||
|
Loading…
Reference in New Issue
Block a user