Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "It's a bit larger than I (and probably you) would like by the time we
  get to -rc6, but perhaps not entirely unexpected since the changes in
  the last merge window were larger than usual.

  x86:
   - Fixes for missing TLB flushes with TDP MMU
   - Fixes for race conditions in nested SVM
   - Fixes for lockdep splat with Xen emulation
   - Fix for kvmclock underflow
   - Fix srcdir != builddir builds
   - Other small cleanups

  ARM:
   - Fix GICv3 MMIO compatibility probing
   - Prevent guests from using the ARMv8.4 self-hosted tracing extension"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  selftests: kvm: Check that TSC page value is small after KVM_SET_CLOCK(0)
  KVM: x86: Prevent 'hv_clock->system_time' from going negative in kvm_guest_time_update()
  KVM: x86: disable interrupts while pvclock_gtod_sync_lock is taken
  KVM: x86: reduce pvclock_gtod_sync_lock critical sections
  KVM: SVM: ensure that EFER.SVME is set when running nested guest or on nested vmexit
  KVM: SVM: load control fields from VMCB12 before checking them
  KVM: x86/mmu: Don't allow TDP MMU to yield when recovering NX pages
  KVM: x86/mmu: Ensure TLBs are flushed for TDP MMU during NX zapping
  KVM: x86/mmu: Ensure TLBs are flushed when yielding during GFN range zap
  KVM: make: Fix out-of-source module builds
  selftests: kvm: make hardware_disable_test less verbose
  KVM: x86/vPMU: Forbid writing to MSR_F15H_PERF MSRs when guest doesn't have X86_FEATURE_PERFCTR_CORE
  KVM: x86: remove unused declaration of kvm_write_tsc()
  KVM: clean up the unused argument
  tools/kvm_stat: Add restart delay
  KVM: arm64: Fix CPU interface MMIO compatibility detection
  KVM: arm64: Disable guest access to trace filter controls
  KVM: arm64: Hide system instruction access to Trace registers
commit 6905b1dc3c
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -278,6 +278,7 @@
 #define CPTR_EL2_DEFAULT        CPTR_EL2_RES1
 
 /* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TTRF           (1 << 19)
 #define MDCR_EL2_TPMS           (1 << 14)
 #define MDCR_EL2_E2PB_MASK      (UL(0x3))
 #define MDCR_EL2_E2PB_SHIFT     (UL(12))
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -383,7 +383,6 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
  * of support.
  */
 S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
 ARM64_FTR_END,
 };
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -89,6 +89,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
  *  - Debug ROM Address (MDCR_EL2_TDRA)
  *  - OS related registers (MDCR_EL2_TDOSA)
  *  - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
+ *  - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
  *
  * Additionally, KVM only traps guest accesses to the debug registers if
  * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
@@ -112,6 +113,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
         vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
         vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
                                 MDCR_EL2_TPMS |
+                                MDCR_EL2_TTRF |
                                 MDCR_EL2_TPMCR |
                                 MDCR_EL2_TDRA |
                                 MDCR_EL2_TDOSA);
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -429,6 +429,13 @@ u64 __vgic_v3_get_gic_config(void)
         if (has_vhe())
                 flags = local_daif_save();
 
+        /*
+         * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates
+         * that to be able to set ICC_SRE_EL1.SRE to 0, all the
+         * interrupt overrides must be set. You've got to love this.
+         */
+        sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
+        isb();
         write_gicreg(0, ICC_SRE_EL1);
         isb();
 
@@ -436,6 +443,8 @@ u64 __vgic_v3_get_gic_config(void)
 
         write_gicreg(sre, ICC_SRE_EL1);
         isb();
+        sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
+        isb();
 
         if (has_vhe())
                 local_daif_restore(flags);
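The fix above brackets the ICC_SRE_EL1 probe with read-modify-write updates of HCR_EL2, first setting the AMO/FMO/IMO interrupt overrides and later clearing them again. As a rough illustration of what a clear-then-set helper such as sysreg_clear_set() does to the register value, here is a userspace sketch; the bit positions and names are illustrative stand-ins, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bit positions; the real values live in the arm64 headers. */
#define HCR_FMO (1ULL << 3)
#define HCR_IMO (1ULL << 4)
#define HCR_AMO (1ULL << 5)

/* Rough model of a clear-then-set read-modify-write of a register value. */
static uint64_t clear_set(uint64_t reg, uint64_t clr, uint64_t set)
{
        return (reg & ~clr) | set;
}

int main(void)
{
        uint64_t hcr = 0;

        /* Set the interrupt overrides before clearing ICC_SRE_EL1.SRE ... */
        hcr = clear_set(hcr, 0, HCR_AMO | HCR_FMO | HCR_IMO);
        printf("overrides on:  %#llx\n", (unsigned long long)hcr);

        /* ... and put them back once the probe is done. */
        hcr = clear_set(hcr, HCR_AMO | HCR_FMO | HCR_IMO, 0);
        printf("overrides off: %#llx\n", (unsigned long long)hcr);
        return 0;
}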
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-ccflags-y += -Iarch/x86/kvm
+ccflags-y += -I $(srctree)/arch/x86/kvm
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5884,6 +5884,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
         struct kvm_mmu_page *sp;
         unsigned int ratio;
         LIST_HEAD(invalid_list);
+        bool flush = false;
         ulong to_zap;
 
         rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5905,19 +5906,19 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
                                       lpage_disallowed_link);
                 WARN_ON_ONCE(!sp->lpage_disallowed);
                 if (is_tdp_mmu_page(sp)) {
-                        kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-                                sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+                        flush = kvm_tdp_mmu_zap_sp(kvm, sp);
                 } else {
                         kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
                         WARN_ON_ONCE(sp->lpage_disallowed);
                 }
 
                 if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-                        kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                        kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                         cond_resched_rwlock_write(&kvm->mmu_lock);
+                        flush = false;
                 }
         }
-        kvm_mmu_commit_zap_page(kvm, &invalid_list);
+        kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
         write_unlock(&kvm->mmu_lock);
         srcu_read_unlock(&kvm->srcu, rcu_idx);
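The NX-recovery hunks above follow a general pattern: accumulate a "TLB flush pending" flag while zapping, and make sure any pending flush is issued before the loop drops the MMU lock to reschedule, so other vCPUs never run on stale translations. A simplified, self-contained sketch of that control flow, using userspace stand-ins rather than KVM code:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real work; in KVM these would zap SPTEs and flush TLBs. */
static bool zap_one(int i)       { printf("zap %d\n", i); return true; }
static void flush_tlbs(void)     { puts("flush"); }
static bool need_to_yield(int i) { return i % 3 == 2; }
static void yield_lock(void)     { puts("yield"); }

int main(void)
{
        bool flush = false;

        for (int i = 0; i < 8; i++) {
                flush |= zap_one(i);

                if (need_to_yield(i)) {
                        /* Never drop the lock with a flush still pending. */
                        if (flush)
                                flush_tlbs();
                        flush = false;
                        yield_lock();
                }
        }

        /* Whatever is still pending is flushed before the lock is released. */
        if (flush)
                flush_tlbs();
        return 0;
}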
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -86,7 +86,7 @@ static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
         list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
 
 static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-                          gfn_t start, gfn_t end, bool can_yield);
+                          gfn_t start, gfn_t end, bool can_yield, bool flush);
 
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
@@ -99,7 +99,7 @@ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 
         list_del(&root->link);
 
-        zap_gfn_range(kvm, root, 0, max_gfn, false);
+        zap_gfn_range(kvm, root, 0, max_gfn, false, false);
 
         free_page((unsigned long)root->spt);
         kmem_cache_free(mmu_page_header_cache, root);
@@ -668,20 +668,21 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
  * scheduler needs the CPU or there is contention on the MMU lock. If this
  * function cannot yield, it will not release the MMU lock or reschedule and
  * the caller must ensure it does not supply too large a GFN range, or the
- * operation can cause a soft lockup.
+ * operation can cause a soft lockup. Note, in some use cases a flush may be
+ * required by prior actions. Ensure the pending flush is performed prior to
+ * yielding.
  */
 static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-                          gfn_t start, gfn_t end, bool can_yield)
+                          gfn_t start, gfn_t end, bool can_yield, bool flush)
 {
         struct tdp_iter iter;
-        bool flush_needed = false;
 
         rcu_read_lock();
 
         tdp_root_for_each_pte(iter, root, start, end) {
                 if (can_yield &&
-                    tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
-                        flush_needed = false;
+                    tdp_mmu_iter_cond_resched(kvm, &iter, flush)) {
+                        flush = false;
                         continue;
                 }
 
@@ -699,11 +700,11 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                         continue;
 
                 tdp_mmu_set_spte(kvm, &iter, 0);
-                flush_needed = true;
+                flush = true;
         }
 
         rcu_read_unlock();
-        return flush_needed;
+        return flush;
 }
 
 /*
@@ -712,13 +713,14 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                                 bool can_yield)
 {
         struct kvm_mmu_page *root;
         bool flush = false;
 
         for_each_tdp_mmu_root_yield_safe(kvm, root)
-                flush |= zap_gfn_range(kvm, root, start, end, true);
+                flush = zap_gfn_range(kvm, root, start, end, can_yield, flush);
 
         return flush;
 }
@@ -930,7 +932,7 @@ static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
                               struct kvm_mmu_page *root, gfn_t start,
                               gfn_t end, unsigned long unused)
 {
-        return zap_gfn_range(kvm, root, start, end, false);
+        return zap_gfn_range(kvm, root, start, end, false, false);
 }
 
 int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,29 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                                 bool can_yield);
+static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start,
+                                             gfn_t end)
+{
+        return __kvm_tdp_mmu_zap_gfn_range(kvm, start, end, true);
+}
+static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+        gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+
+        /*
+         * Don't allow yielding, as the caller may have a flush pending. Note,
+         * if mmu_lock is held for write, zapping will never yield in this case,
+         * but explicitly disallow it for safety. The TDP MMU does not yield
+         * until it has made forward progress (steps sideways), and when zapping
+         * a single shadow page that it's guaranteed to see (thus the mmu_lock
+         * requirement), its "step sideways" will always step beyond the bounds
+         * of the shadow page's gfn range and stop iterating before yielding.
+         */
+        lockdep_assert_held_write(&kvm->mmu_lock);
+        return __kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, end, false);
+}
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
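The header change above is a small API refactor: the old kvm_tdp_mmu_zap_gfn_range() becomes a double-underscore worker that takes an explicit can_yield flag, while thin inline wrappers choose the policy (yield for bulk range zaps, never yield when zapping a single shadow page whose caller may have a flush pending). A minimal sketch of that wrapper pattern, with hypothetical names rather than the KVM ones:

#include <stdbool.h>
#include <stdio.h>

/* Worker: all callers funnel through here; policy is an explicit parameter. */
static bool __zap_range(unsigned long start, unsigned long end, bool can_yield)
{
        printf("zap [%lu, %lu) can_yield=%d\n", start, end, can_yield);
        return end > start;     /* "flush needed" */
}

/* Default wrapper: bulk zaps are allowed to yield the lock. */
static inline bool zap_range(unsigned long start, unsigned long end)
{
        return __zap_range(start, end, true);
}

/* Single-page wrapper: the caller may have a flush pending, so never yield. */
static inline bool zap_page(unsigned long gfn, unsigned long npages)
{
        return __zap_range(gfn, gfn + npages, false);
}

int main(void)
{
        bool flush = zap_range(0, 512);
        flush |= zap_page(4096, 512);
        return flush ? 0 : 1;
}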
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -246,11 +246,18 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
         return true;
 }
 
-static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
+static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
 {
         struct kvm_vcpu *vcpu = &svm->vcpu;
         bool vmcb12_lma;
 
+        /*
+         * FIXME: these should be done after copying the fields,
+         * to avoid TOC/TOU races. For these save area checks
+         * the possible damage is limited since kvm_set_cr0 and
+         * kvm_set_cr4 handle failure; EFER_SVME is an exception
+         * so it is force-set later in nested_prepare_vmcb_save.
+         */
         if ((vmcb12->save.efer & EFER_SVME) == 0)
                 return false;
 
@@ -271,7 +278,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
         if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
                 return false;
 
-        return nested_vmcb_check_controls(&vmcb12->control);
+        return true;
 }
 
 static void load_nested_vmcb_control(struct vcpu_svm *svm,
@@ -396,7 +403,14 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
         svm->vmcb->save.gdtr = vmcb12->save.gdtr;
         svm->vmcb->save.idtr = vmcb12->save.idtr;
         kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
-        svm_set_efer(&svm->vcpu, vmcb12->save.efer);
+
+        /*
+         * Force-set EFER_SVME even though it is checked earlier on the
+         * VMCB12, because the guest can flip the bit between the check
+         * and now. Clearing EFER_SVME would call svm_free_nested.
+         */
+        svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
+
         svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
         svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
         svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2;
@@ -468,7 +482,6 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
 
 
         svm->nested.vmcb12_gpa = vmcb12_gpa;
-        load_nested_vmcb_control(svm, &vmcb12->control);
         nested_prepare_vmcb_control(svm);
         nested_prepare_vmcb_save(svm, vmcb12);
 
@@ -515,7 +528,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
         if (WARN_ON_ONCE(!svm->nested.initialized))
                 return -EINVAL;
 
-        if (!nested_vmcb_checks(svm, vmcb12)) {
+        load_nested_vmcb_control(svm, &vmcb12->control);
+
+        if (!nested_vmcb_check_save(svm, vmcb12) ||
+            !nested_vmcb_check_controls(&svm->nested.ctl)) {
                 vmcb12->control.exit_code    = SVM_EXIT_ERR;
                 vmcb12->control.exit_code_hi = 0;
                 vmcb12->control.exit_info_1  = 0;
@@ -1209,6 +1225,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
          */
         if (!(save->cr0 & X86_CR0_PG))
                 goto out_free;
+        if (!(save->efer & EFER_SVME))
+                goto out_free;
 
         /*
          * All checks done, we can enter guest mode. L1 control fields
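The FIXME added in nested_vmcb_check_save() describes a time-of-check/time-of-use exposure: the checks run against a VMCB that lives in guest memory and can still change before the values are consumed, which is why EFER_SVME is force-set again in nested_prepare_vmcb_save(). The robust pattern is to copy first and validate only the private copy. A simplified sketch of that copy-then-check idea, with illustrative fields rather than the SVM structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ctl {
        uint64_t efer;
        uint64_t cr0;
};

/* Memory the "guest" could rewrite at any time. */
static volatile struct ctl guest_shared = { .efer = 1, .cr0 = 0x80000001 };

static bool check(const struct ctl *c)
{
        return (c->efer & 1) && (c->cr0 & 1);
}

int main(void)
{
        struct ctl snapshot;

        /* Copy first, then check and use only the snapshot: no TOC/TOU window. */
        memcpy(&snapshot, (const void *)&guest_shared, sizeof(snapshot));
        if (!check(&snapshot)) {
                puts("rejected");
                return 1;
        }
        printf("using efer=%#llx\n", (unsigned long long)snapshot.efer);
        return 0;
}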
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -98,6 +98,8 @@ static enum index msr_to_index(u32 msr)
 static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
                                              enum pmu_type type)
 {
+        struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
         switch (msr) {
         case MSR_F15H_PERF_CTL0:
         case MSR_F15H_PERF_CTL1:
@@ -105,6 +107,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
         case MSR_F15H_PERF_CTL3:
         case MSR_F15H_PERF_CTL4:
         case MSR_F15H_PERF_CTL5:
+                if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+                        return NULL;
+                fallthrough;
         case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
                 if (type != PMU_TYPE_EVNTSEL)
                         return NULL;
@@ -115,6 +120,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
         case MSR_F15H_PERF_CTR3:
         case MSR_F15H_PERF_CTR4:
         case MSR_F15H_PERF_CTR5:
+                if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+                        return NULL;
+                fallthrough;
         case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
                 if (type != PMU_TYPE_COUNTER)
                         return NULL;
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -271,8 +271,7 @@ static struct kmem_cache *x86_emulator_cache;
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
  */
-static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
-                                  u64 data, bool write)
+static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
 {
         const char *op = write ? "wrmsr" : "rdmsr";
 
@@ -1445,7 +1444,7 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
         if (r == KVM_MSR_RET_INVALID) {
                 /* Unconditionally clear the output for simplicity */
                 *data = 0;
-                if (kvm_msr_ignored_check(vcpu, index, 0, false))
+                if (kvm_msr_ignored_check(index, 0, false))
                         r = 0;
         }
 
@@ -1620,7 +1619,7 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
         int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
 
         if (ret == KVM_MSR_RET_INVALID)
-                if (kvm_msr_ignored_check(vcpu, index, data, true))
+                if (kvm_msr_ignored_check(index, data, true))
                         ret = 0;
 
         return ret;
@@ -1658,7 +1657,7 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
         if (ret == KVM_MSR_RET_INVALID) {
                 /* Unconditionally clear *data for simplicity */
                 *data = 0;
-                if (kvm_msr_ignored_check(vcpu, index, 0, false))
+                if (kvm_msr_ignored_check(index, 0, false))
                         ret = 0;
         }
 
@@ -2329,7 +2328,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
         kvm_vcpu_write_tsc_offset(vcpu, offset);
         raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-        spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+        spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
         if (!matched) {
                 kvm->arch.nr_vcpus_matched_tsc = 0;
         } else if (!already_matched) {
@@ -2337,7 +2336,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
         }
 
         kvm_track_tsc_matching(vcpu);
-        spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
+        spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2559,13 +2558,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
         int i;
         struct kvm_vcpu *vcpu;
         struct kvm_arch *ka = &kvm->arch;
+        unsigned long flags;
 
         kvm_hv_invalidate_tsc_page(kvm);
 
-        spin_lock(&ka->pvclock_gtod_sync_lock);
         kvm_make_mclock_inprogress_request(kvm);
+
         /* no guest entries from this point */
+        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         pvclock_update_vm_gtod_copy(kvm);
+        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2573,8 +2575,6 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
         /* guest entries allowed */
         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
-
-        spin_unlock(&ka->pvclock_gtod_sync_lock);
 #endif
 }
 
@@ -2582,17 +2582,18 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 {
         struct kvm_arch *ka = &kvm->arch;
         struct pvclock_vcpu_time_info hv_clock;
+        unsigned long flags;
         u64 ret;
 
-        spin_lock(&ka->pvclock_gtod_sync_lock);
+        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         if (!ka->use_master_clock) {
-                spin_unlock(&ka->pvclock_gtod_sync_lock);
+                spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                 return get_kvmclock_base_ns() + ka->kvmclock_offset;
         }
 
         hv_clock.tsc_timestamp = ka->master_cycle_now;
         hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-        spin_unlock(&ka->pvclock_gtod_sync_lock);
+        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
         /* both __this_cpu_read() and rdtsc() should be on the same cpu */
         get_cpu();
@@ -2686,13 +2687,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
          * If the host uses TSC clock, then passthrough TSC as stable
          * to the guest.
          */
-        spin_lock(&ka->pvclock_gtod_sync_lock);
+        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         use_master_clock = ka->use_master_clock;
         if (use_master_clock) {
                 host_tsc = ka->master_cycle_now;
                 kernel_ns = ka->master_kernel_ns;
         }
-        spin_unlock(&ka->pvclock_gtod_sync_lock);
+        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
         /* Keep irq disabled to prevent changes to the clock */
         local_irq_save(flags);
@@ -5726,6 +5727,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
         }
 #endif
         case KVM_SET_CLOCK: {
+                struct kvm_arch *ka = &kvm->arch;
                 struct kvm_clock_data user_ns;
                 u64 now_ns;
 
@@ -5744,8 +5746,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
                  * pvclock_update_vm_gtod_copy().
                  */
                 kvm_gen_update_masterclock(kvm);
-                now_ns = get_kvmclock_ns(kvm);
-                kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
+
+                /*
+                 * This pairs with kvm_guest_time_update(): when masterclock is
+                 * in use, we use master_kernel_ns + kvmclock_offset to set
+                 * unsigned 'system_time' so if we use get_kvmclock_ns() (which
+                 * is slightly ahead) here we risk going negative on unsigned
+                 * 'system_time' when 'user_ns.clock' is very small.
+                 */
+                spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+                if (kvm->arch.use_master_clock)
+                        now_ns = ka->master_kernel_ns;
+                else
+                        now_ns = get_kvmclock_base_ns();
+                ka->kvmclock_offset = user_ns.clock - now_ns;
+                spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+
                 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                 break;
         }
@@ -7724,6 +7740,7 @@ static void kvm_hyperv_tsc_notifier(void)
         struct kvm *kvm;
         struct kvm_vcpu *vcpu;
         int cpu;
+        unsigned long flags;
 
         mutex_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list)
@@ -7739,17 +7756,15 @@ static void kvm_hyperv_tsc_notifier(void)
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 struct kvm_arch *ka = &kvm->arch;
 
-                spin_lock(&ka->pvclock_gtod_sync_lock);
-
+                spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                 pvclock_update_vm_gtod_copy(kvm);
+                spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
                 kvm_for_each_vcpu(cpu, vcpu, kvm)
                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
                 kvm_for_each_vcpu(cpu, vcpu, kvm)
                         kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
-
-                spin_unlock(&ka->pvclock_gtod_sync_lock);
         }
         mutex_unlock(&kvm_lock);
 }
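The comment added to the KVM_SET_CLOCK handler describes an unsigned-underflow hazard: the clock offset is computed by subtracting "now" from the user-supplied value, and if "now" comes from get_kvmclock_ns(), which runs slightly ahead of the master_kernel_ns snapshot used later, the unsigned system_time can dip below zero and wrap around. A tiny standalone demonstration of that wraparound, with hypothetical nanosecond values:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
        /* Hypothetical numbers, in nanoseconds. */
        uint64_t master_kernel_ns = 1000000000;   /* snapshot used later            */
        uint64_t kvmclock_ns      = 1000000500;   /* "now", running slightly ahead  */
        uint64_t user_clock       = 0;            /* e.g. KVM_SET_CLOCK(0)          */

        /* Offset computed against the slightly-ahead value is "negative" mod 2^64. */
        uint64_t offset = user_clock - kvmclock_ns;

        /* Applied to the smaller snapshot, the result wraps to a huge value. */
        uint64_t system_time = master_kernel_ns + offset;

        printf("system_time = %" PRIu64 "\n", system_time); /* near 2^64, not ~0 */
        return 0;
}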
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -250,7 +250,6 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
 void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs);
 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
 
 int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
--- a/tools/kvm/kvm_stat/kvm_stat.service
+++ b/tools/kvm/kvm_stat/kvm_stat.service
@@ -9,6 +9,7 @@ Type=simple
 ExecStart=/usr/bin/kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv
 ExecReload=/bin/kill -HUP $MAINPID
 Restart=always
+RestartSec=60s
 SyslogIdentifier=kvm_stat
 SyslogLevel=debug
 
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -108,7 +108,7 @@ static void run_test(uint32_t run)
         kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
         vm_create_irqchip(vm);
 
-        fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run);
+        pr_debug("%s: [%d] start vcpus\n", __func__, run);
         for (i = 0; i < VCPU_NUM; ++i) {
                 vm_vcpu_add_default(vm, i, guest_code);
                 payloads[i].vm = vm;
@@ -124,7 +124,7 @@ static void run_test(uint32_t run)
                         check_set_affinity(throw_away, &cpu_set);
                 }
         }
-        fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run);
+        pr_debug("%s: [%d] all threads launched\n", __func__, run);
         sem_post(sem);
         for (i = 0; i < VCPU_NUM; ++i)
                 check_join(threads[i], &b);
@@ -147,16 +147,16 @@ int main(int argc, char **argv)
                 if (pid == 0)
                         run_test(i); /* This function always exits */
 
-                fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i);
+                pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
                 sem_wait(sem);
                 r = (rand() % DELAY_US_MAX) + 1;
-                fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r);
+                pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
                 usleep(r);
                 r = waitpid(pid, &s, WNOHANG);
                 TEST_ASSERT(r != pid,
                             "%s: [%d] child exited unexpectedly status: [%d]",
                             __func__, i, s);
-                fprintf(stderr, "%s: [%d] killing child\n", __func__, i);
+                pr_debug("%s: [%d] killing child\n", __func__, i);
                 kill(pid, SIGKILL);
         }
 
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -80,19 +80,24 @@ static inline void check_tsc_msr_rdtsc(void)
         GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
 }
 
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+        return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
 static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
 {
         u64 r1, r2, t1, t2;
 
         /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
-        t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+        t1 = get_tscpage_ts(tsc_page);
         r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
 
         /* 10 ms tolerance */
         GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
         nop_loop();
 
-        t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+        t2 = get_tscpage_ts(tsc_page);
         r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
         GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
 }
@@ -130,7 +135,11 @@ static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_
 
         tsc_offset = tsc_page->tsc_offset;
         /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
         GUEST_SYNC(7);
+        /* Sanity check TSC page timestamp, it should be close to 0 */
+        GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
         GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
 
         nop_loop();
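get_tscpage_ts() in the selftest computes the Hyper-V TSC reference page value, reference = ((tsc * tsc_scale) >> 64) + tsc_offset, expressed in 100 ns units. A self-contained version of the 64x64-to-128-bit multiply and shift, assuming a compiler that provides __int128; the values below are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* ((a * b) >> 64) using a 128-bit intermediate, as the selftest helper does. */
static uint64_t mul_u64_u64_shr64(uint64_t a, uint64_t b)
{
        return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void)
{
        /* Hypothetical values: this scale maps 32 TSC ticks to 1 reference unit. */
        uint64_t tsc        = 3000000000ULL;          /* raw TSC reading */
        uint64_t tsc_scale  = 0x0800000000000000ULL;  /* 2^59, i.e. 2^64 / 32 */
        uint64_t tsc_offset = 12345;

        uint64_t ref = mul_u64_u64_shr64(tsc, tsc_scale) + tsc_offset;
        printf("reference time = %" PRIu64 "\n", ref);
        return 0;
}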