mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-10 15:10:38 +00:00
RISC-V:
* Fix VM hang in case of timer delta being zero ARM: * Fixes for the MMU: * Read the MMU notifier seq before dropping the mmap lock to guard against reading a potentially stale VMA * Disable interrupts when walking user page tables to protect against the page table being freed * Read the MTE permissions for the VMA within the mmap lock critical section, avoiding the use of a potentally stale VMA pointer * Fixes for the vPMU: * Return the sum of the current perf event value and PMC snapshot for reads from userspace * Don't save the value of guest writes to PMCR_EL0.{C,P}, which could otherwise lead to userspace erroneously resetting the vPMU during VM save/restore -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmQh12wUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroMPyAgAhe9P5efFlkg/N/BUoxCdiQjhTEGO BbNtZFbSgdqsTA0No5sjSsq8+dYuj4IpYHxvTRNXffxpn/6x736x9ff6fH3QwY0P n65q37m3XrBzbaUv2XlX6l4GJC/KHWpXsawVszmnQpw8wYUYglo9JWnWVvpo/vLV byWXcG9Rt0MQVnV13bYtjnpdNYCdVuhZuvjDBvbBa2I9BRKShvWyzcRcc48K1RNq UI5PCEb8c2NUOB/6b9lRkEaNYLssVLOxpI3abGJP+IVSl/WE+hgCbFpz7ZkYwt79 AYm4wnZTSKBMQb5P2IOlgkfasvEyFiZ4Jj7x3RLXcXSWIB6g0J20iNasYg== =qX9k -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "RISC-V: - Fix VM hang in case of timer delta being zero ARM: - MMU fixes: - Read the MMU notifier seq before dropping the mmap lock to guard against reading a potentially stale VMA - Disable interrupts when walking user page tables to protect against the page table being freed - Read the MTE permissions for the VMA within the mmap lock critical section, avoiding the use of a potentally stale VMA pointer - vPMU fixes: - Return the sum of the current perf event value and PMC snapshot for reads from userspace - Don't save the value of guest writes to PMCR_EL0.{C,P}, which could otherwise lead to userspace erroneously resetting the vPMU during VM save/restore" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: riscv/kvm: Fix VM hang in case of timer delta being zero. KVM: arm64: Check for kvm_vma_mte_allowed in the critical section KVM: arm64: Disable interrupts while walking userspace PTs KVM: arm64: Retry fault if vma_lookup() results become invalid KVM: arm64: PMU: Don't save PMCR_EL0.{C,P} for the vCPU KVM: arm64: PMU: Fix GET_ONE_REG for vPMC regs to return the current value
This commit is contained in:
commit
3a93e40326
@ -666,14 +666,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
CONFIG_PGTABLE_LEVELS),
|
||||
.mm_ops = &kvm_user_mm_ops,
|
||||
};
|
||||
unsigned long flags;
|
||||
kvm_pte_t pte = 0; /* Keep GCC quiet... */
|
||||
u32 level = ~0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Disable IRQs so that we hazard against a concurrent
|
||||
* teardown of the userspace page tables (which relies on
|
||||
* IPI-ing threads).
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
|
||||
VM_BUG_ON(ret);
|
||||
VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
|
||||
VM_BUG_ON(!(pte & PTE_VALID));
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Not seeing an error, but not updating level? Something went
|
||||
* deeply wrong...
|
||||
*/
|
||||
if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
|
||||
return -EFAULT;
|
||||
|
||||
/* Oops, the userspace PTs are gone... Replay the fault */
|
||||
if (!kvm_pte_valid(pte))
|
||||
return -EAGAIN;
|
||||
|
||||
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
|
||||
}
|
||||
@ -1079,7 +1098,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
|
||||
*
|
||||
* Returns the size of the mapping.
|
||||
*/
|
||||
static unsigned long
|
||||
static long
|
||||
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long hva, kvm_pfn_t *pfnp,
|
||||
phys_addr_t *ipap)
|
||||
@ -1091,8 +1110,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
* sure that the HVA and IPA are sufficiently aligned and that the
|
||||
* block map is contained within the memslot.
|
||||
*/
|
||||
if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
|
||||
get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
|
||||
if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
|
||||
int sz = get_user_mapping_size(kvm, hva);
|
||||
|
||||
if (sz < 0)
|
||||
return sz;
|
||||
|
||||
if (sz < PMD_SIZE)
|
||||
return PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* The address we faulted on is backed by a transparent huge
|
||||
* page. However, because we map the compound huge page and
|
||||
@ -1192,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
{
|
||||
int ret = 0;
|
||||
bool write_fault, writable, force_pte = false;
|
||||
bool exec_fault;
|
||||
bool exec_fault, mte_allowed;
|
||||
bool device = false;
|
||||
unsigned long mmu_seq;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
@ -1203,7 +1229,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
kvm_pfn_t pfn;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
|
||||
unsigned long vma_pagesize, fault_granule;
|
||||
long vma_pagesize, fault_granule;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
struct kvm_pgtable *pgt;
|
||||
|
||||
@ -1217,6 +1243,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Permission faults just need to update the existing leaf entry,
|
||||
* and so normally don't require allocations from the memcache. The
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
if (fault_status != ESR_ELx_FSC_PERM ||
|
||||
(logging_active && write_fault)) {
|
||||
ret = kvm_mmu_topup_memory_cache(memcache,
|
||||
kvm_mmu_cache_min_pages(kvm));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Let's check if we will get back a huge page backed by hugetlbfs, or
|
||||
* get block mapping for device MMIO region.
|
||||
@ -1269,37 +1309,21 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
fault_ipa &= ~(vma_pagesize - 1);
|
||||
|
||||
gfn = fault_ipa >> PAGE_SHIFT;
|
||||
mmap_read_unlock(current->mm);
|
||||
mte_allowed = kvm_vma_mte_allowed(vma);
|
||||
|
||||
/* Don't use the VMA after the unlock -- it may have vanished */
|
||||
vma = NULL;
|
||||
|
||||
/*
|
||||
* Permission faults just need to update the existing leaf entry,
|
||||
* and so normally don't require allocations from the memcache. The
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
if (fault_status != ESR_ELx_FSC_PERM ||
|
||||
(logging_active && write_fault)) {
|
||||
ret = kvm_mmu_topup_memory_cache(memcache,
|
||||
kvm_mmu_cache_min_pages(kvm));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
|
||||
/*
|
||||
* Ensure the read of mmu_invalidate_seq happens before we call
|
||||
* gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
|
||||
* the page we just got a reference to gets unmapped before we have a
|
||||
* chance to grab the mmu_lock, which ensure that if the page gets
|
||||
* unmapped afterwards, the call to kvm_unmap_gfn will take it away
|
||||
* from us again properly. This smp_rmb() interacts with the smp_wmb()
|
||||
* in kvm_mmu_notifier_invalidate_<page|range_end>.
|
||||
* Read mmu_invalidate_seq so that KVM can detect if the results of
|
||||
* vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
|
||||
* acquiring kvm->mmu_lock.
|
||||
*
|
||||
* Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
|
||||
* used to avoid unnecessary overhead introduced to locate the memory
|
||||
* slot because it's always fixed even @gfn is adjusted for huge pages.
|
||||
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
|
||||
* with the smp_wmb() in kvm_mmu_invalidate_end().
|
||||
*/
|
||||
smp_rmb();
|
||||
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
|
||||
mmap_read_unlock(current->mm);
|
||||
|
||||
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
|
||||
write_fault, &writable, NULL);
|
||||
@ -1350,11 +1374,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
|
||||
hva, &pfn,
|
||||
&fault_ipa);
|
||||
|
||||
if (vma_pagesize < 0) {
|
||||
ret = vma_pagesize;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (kvm_vma_mte_allowed(vma)) {
|
||||
if (mte_allowed) {
|
||||
sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
} else {
|
||||
ret = -EFAULT;
|
||||
|
@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
||||
if (!kvm_pmu_is_3p5(vcpu))
|
||||
val &= ~ARMV8_PMU_PMCR_LP;
|
||||
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
|
||||
/* The reset bits don't indicate any state, and shouldn't be saved. */
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_E) {
|
||||
kvm_pmu_enable_counter_mask(vcpu,
|
||||
|
@ -856,6 +856,22 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
|
||||
return true;
|
||||
}
|
||||
|
||||
static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
u64 idx;
|
||||
|
||||
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0)
|
||||
/* PMCCNTR_EL0 */
|
||||
idx = ARMV8_PMU_CYCLE_IDX;
|
||||
else
|
||||
/* PMEVCNTRn_EL0 */
|
||||
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
|
||||
|
||||
*val = kvm_pmu_get_counter_value(vcpu, idx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
@ -1072,7 +1088,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
/* Macro to expand the PMEVCNTRn_EL0 register */
|
||||
#define PMU_PMEVCNTR_EL0(n) \
|
||||
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
|
||||
.reset = reset_pmevcntr, \
|
||||
.reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \
|
||||
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
|
||||
|
||||
/* Macro to expand the PMEVTYPERn_EL0 register */
|
||||
@ -1982,7 +1998,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
|
||||
.access = access_pmceid, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
|
||||
.access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
|
||||
.access = access_pmu_evcntr, .reset = reset_unknown,
|
||||
.reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
|
||||
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
|
||||
.access = access_pmu_evtyper, .reset = NULL },
|
||||
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
|
||||
|
@ -147,10 +147,8 @@ static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu)
|
||||
return;
|
||||
|
||||
delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
|
||||
if (delta_ns) {
|
||||
hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
|
||||
t->next_set = true;
|
||||
}
|
||||
hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
|
||||
t->next_set = true;
|
||||
}
|
||||
|
||||
static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu)
|
||||
|
Loading…
x
Reference in New Issue
Block a user