mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 10:17:32 +00:00
ARM:
* Address some fallout of the locking rework, this time affecting the way the vgic is configured * Fix an issue where the page table walker frees a subtree and then proceeds with walking what it has just freed... * Check that a given PA donated to the guest is actually memory (only affecting pKVM) * Correctly handle MTE CMOs by Set/Way * Fix the reported address of a watchpoint forwarded to userspace * Fix the freeing of the root of stage-2 page tables * Stop creating spurious PMU events to perform detection of the default PMU and use the existing PMU list instead. x86: * Fix a memslot lookup bug in the NX recovery thread that could theoretically let userspace bypass the NX hugepage mitigation * Fix a s/BLOCKING/PENDING bug in SVM's vNMI support * Account exit stats for fastpath VM-Exits that never leave the super tight run-loop * Fix an out-of-bounds bug in the optimized APIC map code, and add a regression test for the race. -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmR7k1QUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroNblwf/faUVOBMv7mQBGsGa7FNcmaNhYeIT U1k4pFNlo7dNNuNJrGdpo+sOGP5A8CRLNSVvlyjgCHF1Qc9gVtXNvZ9PnA6nAYmB qqvUz/TDw9/NLTlJEkbSs05B4am4yfd5pV6R/32jrPIbXOW++6ae2LpILS/NPBrB y0tGiVUJrO3zVXdBKa4PFmlO8jsXPmMEiicEJa5v2Boeo5SFyFfErw9zDNwSMsQc 27bzbs3O2daXTNMFnwVCCpWUxt1EqWYUXGvBjsChAUI0K10F2/GW9f6YeFsGXqKI d8g1QuCukSt/CvN0pT+g/540mR6i0Azpek1myQfuCu2IhQ1jCJaSWOjoEw== =8VrO -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "ARM: - Address some fallout of the locking rework, this time affecting the way the vgic is configured - Fix an issue where the page table walker frees a subtree and then proceeds with walking what it has just freed... - Check that a given PA donated to the guest is actually memory (only affecting pKVM) - Correctly handle MTE CMOs by Set/Way - Fix the reported address of a watchpoint forwarded to userspace - Fix the freeing of the root of stage-2 page tables - Stop creating spurious PMU events to perform detection of the default PMU and use the existing PMU list instead x86: - Fix a memslot lookup bug in the NX recovery thread that could theoretically let userspace bypass the NX hugepage mitigation - Fix a s/BLOCKING/PENDING bug in SVM's vNMI support - Account exit stats for fastpath VM-Exits that never leave the super tight run-loop - Fix an out-of-bounds bug in the optimized APIC map code, and add a regression test for the race" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: selftests: Add test for race in kvm_recalculate_apic_map() KVM: x86: Bail from kvm_recalculate_phys_map() if x2APIC ID is out-of-bounds KVM: x86: Account fastpath-only VM-Exits in vCPU stats KVM: SVM: vNMI pending bit is V_NMI_PENDING_MASK not V_NMI_BLOCKING_MASK KVM: x86/mmu: Grab memslot for correct address space in NX recovery worker KVM: arm64: Document default vPMU behavior on heterogeneous systems KVM: arm64: Iterate arm_pmus list to probe for default PMU KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed() KVM: arm64: Populate fault info for watchpoint KVM: arm64: Reload PTE after invoking walker callback on preorder traversal KVM: arm64: Handle trap of tagged Set/Way CMOs arm64: Add missing Set/Way CMO encodings KVM: arm64: Prevent unconditional donation of unmapped regions from the host KVM: arm64: vgic: Fix a comment KVM: arm64: vgic: Fix locking comment KVM: arm64: vgic: Wrap vgic_its_create() with config_lock KVM: arm64: vgic: Fix a circular locking issue
This commit is contained in:
commit
b066935bf8
@ -632,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
||||
*
|
||||
* The walker will walk the page-table entries corresponding to the input
|
||||
* address range specified, visiting entries according to the walker flags.
|
||||
* Invalid entries are treated as leaf entries. Leaf entries are reloaded
|
||||
* after invoking the walker callback, allowing the walker to descend into
|
||||
* a newly installed table.
|
||||
* Invalid entries are treated as leaf entries. The visited page table entry is
|
||||
* reloaded after invoking the walker callback, allowing the walker to descend
|
||||
* into a newly installed table.
|
||||
*
|
||||
* Returning a negative error code from the walker callback function will
|
||||
* terminate the walk immediately with the same error code.
|
||||
|
@ -115,8 +115,14 @@
|
||||
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
|
||||
|
||||
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
|
||||
#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
|
||||
#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
|
||||
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
|
||||
#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
|
||||
#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
|
||||
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
|
||||
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
|
||||
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
|
||||
|
||||
/*
|
||||
* Automatically generated definitions for system registers, the
|
||||
|
@ -412,17 +412,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
if (!__populate_fault_info(vcpu))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
__alias(kvm_hyp_handle_memory_fault);
|
||||
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
__alias(kvm_hyp_handle_memory_fault);
|
||||
|
||||
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
if (!__populate_fault_info(vcpu))
|
||||
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
|
||||
return true;
|
||||
|
||||
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
|
||||
|
@ -575,7 +575,7 @@ struct pkvm_mem_donation {
|
||||
|
||||
struct check_walk_data {
|
||||
enum pkvm_page_state desired;
|
||||
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
|
||||
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
|
||||
};
|
||||
|
||||
static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
@ -583,10 +583,7 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
{
|
||||
struct check_walk_data *d = ctx->arg;
|
||||
|
||||
if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
|
||||
return -EINVAL;
|
||||
|
||||
return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
|
||||
return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
|
||||
}
|
||||
|
||||
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
return kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
}
|
||||
|
||||
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
|
||||
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
|
||||
{
|
||||
if (!addr_is_allowed_memory(addr))
|
||||
return PKVM_NOPAGE;
|
||||
|
||||
if (!kvm_pte_valid(pte) && pte)
|
||||
return PKVM_NOPAGE;
|
||||
|
||||
@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx
|
||||
return host_stage2_set_owner_locked(addr, size, host_id);
|
||||
}
|
||||
|
||||
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
|
||||
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
|
||||
{
|
||||
if (!kvm_pte_valid(pte))
|
||||
return PKVM_NOPAGE;
|
||||
|
@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
|
||||
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
|
||||
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
|
||||
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
|
||||
};
|
||||
|
||||
@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
|
||||
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
|
||||
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
|
||||
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
|
||||
};
|
||||
|
||||
|
@ -209,14 +209,26 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
||||
.flags = flags,
|
||||
};
|
||||
int ret = 0;
|
||||
bool reload = false;
|
||||
kvm_pteref_t childp;
|
||||
bool table = kvm_pte_table(ctx.old, level);
|
||||
|
||||
if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
|
||||
if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
|
||||
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
|
||||
reload = true;
|
||||
}
|
||||
|
||||
if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
|
||||
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
|
||||
reload = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reload the page table after invoking the walker callback for leaf
|
||||
* entries or after pre-order traversal, to allow the walker to descend
|
||||
* into a newly installed or replaced table.
|
||||
*/
|
||||
if (reload) {
|
||||
ctx.old = READ_ONCE(*ptep);
|
||||
table = kvm_pte_table(ctx.old, level);
|
||||
}
|
||||
@ -1320,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
|
||||
};
|
||||
|
||||
WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
|
||||
|
||||
WARN_ON(mm_ops->page_count(pgtable) != 1);
|
||||
mm_ops->put_page(pgtable);
|
||||
}
|
||||
|
@ -110,6 +110,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
|
||||
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
|
||||
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
|
||||
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
|
||||
};
|
||||
|
||||
|
@ -694,45 +694,23 @@ out_unlock:
|
||||
|
||||
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
|
||||
{
|
||||
struct perf_event_attr attr = { };
|
||||
struct perf_event *event;
|
||||
struct arm_pmu *pmu = NULL;
|
||||
struct arm_pmu *tmp, *pmu = NULL;
|
||||
struct arm_pmu_entry *entry;
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* Create a dummy event that only counts user cycles. As we'll never
|
||||
* leave this function with the event being live, it will never
|
||||
* count anything. But it allows us to probe some of the PMU
|
||||
* details. Yes, this is terrible.
|
||||
*/
|
||||
attr.type = PERF_TYPE_RAW;
|
||||
attr.size = sizeof(attr);
|
||||
attr.pinned = 1;
|
||||
attr.disabled = 0;
|
||||
attr.exclude_user = 0;
|
||||
attr.exclude_kernel = 1;
|
||||
attr.exclude_hv = 1;
|
||||
attr.exclude_host = 1;
|
||||
attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
|
||||
attr.sample_period = GENMASK(63, 0);
|
||||
mutex_lock(&arm_pmus_lock);
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
kvm_pmu_perf_overflow, &attr);
|
||||
cpu = smp_processor_id();
|
||||
list_for_each_entry(entry, &arm_pmus, entry) {
|
||||
tmp = entry->arm_pmu;
|
||||
|
||||
if (IS_ERR(event)) {
|
||||
pr_err_once("kvm: pmu event creation failed %ld\n",
|
||||
PTR_ERR(event));
|
||||
return NULL;
|
||||
if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
|
||||
pmu = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (event->pmu) {
|
||||
pmu = to_arm_pmu(event->pmu);
|
||||
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
|
||||
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
||||
pmu = NULL;
|
||||
}
|
||||
|
||||
perf_event_disable(event);
|
||||
perf_event_release_kernel(event);
|
||||
mutex_unlock(&arm_pmus_lock);
|
||||
|
||||
return pmu;
|
||||
}
|
||||
@ -912,7 +890,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
return -EBUSY;
|
||||
|
||||
if (!kvm->arch.arm_pmu) {
|
||||
/* No PMU set, get the default one */
|
||||
/*
|
||||
* No PMU set, get the default one.
|
||||
*
|
||||
* The observant among you will notice that the supported_cpus
|
||||
* mask does not get updated for the default PMU even though it
|
||||
* is quite possible the selected instance supports only a
|
||||
* subset of cores in the system. This is intentional, and
|
||||
* upholds the preexisting behavior on heterogeneous systems
|
||||
* where vCPUs can be scheduled on any core but the guest
|
||||
* counters could stop working.
|
||||
*/
|
||||
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
|
||||
if (!kvm->arch.arm_pmu)
|
||||
return -ENODEV;
|
||||
|
@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_dcgsw(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (!kvm_has_mte(vcpu->kvm)) {
|
||||
kvm_inject_undefined(vcpu);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Treat MTE S/W ops as we treat the classic ones: with contempt */
|
||||
return access_dcsw(vcpu, p, r);
|
||||
}
|
||||
|
||||
static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
|
||||
{
|
||||
switch (r->aarch32_map) {
|
||||
@ -1756,8 +1769,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
|
||||
*/
|
||||
static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
|
||||
{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
|
||||
{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
|
||||
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
|
||||
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
|
||||
{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
|
||||
{ SYS_DESC(SYS_DC_CISW), access_dcsw },
|
||||
{ SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
|
||||
{ SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
|
||||
|
||||
DBG_BCR_BVR_WCR_WVR_EL1(0),
|
||||
DBG_BCR_BVR_WCR_WVR_EL1(1),
|
||||
|
@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
* KVM io device for the redistributor that belongs to this VCPU.
|
||||
*/
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
mutex_lock(&vcpu->kvm->arch.config_lock);
|
||||
mutex_lock(&vcpu->kvm->slots_lock);
|
||||
ret = vgic_register_redist_iodev(vcpu);
|
||||
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
||||
mutex_unlock(&vcpu->kvm->slots_lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
|
||||
|
||||
/**
|
||||
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
|
||||
* is a GICv2. A GICv3 must be explicitly initialized by the guest using the
|
||||
* is a GICv2. A GICv3 must be explicitly initialized by userspace using the
|
||||
* KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
|
||||
* @kvm: kvm struct pointer
|
||||
*/
|
||||
@ -446,11 +446,13 @@ int vgic_lazy_init(struct kvm *kvm)
|
||||
int kvm_vgic_map_resources(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
gpa_t dist_base;
|
||||
int ret = 0;
|
||||
|
||||
if (likely(vgic_ready(kvm)))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
if (vgic_ready(kvm))
|
||||
goto out;
|
||||
@ -463,13 +465,26 @@ int kvm_vgic_map_resources(struct kvm *kvm)
|
||||
else
|
||||
ret = vgic_v3_map_resources(kvm);
|
||||
|
||||
if (ret)
|
||||
if (ret) {
|
||||
__kvm_vgic_destroy(kvm);
|
||||
else
|
||||
dist->ready = true;
|
||||
goto out;
|
||||
}
|
||||
dist->ready = true;
|
||||
dist_base = dist->vgic_dist_base;
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
|
||||
ret = vgic_register_dist_iodev(kvm, dist_base,
|
||||
kvm_vgic_global_state.type);
|
||||
if (ret) {
|
||||
kvm_err("Unable to register VGIC dist MMIO regions\n");
|
||||
kvm_vgic_destroy(kvm);
|
||||
}
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return ret;
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1936,6 +1936,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
|
||||
|
||||
static int vgic_its_create(struct kvm_device *dev, u32 type)
|
||||
{
|
||||
int ret;
|
||||
struct vgic_its *its;
|
||||
|
||||
if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
|
||||
@ -1945,9 +1946,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
|
||||
if (!its)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&dev->kvm->arch.config_lock);
|
||||
|
||||
if (vgic_initialized(dev->kvm)) {
|
||||
int ret = vgic_v4_init(dev->kvm);
|
||||
ret = vgic_v4_init(dev->kvm);
|
||||
if (ret < 0) {
|
||||
mutex_unlock(&dev->kvm->arch.config_lock);
|
||||
kfree(its);
|
||||
return ret;
|
||||
}
|
||||
@ -1960,12 +1964,10 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
|
||||
|
||||
/* Yep, even more trickery for lock ordering... */
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
mutex_lock(&dev->kvm->arch.config_lock);
|
||||
mutex_lock(&its->cmd_lock);
|
||||
mutex_lock(&its->its_lock);
|
||||
mutex_unlock(&its->its_lock);
|
||||
mutex_unlock(&its->cmd_lock);
|
||||
mutex_unlock(&dev->kvm->arch.config_lock);
|
||||
#endif
|
||||
|
||||
its->vgic_its_base = VGIC_ADDR_UNDEF;
|
||||
@ -1986,7 +1988,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
|
||||
|
||||
dev->private = its;
|
||||
|
||||
return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
|
||||
ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1);
|
||||
|
||||
mutex_unlock(&dev->kvm->arch.config_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vgic_its_destroy(struct kvm_device *kvm_dev)
|
||||
|
@ -102,7 +102,11 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
|
||||
if (get_user(addr, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
/*
|
||||
* Since we can't hold config_lock while registering the redistributor
|
||||
* iodevs, take the slots_lock immediately.
|
||||
*/
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
switch (attr->attr) {
|
||||
case KVM_VGIC_V2_ADDR_TYPE_DIST:
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
@ -182,6 +186,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
if (write) {
|
||||
r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size);
|
||||
if (!r)
|
||||
@ -189,9 +194,10 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
|
||||
} else {
|
||||
addr = *addr_ptr;
|
||||
}
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
if (!r && !write)
|
||||
r = put_user(addr, uaddr);
|
||||
|
@ -769,10 +769,13 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
|
||||
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
|
||||
struct vgic_redist_region *rdreg;
|
||||
gpa_t rd_base;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
|
||||
if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
|
||||
return 0;
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* We may be creating VCPUs before having set the base address for the
|
||||
@ -782,10 +785,12 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions);
|
||||
if (!rdreg)
|
||||
return 0;
|
||||
goto out_unlock;
|
||||
|
||||
if (!vgic_v3_check_base(kvm))
|
||||
return -EINVAL;
|
||||
if (!vgic_v3_check_base(kvm)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
vgic_cpu->rdreg = rdreg;
|
||||
vgic_cpu->rdreg_index = rdreg->free_index;
|
||||
@ -799,16 +804,20 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
|
||||
rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
|
||||
rd_dev->redist_vcpu = vcpu;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
|
||||
2 * SZ_64K, &rd_dev->dev);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Protected by slots_lock */
|
||||
rdreg->free_index++;
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
|
||||
@ -834,12 +843,10 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
|
||||
/* The current c failed, so iterate over the previous ones. */
|
||||
int i;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
for (i = 0; i < c; i++) {
|
||||
vcpu = kvm_get_vcpu(kvm, i);
|
||||
vgic_unregister_redist_iodev(vcpu);
|
||||
}
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -938,7 +945,9 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -950,8 +959,10 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
|
||||
if (ret) {
|
||||
struct vgic_redist_region *rdreg;
|
||||
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
|
||||
vgic_v3_free_redist_region(rdreg);
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1096,7 +1096,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
|
||||
enum vgic_type type)
|
||||
{
|
||||
struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
|
||||
int ret = 0;
|
||||
unsigned int len;
|
||||
|
||||
switch (type) {
|
||||
@ -1114,10 +1113,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
|
||||
io_device->iodev_type = IODEV_DIST;
|
||||
io_device->redist_vcpu = NULL;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
|
||||
len, &io_device->dev);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
return ret;
|
||||
return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
|
||||
len, &io_device->dev);
|
||||
}
|
||||
|
@ -312,12 +312,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
|
||||
if (ret) {
|
||||
kvm_err("Unable to register VGIC MMIO regions\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
|
||||
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
|
||||
kvm_vgic_global_state.vcpu_base,
|
||||
|
@ -539,7 +539,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int ret = 0;
|
||||
unsigned long c;
|
||||
|
||||
kvm_for_each_vcpu(c, vcpu, kvm) {
|
||||
@ -569,12 +568,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
|
||||
if (ret) {
|
||||
kvm_err("Unable to register VGICv3 dist MMIO regions\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (kvm_vgic_global_state.has_gicv4_1)
|
||||
vgic_v4_configure_vsgis(kvm);
|
||||
|
||||
|
@ -184,13 +184,14 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
/* Must be called with the kvm lock held */
|
||||
void vgic_v4_configure_vsgis(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long i;
|
||||
|
||||
lockdep_assert_held(&kvm->arch.config_lock);
|
||||
|
||||
kvm_arm_halt_guest(kvm);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
|
@ -228,6 +228,23 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
|
||||
u32 xapic_id = kvm_xapic_id(apic);
|
||||
u32 physical_id;
|
||||
|
||||
/*
|
||||
* For simplicity, KVM always allocates enough space for all possible
|
||||
* xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on
|
||||
* without the optimized map.
|
||||
*/
|
||||
if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Bail if a vCPU was added and/or enabled its APIC between allocating
|
||||
* the map and doing the actual calculations for the map. Note, KVM
|
||||
* hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
|
||||
* the compiler decides to reload x2apic_id after this check.
|
||||
*/
|
||||
if (x2apic_id > new->max_apic_id)
|
||||
return -E2BIG;
|
||||
|
||||
/*
|
||||
* Deliberately truncate the vCPU ID when detecting a mismatched APIC
|
||||
* ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
|
||||
@ -253,8 +270,7 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
|
||||
*/
|
||||
if (vcpu->kvm->arch.x2apic_format) {
|
||||
/* See also kvm_apic_match_physical_addr(). */
|
||||
if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
|
||||
x2apic_id <= new->max_apic_id)
|
||||
if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
|
||||
new->phys_map[x2apic_id] = apic;
|
||||
|
||||
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
|
||||
|
@ -7091,7 +7091,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
|
||||
*/
|
||||
slot = NULL;
|
||||
if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
|
||||
slot = gfn_to_memslot(kvm, sp->gfn);
|
||||
struct kvm_memslots *slots;
|
||||
|
||||
slots = kvm_memslots_for_spte_role(kvm, sp->role);
|
||||
slot = __gfn_to_memslot(slots, sp->gfn);
|
||||
WARN_ON_ONCE(!slot);
|
||||
}
|
||||
|
||||
|
@ -3510,7 +3510,7 @@ static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
|
||||
if (!is_vnmi_enabled(svm))
|
||||
return false;
|
||||
|
||||
return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
|
||||
return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
|
||||
}
|
||||
|
||||
static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
|
||||
|
@ -10758,6 +10758,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Note, VM-Exits that go down the "slow" path are accounted below. */
|
||||
++vcpu->stat.exits;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
|
||||
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
|
||||
TEST_GEN_PROGS_x86_64 += demand_paging_test
|
||||
TEST_GEN_PROGS_x86_64 += dirty_log_test
|
||||
|
74
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
Normal file
74
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
Normal file
@ -0,0 +1,74 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Test edge cases and race conditions in kvm_recalculate_apic_map().
|
||||
*/
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <pthread.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "processor.h"
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "apic.h"
|
||||
|
||||
#define TIMEOUT 5 /* seconds */
|
||||
|
||||
#define LAPIC_DISABLED 0
|
||||
#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
|
||||
#define MAX_XAPIC_ID 0xff
|
||||
|
||||
static void *race(void *arg)
|
||||
{
|
||||
struct kvm_lapic_state lapic = {};
|
||||
struct kvm_vcpu *vcpu = arg;
|
||||
|
||||
while (1) {
|
||||
/* Trigger kvm_recalculate_apic_map(). */
|
||||
vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
|
||||
pthread_testcancel();
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
|
||||
struct kvm_vcpu *vcpuN;
|
||||
struct kvm_vm *vm;
|
||||
pthread_t thread;
|
||||
time_t t;
|
||||
int i;
|
||||
|
||||
kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
|
||||
|
||||
/*
|
||||
* Create the max number of vCPUs supported by selftests so that KVM
|
||||
* has decent amount of work to do when recalculating the map, i.e. to
|
||||
* make the problematic window large enough to hit.
|
||||
*/
|
||||
vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
|
||||
|
||||
/*
|
||||
* Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
|
||||
* due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
|
||||
*/
|
||||
for (i = 0; i < KVM_MAX_VCPUS; i++)
|
||||
vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
|
||||
|
||||
ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
|
||||
|
||||
vcpuN = vcpus[KVM_MAX_VCPUS - 1];
|
||||
for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
|
||||
vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
|
||||
vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
|
||||
}
|
||||
|
||||
ASSERT_EQ(pthread_cancel(thread), 0);
|
||||
ASSERT_EQ(pthread_join(thread, NULL), 0);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user