mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-16 21:35:07 +00:00
ARM:
* Take care of faults occuring between the PARange and IPA range by injecting an exception * Fix S2 faults taken from a host EL0 in protected mode * Work around Oops caused by a PMU access from a 32bit guest when PMU has been created. This is a temporary bodge until we fix it for good. x86: * Fix potential races when walking host page table * Fix shadow page table leak when KVM runs nested * Work around bug in userspace when KVM synthesizes leaf 0x80000021 on older (pre-EPYC) or Intel processors Generic (but affects only RISC-V): * Fix bad user ABI for KVM_EXIT_SYSTEM_EVENT -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmJuxI4UHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroNjfQf/X4Rn6+sTkXRS0UHWEu+q9FjJ+mIx ZUWdbncf0brUB1RPAFfKaiQHo0t2Req+iTlpqZL0nVQ4myNUelHYube/sZdK/aBR WOjKZE0hugGyMH3js2bsTdgzbcphThyYAX97qGZNb7tsPGhBiw7c98KhjxlieJab D8LMNtM3uzPDxg422GfOm8ge2VbpySS5oRoGHfbD+4FiLYlXoCYfZuzlFwFFIGxw uHm5zzfX5jshayFpFYVSJHtARXlpwJWKz9yl63QjHrhVitW4m5j4re3aNfboL6Pd F5Z9K+DKhJLAH5cqmgiPPe2CGMvmRwKrN3F9MqV91xDPBT8J4rrowEeboQ== =SwSU -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "ARM: - Take care of faults occuring between the PARange and IPA range by injecting an exception - Fix S2 faults taken from a host EL0 in protected mode - Work around Oops caused by a PMU access from a 32bit guest when PMU has been created. This is a temporary bodge until we fix it for good. x86: - Fix potential races when walking host page table - Fix shadow page table leak when KVM runs nested - Work around bug in userspace when KVM synthesizes leaf 0x80000021 on older (pre-EPYC) or Intel processors Generic (but affects only RISC-V): - Fix bad user ABI for KVM_EXIT_SYSTEM_EVENT" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: work around QEMU issue with synthetic CPUID leaves Revert "x86/mm: Introduce lookup_address_in_mm()" KVM: x86/mmu: fix potential races when walking host page table KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT KVM: x86/mmu: Do not create SPTEs for GFNs that exceed host.MAXPHYADDR KVM: arm64: Inject exception on out-of-IPA-range translation fault KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set KVM: arm64: Handle host stage-2 faults from 32-bit EL0
This commit is contained in:
commit
b6b2648911
@ -5986,16 +5986,16 @@ should put the acknowledged interrupt vector into the 'epr' field.
|
||||
#define KVM_SYSTEM_EVENT_RESET 2
|
||||
#define KVM_SYSTEM_EVENT_CRASH 3
|
||||
__u32 type;
|
||||
__u64 flags;
|
||||
__u32 ndata;
|
||||
__u64 data[16];
|
||||
} system_event;
|
||||
|
||||
If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
|
||||
a system-level event using some architecture specific mechanism (hypercall
|
||||
or some special instruction). In case of ARM64, this is triggered using
|
||||
HVC instruction based PSCI call from the vcpu. The 'type' field describes
|
||||
the system-level event type. The 'flags' field describes architecture
|
||||
specific flags for the system-level event.
|
||||
HVC instruction based PSCI call from the vcpu.
|
||||
|
||||
The 'type' field describes the system-level event type.
|
||||
Valid values for 'type' are:
|
||||
|
||||
- KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
|
||||
@ -6010,10 +6010,20 @@ Valid values for 'type' are:
|
||||
to ignore the request, or to gather VM memory core dump and/or
|
||||
reset/shutdown of the VM.
|
||||
|
||||
Valid flags are:
|
||||
If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain
|
||||
architecture specific information for the system-level event. Only
|
||||
the first `ndata` items (possibly zero) of the data array are valid.
|
||||
|
||||
- KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued
|
||||
a SYSTEM_RESET2 call according to v1.1 of the PSCI specification.
|
||||
- for arm64, data[0] is set to KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 if
|
||||
the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI
|
||||
specification.
|
||||
|
||||
- for RISC-V, data[0] is set to the value of the second argument of the
|
||||
``sbi_system_reset`` call.
|
||||
|
||||
Previous versions of Linux defined a `flags` member in this struct. The
|
||||
field is now aliased to `data[0]`. Userspace can assume that it is only
|
||||
written if ndata is greater than 0.
|
||||
|
||||
::
|
||||
|
||||
|
@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
|
||||
void kvm_inject_vabt(struct kvm_vcpu *vcpu);
|
||||
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
|
||||
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -198,15 +198,15 @@ SYM_CODE_START(__kvm_hyp_host_vector)
|
||||
invalid_host_el2_vect // FIQ EL2h
|
||||
invalid_host_el2_vect // Error EL2h
|
||||
|
||||
host_el1_sync_vect // Synchronous 64-bit EL1
|
||||
invalid_host_el1_vect // IRQ 64-bit EL1
|
||||
invalid_host_el1_vect // FIQ 64-bit EL1
|
||||
invalid_host_el1_vect // Error 64-bit EL1
|
||||
host_el1_sync_vect // Synchronous 64-bit EL1/EL0
|
||||
invalid_host_el1_vect // IRQ 64-bit EL1/EL0
|
||||
invalid_host_el1_vect // FIQ 64-bit EL1/EL0
|
||||
invalid_host_el1_vect // Error 64-bit EL1/EL0
|
||||
|
||||
invalid_host_el1_vect // Synchronous 32-bit EL1
|
||||
invalid_host_el1_vect // IRQ 32-bit EL1
|
||||
invalid_host_el1_vect // FIQ 32-bit EL1
|
||||
invalid_host_el1_vect // Error 32-bit EL1
|
||||
host_el1_sync_vect // Synchronous 32-bit EL1/EL0
|
||||
invalid_host_el1_vect // IRQ 32-bit EL1/EL0
|
||||
invalid_host_el1_vect // FIQ 32-bit EL1/EL0
|
||||
invalid_host_el1_vect // Error 32-bit EL1/EL0
|
||||
SYM_CODE_END(__kvm_hyp_host_vector)
|
||||
|
||||
/*
|
||||
|
@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
|
||||
inject_abt64(vcpu, true, addr);
|
||||
}
|
||||
|
||||
void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long addr, esr;
|
||||
|
||||
addr = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
|
||||
|
||||
if (kvm_vcpu_trap_is_iabt(vcpu))
|
||||
kvm_inject_pabt(vcpu, addr);
|
||||
else
|
||||
kvm_inject_dabt(vcpu, addr);
|
||||
|
||||
/*
|
||||
* If AArch64 or LPAE, set FSC to 0 to indicate an Address
|
||||
* Size Fault at level 0, as if exceeding PARange.
|
||||
*
|
||||
* Non-LPAE guests will only get the external abort, as there
|
||||
* is no way to to describe the ASF.
|
||||
*/
|
||||
if (vcpu_el1_is_32bit(vcpu) &&
|
||||
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
|
||||
return;
|
||||
|
||||
esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
|
||||
esr &= ~GENMASK_ULL(5, 0);
|
||||
vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_inject_undefined - inject an undefined instruction into the guest
|
||||
* @vcpu: The vCPU in which to inject the exception
|
||||
|
@ -1337,6 +1337,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
|
||||
if (fault_status == FSC_FAULT) {
|
||||
/* Beyond sanitised PARange (which is the IPA limit) */
|
||||
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
|
||||
kvm_inject_size_fault(vcpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Falls between the IPA range and the PARange? */
|
||||
if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
|
||||
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
|
||||
|
||||
if (is_iabt)
|
||||
kvm_inject_pabt(vcpu, fault_ipa);
|
||||
else
|
||||
kvm_inject_dabt(vcpu, fault_ipa);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Synchronous External Abort? */
|
||||
if (kvm_vcpu_abt_issea(vcpu)) {
|
||||
/*
|
||||
|
@ -177,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return 0;
|
||||
|
||||
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
|
||||
|
||||
if (kvm_pmu_pmc_is_chained(pmc) &&
|
||||
@ -198,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
|
||||
{
|
||||
u64 reg;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
|
||||
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
|
||||
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
|
||||
@ -322,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
|
||||
return;
|
||||
|
||||
@ -357,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
if (!val)
|
||||
if (!kvm_vcpu_has_pmu(vcpu) || !val)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
|
||||
@ -527,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
int i;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
|
||||
return;
|
||||
|
||||
@ -576,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_E) {
|
||||
kvm_pmu_enable_counter_mask(vcpu,
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
|
||||
@ -739,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
||||
{
|
||||
u64 reg, mask;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
mask = ARMV8_PMU_EVTYPE_MASK;
|
||||
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
|
||||
mask |= kvm_pmu_event_mask(vcpu->kvm);
|
||||
@ -827,6 +845,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
||||
u64 val, mask = 0;
|
||||
int base, i, nr_events;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return 0;
|
||||
|
||||
if (!pmceid1) {
|
||||
val = read_sysreg(pmceid0_el0);
|
||||
base = 0;
|
||||
|
@ -181,7 +181,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
|
||||
|
||||
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
|
||||
vcpu->run->system_event.type = type;
|
||||
vcpu->run->system_event.flags = flags;
|
||||
vcpu->run->system_event.ndata = 1;
|
||||
vcpu->run->system_event.data[0] = flags;
|
||||
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,7 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
|
||||
void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run,
|
||||
u32 type, u64 flags)
|
||||
u32 type, u64 reason)
|
||||
{
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *tmp;
|
||||
@ -94,7 +94,8 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
|
||||
|
||||
memset(&run->system_event, 0, sizeof(run->system_event));
|
||||
run->system_event.type = type;
|
||||
run->system_event.flags = flags;
|
||||
run->system_event.ndata = 1;
|
||||
run->system_event.data[0] = reason;
|
||||
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
||||
}
|
||||
|
||||
|
@ -559,10 +559,6 @@ static inline void update_page_count(int level, unsigned long pages) { }
|
||||
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
|
||||
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
|
||||
unsigned int *level);
|
||||
|
||||
struct mm_struct;
|
||||
extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
|
||||
unsigned int *level);
|
||||
extern pmd_t *lookup_pmd_address(unsigned long address);
|
||||
extern phys_addr_t slow_virt_to_phys(void *__address);
|
||||
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
|
||||
|
@ -1085,12 +1085,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
case 0x80000000:
|
||||
entry->eax = min(entry->eax, 0x80000021);
|
||||
/*
|
||||
* Serializing LFENCE is reported in a multitude of ways,
|
||||
* and NullSegClearsBase is not reported in CPUID on Zen2;
|
||||
* help userspace by providing the CPUID leaf ourselves.
|
||||
* Serializing LFENCE is reported in a multitude of ways, and
|
||||
* NullSegClearsBase is not reported in CPUID on Zen2; help
|
||||
* userspace by providing the CPUID leaf ourselves.
|
||||
*
|
||||
* However, only do it if the host has CPUID leaf 0x8000001d.
|
||||
* QEMU thinks that it can query the host blindly for that
|
||||
* CPUID leaf if KVM reports that it supports 0x8000001d or
|
||||
* above. The processor merrily returns values from the
|
||||
* highest Intel leaf which QEMU tries to use as the guest's
|
||||
* 0x8000001d. Even worse, this can result in an infinite
|
||||
* loop if said highest leaf has no subleaves indexed by ECX.
|
||||
*/
|
||||
if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
|
||||
|| !static_cpu_has_bug(X86_BUG_NULL_SEG))
|
||||
if (entry->eax >= 0x8000001d &&
|
||||
(static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
|
||||
|| !static_cpu_has_bug(X86_BUG_NULL_SEG)))
|
||||
entry->eax = max(entry->eax, 0x80000021);
|
||||
break;
|
||||
case 0x80000001:
|
||||
|
@ -65,6 +65,30 @@ static __always_inline u64 rsvd_bits(int s, int e)
|
||||
return ((2ULL << (e - s)) - 1) << s;
|
||||
}
|
||||
|
||||
/*
|
||||
* The number of non-reserved physical address bits irrespective of features
|
||||
* that repurpose legal bits, e.g. MKTME.
|
||||
*/
|
||||
extern u8 __read_mostly shadow_phys_bits;
|
||||
|
||||
static inline gfn_t kvm_mmu_max_gfn(void)
|
||||
{
|
||||
/*
|
||||
* Note that this uses the host MAXPHYADDR, not the guest's.
|
||||
* EPT/NPT cannot support GPAs that would exceed host.MAXPHYADDR;
|
||||
* assuming KVM is running on bare metal, guest accesses beyond
|
||||
* host.MAXPHYADDR will hit a #PF(RSVD) and never cause a vmexit
|
||||
* (either EPT Violation/Misconfig or #NPF), and so KVM will never
|
||||
* install a SPTE for such addresses. If KVM is running as a VM
|
||||
* itself, on the other hand, it might see a MAXPHYADDR that is less
|
||||
* than hardware's real MAXPHYADDR. Using the host MAXPHYADDR
|
||||
* disallows such SPTEs entirely and simplifies the TDP MMU.
|
||||
*/
|
||||
int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52;
|
||||
|
||||
return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
|
||||
}
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
|
||||
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
|
||||
|
||||
|
@ -2804,8 +2804,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
|
||||
const struct kvm_memory_slot *slot)
|
||||
{
|
||||
unsigned long hva;
|
||||
pte_t *pte;
|
||||
int level;
|
||||
unsigned long flags;
|
||||
int level = PG_LEVEL_4K;
|
||||
pgd_t pgd;
|
||||
p4d_t p4d;
|
||||
pud_t pud;
|
||||
pmd_t pmd;
|
||||
|
||||
if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
|
||||
return PG_LEVEL_4K;
|
||||
@ -2820,10 +2824,43 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
|
||||
*/
|
||||
hva = __gfn_to_hva_memslot(slot, gfn);
|
||||
|
||||
pte = lookup_address_in_mm(kvm->mm, hva, &level);
|
||||
if (unlikely(!pte))
|
||||
return PG_LEVEL_4K;
|
||||
/*
|
||||
* Lookup the mapping level in the current mm. The information
|
||||
* may become stale soon, but it is safe to use as long as
|
||||
* 1) mmu_notifier_retry was checked after taking mmu_lock, and
|
||||
* 2) mmu_lock is taken now.
|
||||
*
|
||||
* We still need to disable IRQs to prevent concurrent tear down
|
||||
* of page tables.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
|
||||
if (pgd_none(pgd))
|
||||
goto out;
|
||||
|
||||
p4d = READ_ONCE(*p4d_offset(&pgd, hva));
|
||||
if (p4d_none(p4d) || !p4d_present(p4d))
|
||||
goto out;
|
||||
|
||||
pud = READ_ONCE(*pud_offset(&p4d, hva));
|
||||
if (pud_none(pud) || !pud_present(pud))
|
||||
goto out;
|
||||
|
||||
if (pud_large(pud)) {
|
||||
level = PG_LEVEL_1G;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pmd = READ_ONCE(*pmd_offset(&pud, hva));
|
||||
if (pmd_none(pmd) || !pmd_present(pmd))
|
||||
goto out;
|
||||
|
||||
if (pmd_large(pmd))
|
||||
level = PG_LEVEL_2M;
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
return level;
|
||||
}
|
||||
|
||||
@ -2992,9 +3029,15 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
|
||||
/*
|
||||
* If MMIO caching is disabled, emulate immediately without
|
||||
* touching the shadow page tables as attempting to install an
|
||||
* MMIO SPTE will just be an expensive nop.
|
||||
* MMIO SPTE will just be an expensive nop. Do not cache MMIO
|
||||
* whose gfn is greater than host.MAXPHYADDR, any guest that
|
||||
* generates such gfns is running nested and is being tricked
|
||||
* by L0 userspace (you can observe gfn > L1.MAXPHYADDR if
|
||||
* and only if L1's MAXPHYADDR is inaccurate with respect to
|
||||
* the hardware's).
|
||||
*/
|
||||
if (unlikely(!shadow_mmio_value)) {
|
||||
if (unlikely(!shadow_mmio_value) ||
|
||||
unlikely(fault->gfn > kvm_mmu_max_gfn())) {
|
||||
*ret_val = RET_PF_EMULATE;
|
||||
return true;
|
||||
}
|
||||
|
@ -201,12 +201,6 @@ static inline bool is_removed_spte(u64 spte)
|
||||
*/
|
||||
extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
|
||||
|
||||
/*
|
||||
* The number of non-reserved physical address bits irrespective of features
|
||||
* that repurpose legal bits, e.g. MKTME.
|
||||
*/
|
||||
extern u8 __read_mostly shadow_phys_bits;
|
||||
|
||||
static inline bool is_mmio_spte(u64 spte)
|
||||
{
|
||||
return (spte & shadow_mmio_mask) == shadow_mmio_value &&
|
||||
|
@ -815,14 +815,15 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
return iter->yielded;
|
||||
}
|
||||
|
||||
static inline gfn_t tdp_mmu_max_gfn_host(void)
|
||||
static inline gfn_t tdp_mmu_max_gfn_exclusive(void)
|
||||
{
|
||||
/*
|
||||
* Bound TDP MMU walks at host.MAXPHYADDR, guest accesses beyond that
|
||||
* will hit a #PF(RSVD) and never hit an EPT Violation/Misconfig / #NPF,
|
||||
* and so KVM will never install a SPTE for such addresses.
|
||||
* Bound TDP MMU walks at host.MAXPHYADDR. KVM disallows memslots with
|
||||
* a gpa range that would exceed the max gfn, and KVM does not create
|
||||
* MMIO SPTEs for "impossible" gfns, instead sending such accesses down
|
||||
* the slow emulation path every time.
|
||||
*/
|
||||
return 1ULL << (shadow_phys_bits - PAGE_SHIFT);
|
||||
return kvm_mmu_max_gfn() + 1;
|
||||
}
|
||||
|
||||
static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
@ -830,7 +831,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
{
|
||||
struct tdp_iter iter;
|
||||
|
||||
gfn_t end = tdp_mmu_max_gfn_host();
|
||||
gfn_t end = tdp_mmu_max_gfn_exclusive();
|
||||
gfn_t start = 0;
|
||||
|
||||
for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
|
||||
@ -923,7 +924,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
{
|
||||
struct tdp_iter iter;
|
||||
|
||||
end = min(end, tdp_mmu_max_gfn_host());
|
||||
end = min(end, tdp_mmu_max_gfn_exclusive());
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
|
@ -10020,12 +10020,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
||||
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
|
||||
vcpu->run->system_event.ndata = 0;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
||||
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
|
||||
vcpu->run->system_event.ndata = 0;
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
@ -12009,8 +12011,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
|
||||
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
|
||||
if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
|
||||
return -EINVAL;
|
||||
|
||||
return kvm_alloc_memslot_metadata(kvm, new);
|
||||
}
|
||||
|
||||
if (change == KVM_MR_FLAGS_ONLY)
|
||||
memcpy(&new->arch, &old->arch, sizeof(old->arch));
|
||||
|
@ -638,17 +638,6 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(lookup_address);
|
||||
|
||||
/*
|
||||
* Lookup the page table entry for a virtual address in a given mm. Return a
|
||||
* pointer to the entry and the level of the mapping.
|
||||
*/
|
||||
pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
|
||||
unsigned int *level)
|
||||
{
|
||||
return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(lookup_address_in_mm);
|
||||
|
||||
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
|
||||
unsigned int *level)
|
||||
{
|
||||
|
@ -445,7 +445,13 @@ struct kvm_run {
|
||||
#define KVM_SYSTEM_EVENT_RESET 2
|
||||
#define KVM_SYSTEM_EVENT_CRASH 3
|
||||
__u32 type;
|
||||
__u64 flags;
|
||||
__u32 ndata;
|
||||
union {
|
||||
#ifndef __KERNEL__
|
||||
__u64 flags;
|
||||
#endif
|
||||
__u64 data[16];
|
||||
};
|
||||
} system_event;
|
||||
/* KVM_EXIT_S390_STSI */
|
||||
struct {
|
||||
@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
|
||||
#define KVM_CAP_PMU_CAPABILITY 212
|
||||
#define KVM_CAP_DISABLE_QUIRKS2 213
|
||||
/* #define KVM_CAP_VM_TSC_CONTROL 214 */
|
||||
#define KVM_CAP_SYSTEM_EVENT_DATA 215
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -4354,6 +4354,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
||||
return 0;
|
||||
#endif
|
||||
case KVM_CAP_BINARY_STATS_FD:
|
||||
case KVM_CAP_SYSTEM_EVENT_DATA:
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user