Merge branch kvm-arm64/nv-prefix into kvmarm/next

* kvm-arm64/nv-prefix:
  : Preamble to NV support, courtesy of Marc Zyngier.
  :
  : This brings in a set of prerequisite patches for supporting nested
  : virtualization in KVM/arm64. Of course, there is a long way to go until
  : NV is actually enabled in KVM.
  :
  :  - Introduce cpucap / vCPU feature flag to pivot the NV code on
  :
  :  - Add support for EL2 vCPU register state
  :
  :  - Basic nested exception handling
  :
  :  - Hide unsupported features from the ID registers for NV-capable VMs
  KVM: arm64: nv: Use reg_to_encoding() to get sysreg ID
  KVM: arm64: nv: Only toggle cache for virtual EL2 when SCTLR_EL2 changes
  KVM: arm64: nv: Filter out unsupported features from ID regs
  KVM: arm64: nv: Emulate EL12 register accesses from the virtual EL2
  KVM: arm64: nv: Allow a sysreg to be hidden from userspace only
  KVM: arm64: nv: Emulate PSTATE.M for a guest hypervisor
  KVM: arm64: nv: Add accessors for SPSR_EL1, ELR_EL1 and VBAR_EL1 from virtual EL2
  KVM: arm64: nv: Handle SMCs taken from virtual EL2
  KVM: arm64: nv: Handle trapped ERET from virtual EL2
  KVM: arm64: nv: Inject HVC exceptions to the virtual EL2
  KVM: arm64: nv: Support virtual EL2 exceptions
  KVM: arm64: nv: Handle HCR_EL2.NV system register traps
  KVM: arm64: nv: Add nested virt VCPU primitives for vEL2 VCPU state
  KVM: arm64: nv: Add EL2 system registers to vcpu context
  KVM: arm64: nv: Allow userspace to set PSR_MODE_EL2x
  KVM: arm64: nv: Reset VCPU to EL2 registers if VCPU nested virt is set
  KVM: arm64: nv: Introduce nested virtualization VCPU feature
  KVM: arm64: Use the S2 MMU context to iterate over S2 table
  arm64: Add ARM64_HAS_NESTED_VIRT cpufeature

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
commit 0d3b2b4d23
Oliver Upton, 2023-02-13 23:33:41 +00:00
26 changed files with 1042 additions and 47 deletions
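For context, the KVM_ARM_VCPU_HAS_EL2 feature bit added to the uapi header
below is the userspace opt-in for all of this. A minimal sketch of how a VMM
might request an EL2-capable vCPU, assuming a host booted with
kvm-arm.mode=nested (the helper name is made up and error handling is
trimmed):

/* Sketch only: request a vCPU that resets into virtual EL2. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_nv_vcpu(int vm_fd)
{
	struct kvm_vcpu_init init;
	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);

	memset(&init, 0, sizeof(init));
	/* Start from the host's preferred target... */
	if (vcpu_fd < 0 || ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
		return -1;
	/* ...and opt into the new EL2 feature (bit 7 in the uapi change). */
	init.features[0] |= 1U << KVM_ARM_VCPU_HAS_EL2;

	/* Expected to fail with EINVAL when ARM64_HAS_NESTED_VIRT is off. */
	if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init) < 0)
		return -1;

	return vcpu_fd;
}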

Documentation/admin-guide/kernel-parameters.txt

@@ -2553,9 +2553,14 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
nested: VHE-based mode with support for nested
virtualization. Requires at least ARMv8.3
hardware.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
for the host.
for the host. "nested" is experimental and should be
used with extreme caution.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM] Trap guest accesses to GICv3 group-0
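For illustration, selecting the experimental mode boils down to a single
kernel command line argument on a VHE-capable host (parsed by
early_kvm_mode_cfg() in arch/arm64/kvm/arm.c further down):

    kvm-arm.mode=nested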

arch/arm64/include/asm/esr.h

@@ -272,6 +272,10 @@
(((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
ESR_ELx_SYS64_ISS_OP2_SHIFT))
/* ISS field definitions for ERET/ERETAA/ERETAB trapping */
#define ESR_ELx_ERET_ISS_ERET 0x2
#define ESR_ELx_ERET_ISS_ERETA 0x1
/*
* ISS field definitions for floating-point exception traps
* (FP_EXC_32/FP_EXC_64).

arch/arm64/include/asm/kvm_arm.h

@@ -345,9 +345,26 @@
ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
CPACR_EL1_ZEN_EL1EN)
#define kvm_mode_names \
{ PSR_MODE_EL0t, "EL0t" }, \
{ PSR_MODE_EL1t, "EL1t" }, \
{ PSR_MODE_EL1h, "EL1h" }, \
{ PSR_MODE_EL2t, "EL2t" }, \
{ PSR_MODE_EL2h, "EL2h" }, \
{ PSR_MODE_EL3t, "EL3t" }, \
{ PSR_MODE_EL3h, "EL3h" }, \
{ PSR_AA32_MODE_USR, "32-bit USR" }, \
{ PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \
{ PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \
{ PSR_AA32_MODE_SVC, "32-bit SVC" }, \
{ PSR_AA32_MODE_ABT, "32-bit ABT" }, \
{ PSR_AA32_MODE_HYP, "32-bit HYP" }, \
{ PSR_AA32_MODE_UND, "32-bit UND" }, \
{ PSR_AA32_MODE_SYS, "32-bit SYS" }
#endif /* __ARM64_KVM_ARM_H__ */

arch/arm64/include/asm/kvm_emulate.h

@@ -33,6 +33,12 @@ enum exception_type {
except_type_serror = 0x180,
};
#define kvm_exception_type_names \
{ except_type_sync, "SYNC" }, \
{ except_type_irq, "IRQ" }, \
{ except_type_fiq, "FIQ" }, \
{ except_type_serror, "SERROR" }
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
@@ -44,6 +50,10 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -179,6 +189,62 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
{
switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
return true;
default:
return false;
}
}
static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
{
return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
}
static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
{
return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
}
static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
{
return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt);
}
static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt)
{
return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE;
}
static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
{
return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt);
}
static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
{
/*
* We are in a hypervisor context if the vcpu mode is EL2 or
* E2H and TGE bits are set. The latter means we are in the user space
* of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'
*
* Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
* rest of the KVM code, and will result in a misbehaving guest.
*/
return vcpu_is_el2_ctxt(ctxt) ||
(__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) ||
__vcpu_el2_tge_is_set(ctxt);
}
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
return __is_hyp_ctxt(&vcpu->arch.ctxt);
}
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32

arch/arm64/include/asm/kvm_host.h

@@ -60,9 +60,14 @@
enum kvm_mode {
KVM_MODE_DEFAULT,
KVM_MODE_PROTECTED,
KVM_MODE_NV,
KVM_MODE_NONE,
};
#ifdef CONFIG_KVM
enum kvm_mode kvm_get_mode(void);
#else
static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
#endif
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -321,12 +326,43 @@ enum vcpu_sysreg {
TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
/* 32bit specific registers. Keep them at the end of the range */
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
FPEXC32_EL2, /* Floating-Point Exception Control Register */
DBGVCR32_EL2, /* Debug Vector Catch Register */
/* EL2 registers */
VPIDR_EL2, /* Virtualization Processor ID Register */
VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */
SCTLR_EL2, /* System Control Register (EL2) */
ACTLR_EL2, /* Auxiliary Control Register (EL2) */
HCR_EL2, /* Hypervisor Configuration Register */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
HSTR_EL2, /* Hypervisor System Trap Register */
HACR_EL2, /* Hypervisor Auxiliary Control Register */
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
VTTBR_EL2, /* Virtualization Translation Table Base Register */
VTCR_EL2, /* Virtualization Translation Control Register */
SPSR_EL2, /* EL2 saved program status register */
ELR_EL2, /* EL2 exception link register */
AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) */
ESR_EL2, /* Exception Syndrome Register (EL2) */
FAR_EL2, /* Fault Address Register (EL2) */
HPFAR_EL2, /* Hypervisor IPA Fault Address Register */
MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */
AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
NR_SYS_REGS /* Nothing after this line! */
};
@@ -602,7 +638,7 @@ struct kvm_vcpu_arch {
#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
/* For AArch64 with NV (one day): */
/* For AArch64 with NV: */
#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
@@ -613,6 +649,8 @@ struct kvm_vcpu_arch {
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
/* vcpu running in HYP context */
#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))

arch/arm64/include/asm/kvm_mmu.h

@@ -115,6 +115,7 @@ alternative_cb_end
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
void kvm_update_va_mask(struct alt_instr *alt,
@@ -192,7 +193,15 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C;
int reg;
if (vcpu_is_el2(vcpu))
reg = SCTLR_EL2;
else
reg = SCTLR_EL1;
return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits;
}
static inline void __clean_dcache_guest_page(void *va, size_t size)

arch/arm64/include/asm/kvm_nested.h (new file)

@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ARM64_KVM_NESTED_H
#define __ARM64_KVM_NESTED_H
#include <linux/kvm_host.h>
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
{
return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
}
struct sys_reg_params;
struct sys_reg_desc;
void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
const struct sys_reg_desc *r);
#endif /* __ARM64_KVM_NESTED_H */

arch/arm64/include/asm/sysreg.h

@@ -489,23 +489,51 @@
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3)
#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4)
#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5)
#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7)
#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0)
#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1)
#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2)
#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4)
#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
@@ -547,6 +575,12 @@
#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
@@ -569,6 +603,8 @@
#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))

arch/arm64/include/uapi/asm/kvm.h

@@ -109,6 +109,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
struct kvm_vcpu_init {
__u32 target;

arch/arm64/kernel/cpufeature.c

@@ -1966,6 +1966,20 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
}
static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
int scope)
{
if (kvm_get_mode() != KVM_MODE_NV)
return false;
if (!has_cpuid_feature(cap, scope)) {
pr_warn("unavailable: %s\n", cap->desc);
return false;
}
return true;
}
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
@@ -2225,6 +2239,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = runs_at_el2,
.cpu_enable = cpu_copy_el2regs,
},
{
.desc = "Nested Virtualization Support",
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR2_EL1_NV_SHIFT,
.field_width = 4,
.min_field_value = ID_AA64MMFR2_EL1_NV_IMP,
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,

arch/arm64/kvm/Makefile

@@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o \
arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \

arch/arm64/kvm/arm.c

@@ -2322,6 +2322,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
if (strcmp(arg, "nested") == 0 && !WARN_ON(!is_kernel_in_hyp_mode())) {
kvm_mode = KVM_MODE_NV;
return 0;
}
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);

arch/arm64/kvm/emulate-nested.c (new file)

@@ -0,0 +1,203 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2016 - Linaro and Columbia University
* Author: Jintack Lim <jintack.lim@linaro.org>
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_nested.h>
#include "hyp/include/hyp/adjust_pc.h"
#include "trace.h"
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
{
u64 mode = spsr & PSR_MODE_MASK;
/*
* Possible causes for an Illegal Exception Return from EL2:
* - trying to return to EL3
* - trying to return to an illegal M value
* - trying to return to a 32bit EL
* - trying to return to EL1 with HCR_EL2.TGE set
*/
if (mode == PSR_MODE_EL3t || mode == PSR_MODE_EL3h ||
mode == 0b00001 || (mode & BIT(1)) ||
(spsr & PSR_MODE32_BIT) ||
(vcpu_el2_tge_is_set(vcpu) && (mode == PSR_MODE_EL1t ||
mode == PSR_MODE_EL1h))) {
/*
* The guest is playing with our nerves. Preserve EL, SP,
* masks, flags from the existing PSTATE, and set IL.
* The HW will then generate an Illegal State Exception
* immediately after ERET.
*/
spsr = *vcpu_cpsr(vcpu);
spsr &= (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT |
PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT |
PSR_MODE_MASK | PSR_MODE32_BIT);
spsr |= PSR_IL_BIT;
}
return spsr;
}
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
{
u64 spsr, elr, mode;
bool direct_eret;
/*
* Going through the whole put/load motions is a waste of time
* if this is a VHE guest hypervisor returning to its own
* userspace, or the hypervisor performing a local exception
* return. No need to save/restore registers, no need to
* switch S2 MMU. Just do the canonical ERET.
*/
spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
spsr = kvm_check_illegal_exception_return(vcpu, spsr);
mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
direct_eret = (mode == PSR_MODE_EL0t &&
vcpu_el2_e2h_is_set(vcpu) &&
vcpu_el2_tge_is_set(vcpu));
direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
if (direct_eret) {
*vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
*vcpu_cpsr(vcpu) = spsr;
trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
return;
}
preempt_disable();
kvm_arch_vcpu_put(vcpu);
elr = __vcpu_sys_reg(vcpu, ELR_EL2);
trace_kvm_nested_eret(vcpu, elr, spsr);
/*
* Note that the current exception level is always the virtual EL2,
* since we set HCR_EL2.NV bit only when entering the virtual EL2.
*/
*vcpu_pc(vcpu) = elr;
*vcpu_cpsr(vcpu) = spsr;
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
}
static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
enum exception_type type)
{
trace_kvm_inject_nested_exception(vcpu, esr_el2, type);
switch (type) {
case except_type_sync:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2);
break;
case except_type_irq:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
break;
default:
WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
}
}
/*
* Emulate taking an exception to EL2.
* See ARM ARM J8.1.2 AArch64.TakeException()
*/
static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
enum exception_type type)
{
u64 pstate, mode;
bool direct_inject;
if (!vcpu_has_nv(vcpu)) {
kvm_err("Unexpected call to %s for the non-nesting configuration\n",
__func__);
return -EINVAL;
}
/*
* As for ERET, we can avoid doing too much on the injection path by
* checking that we either took the exception from a VHE host
* userspace or from vEL2. In these cases, there is no change in
* translation regime (or anything else), so let's do as little as
* possible.
*/
pstate = *vcpu_cpsr(vcpu);
mode = pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);
direct_inject = (mode == PSR_MODE_EL0t &&
vcpu_el2_e2h_is_set(vcpu) &&
vcpu_el2_tge_is_set(vcpu));
direct_inject |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
if (direct_inject) {
kvm_inject_el2_exception(vcpu, esr_el2, type);
return 1;
}
preempt_disable();
/*
* We may have an exception or PC update in the EL0/EL1 context.
* Commit it before entering EL2.
*/
__kvm_adjust_pc(vcpu);
kvm_arch_vcpu_put(vcpu);
kvm_inject_el2_exception(vcpu, esr_el2, type);
/*
* A hard requirement is that a switch between EL1 and EL2
* contexts has to happen between a put/load, so that we can
* pick the correct timer and interrupt configuration, among
* other things.
*
* Make sure the exception actually took place before we load
* the new context.
*/
__kvm_adjust_pc(vcpu);
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
return 1;
}
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2)
{
return kvm_inject_nested(vcpu, esr_el2, except_type_sync);
}
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu)
{
/*
* Do not inject an irq if the:
* - Current exception level is EL2, and
* - virtual HCR_EL2.TGE == 0
* - virtual HCR_EL2.IMO == 0
*
* See Table D1-17 "Physical interrupt target and masking when EL3 is
* not implemented and EL2 is implemented" in ARM DDI 0487C.a.
*/
if (vcpu_is_el2(vcpu) && !vcpu_el2_tge_is_set(vcpu) &&
!(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO))
return 1;
/* esr_el2 value doesn't matter for exits due to irqs. */
return kvm_inject_nested(vcpu, 0, except_type_irq);
}

arch/arm64/kvm/guest.c

@@ -24,6 +24,7 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_nested.h>
#include <asm/sigcontext.h>
#include "trace.h"
@@ -253,6 +254,11 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!vcpu_el1_is_32bit(vcpu))
return -EINVAL;
break;
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
if (!vcpu_has_nv(vcpu))
return -EINVAL;
fallthrough;
case PSR_MODE_EL0t:
case PSR_MODE_EL1t:
case PSR_MODE_EL1h:

arch/arm64/kvm/handle_exit.c

@@ -16,6 +16,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>
@@ -41,6 +42,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
/* Forward hvc instructions to the virtual EL2 if the guest has EL2. */
if (vcpu_has_nv(vcpu)) {
if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD)
kvm_inject_undefined(vcpu);
else
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
ret = kvm_hvc_call_handler(vcpu);
if (ret < 0) {
vcpu_set_reg(vcpu, 0, ~0UL);
@@ -52,6 +63,8 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
static int handle_smc(struct kvm_vcpu *vcpu)
{
int ret;
/*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
@@ -59,10 +72,30 @@
*
* We need to advance the PC after the trap, as it would
* otherwise return to the same address...
*
* Only handle SMCs from the virtual EL2 with an immediate of zero;
* skip anything else.
*/
vcpu_set_reg(vcpu, 0, ~0UL);
if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
vcpu_set_reg(vcpu, 0, ~0UL);
kvm_incr_pc(vcpu);
return 1;
}
/*
* If imm is zero then it is likely an SMCCC call.
*
* Note that on ARMv8.3, even if EL3 is not implemented, SMC executed
* at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
* being treated as UNDEFINED.
*/
ret = kvm_hvc_call_handler(vcpu);
if (ret < 0)
vcpu_set_reg(vcpu, 0, ~0UL);
kvm_incr_pc(vcpu);
return 1;
return ret;
}
/*
@@ -196,6 +229,15 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
return 1;
}
static int kvm_handle_eret(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
return kvm_handle_ptrauth(vcpu);
kvm_emulate_nested_eret(vcpu);
return 1;
}
static exit_handle_fn arm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
@@ -211,6 +253,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_SMC64] = handle_smc,
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
[ESR_ELx_EC_SVE] = handle_sve,
[ESR_ELx_EC_ERET] = kvm_handle_eret,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,

arch/arm64/kvm/hyp/exception.c

@@ -14,6 +14,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
#error Hypervisor code only!
@@ -23,7 +24,9 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val;
if (__vcpu_read_sys_reg_from_cpu(reg, &val))
if (unlikely(vcpu_has_nv(vcpu)))
return vcpu_read_sys_reg(vcpu, reg);
else if (__vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
return __vcpu_sys_reg(vcpu, reg);
@@ -31,18 +34,25 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
if (__vcpu_write_sys_reg_to_cpu(val, reg))
return;
__vcpu_sys_reg(vcpu, reg) = val;
if (unlikely(vcpu_has_nv(vcpu)))
vcpu_write_sys_reg(vcpu, val, reg);
else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
__vcpu_sys_reg(vcpu, reg) = val;
}
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
u64 val)
{
if (has_vhe())
if (unlikely(vcpu_has_nv(vcpu))) {
if (target_mode == PSR_MODE_EL1h)
vcpu_write_sys_reg(vcpu, val, SPSR_EL1);
else
vcpu_write_sys_reg(vcpu, val, SPSR_EL2);
} else if (has_vhe()) {
write_sysreg_el1(val, SYS_SPSR);
else
} else {
__vcpu_sys_reg(vcpu, SPSR_EL1) = val;
}
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
@@ -101,6 +111,11 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
__vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
break;
case PSR_MODE_EL2h:
vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL2);
sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL2);
__vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL2);
break;
default:
/* Don't do that */
BUG();
@@ -153,7 +168,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= target_mode;
*vcpu_cpsr(vcpu) = new;
__vcpu_write_spsr(vcpu, old);
__vcpu_write_spsr(vcpu, target_mode, old);
}
/*
@@ -323,11 +338,20 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync);
break;
case unpack_vcpu_flag(EXCEPT_AA64_EL2_IRQ):
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
break;
default:
/*
* Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ}
* will be implemented at some point. Everything
* else gets silently ignored.
* Only EL1_SYNC and EL2_{SYNC,IRQ} make
* sense so far. Everything else gets silently
* ignored.
*/
break;
}

arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h

@@ -154,9 +154,26 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
}
/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt)
{
u64 mode = ctxt->regs.pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
case PSR_MODE_EL2t:
mode = PSR_MODE_EL1t;
break;
case PSR_MODE_EL2h:
mode = PSR_MODE_EL1h;
break;
}
return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
}
static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
{
u64 pstate = ctxt->regs.pstate;
u64 pstate = to_hw_pstate(ctxt);
u64 mode = pstate & PSR_AA32_MODE_MASK;
/*

arch/arm64/kvm/hyp/vhe/switch.c

@@ -120,6 +120,25 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/*
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
*/
if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
case PSR_MODE_EL1t:
mode = PSR_MODE_EL2t;
break;
case PSR_MODE_EL1h:
mode = PSR_MODE_EL2h;
break;
}
*vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
*vcpu_cpsr(vcpu) |= mode;
}
}
/* Switch to the guest for VHE systems running in EL2 */
@@ -154,6 +173,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
if (is_hyp_ctxt(vcpu))
vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
else
vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);

arch/arm64/kvm/inject_fault.c

@@ -12,17 +12,55 @@
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_nested.h>
#include <asm/esr.h>
static void pend_sync_exception(struct kvm_vcpu *vcpu)
{
/* If not nesting, EL1 is the only possible exception target */
if (likely(!vcpu_has_nv(vcpu))) {
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
return;
}
/*
* With NV, we need to pick between EL1 and EL2. Note that we
* never deal with a nesting exception here, hence never
* changing context, and the exception itself can be delayed
* until the next entry.
*/
switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) {
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
break;
case PSR_MODE_EL1h:
case PSR_MODE_EL1t:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
break;
case PSR_MODE_EL0t:
if (vcpu_el2_tge_is_set(vcpu))
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
else
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
break;
default:
BUG();
}
}
static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target)
{
return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target);
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
pend_sync_exception(vcpu);
/*
* Build an {i,d}abort, depending on the level and the
@@ -43,14 +81,22 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
esr |= ESR_ELx_FSC_EXTABT;
if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) {
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
} else {
vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
}
}
static void inject_undef64(struct kvm_vcpu *vcpu)
{
u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
pend_sync_exception(vcpu);
/*
* Build an unknown exception, depending on the instruction
@@ -59,7 +105,10 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC)))
vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
else
vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
}
#define DFSR_FSC_EXTABT_LPAE 0x10

arch/arm64/kvm/mmu.c

@@ -46,16 +46,17 @@ static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
* long will also starve other vCPUs. We have to also make sure that the page
* tables are not freed while we released the lock.
*/
static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
phys_addr_t end,
int (*fn)(struct kvm_pgtable *, u64, u64),
bool resched)
{
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
int ret;
u64 next;
do {
struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
struct kvm_pgtable *pgt = mmu->pgt;
if (!pgt)
return -EINVAL;
@@ -71,8 +72,8 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
return ret;
}
#define stage2_apply_range_resched(kvm, addr, end, fn) \
stage2_apply_range(kvm, addr, end, fn, true)
#define stage2_apply_range_resched(mmu, addr, end, fn) \
stage2_apply_range(mmu, addr, end, fn, true)
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
@@ -235,7 +236,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
lockdep_assert_held_write(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
may_block));
}
@@ -250,7 +251,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush);
}
/**
@@ -934,8 +935,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
*/
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
}
/**

arch/arm64/kvm/nested.c (new file, 161 lines)

@@ -0,0 +1,161 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017 - Columbia University and Linaro Ltd.
* Author: Jintack Lim <jintack.lim@linaro.org>
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_nested.h>
#include <asm/sysreg.h>
#include "sys_regs.h"
/* Protection against the sysreg repainting madness... */
#define NV_FTR(r, f) ID_AA64##r##_EL1_##f
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
* of feature bits we don't intend to support for the time being.
* This list should get updated as new features get added to the NV
* support, and new extensions to the architecture.
*/
void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u32 id = reg_to_encoding(r);
u64 val, tmp;
val = p->regval;
switch (id) {
case SYS_ID_AA64ISAR0_EL1:
/* Support everything but TME, O.S. and Range TLBIs */
val &= ~(NV_FTR(ISAR0, TLB) |
NV_FTR(ISAR0, TME));
break;
case SYS_ID_AA64ISAR1_EL1:
/* Support everything but PtrAuth and Spec Invalidation */
val &= ~(GENMASK_ULL(63, 56) |
NV_FTR(ISAR1, SPECRES) |
NV_FTR(ISAR1, GPI) |
NV_FTR(ISAR1, GPA) |
NV_FTR(ISAR1, API) |
NV_FTR(ISAR1, APA));
break;
case SYS_ID_AA64PFR0_EL1:
/* No AMU, MPAM, S-EL2, RAS or SVE */
val &= ~(GENMASK_ULL(55, 52) |
NV_FTR(PFR0, AMU) |
NV_FTR(PFR0, MPAM) |
NV_FTR(PFR0, SEL2) |
NV_FTR(PFR0, RAS) |
NV_FTR(PFR0, SVE) |
NV_FTR(PFR0, EL3) |
NV_FTR(PFR0, EL2) |
NV_FTR(PFR0, EL1));
/* 64bit EL1/EL2/EL3 only */
val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
break;
case SYS_ID_AA64PFR1_EL1:
/* Only support SSBS */
val &= NV_FTR(PFR1, SSBS);
break;
case SYS_ID_AA64MMFR0_EL1:
/* Hide ECV, FGT, ExS, Secure Memory */
val &= ~(GENMASK_ULL(63, 43) |
NV_FTR(MMFR0, TGRAN4_2) |
NV_FTR(MMFR0, TGRAN16_2) |
NV_FTR(MMFR0, TGRAN64_2) |
NV_FTR(MMFR0, SNSMEM));
/* Disallow unsupported S2 page sizes */
switch (PAGE_SIZE) {
case SZ_64K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
fallthrough;
case SZ_16K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
fallthrough;
case SZ_4K:
/* Support everything */
break;
}
/*
* Since we can't support a guest S2 page size smaller than
* the host's own page size (due to KVM only populating its
* own S2 using the kernel's page size), advertise the
* limitation using FEAT_GTG.
*/
switch (PAGE_SIZE) {
case SZ_4K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
fallthrough;
case SZ_16K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
fallthrough;
case SZ_64K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
break;
}
/* Cap PARange to 48bits */
tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
if (tmp > 0b0101) {
val &= ~NV_FTR(MMFR0, PARANGE);
val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
}
break;
case SYS_ID_AA64MMFR1_EL1:
val &= (NV_FTR(MMFR1, PAN) |
NV_FTR(MMFR1, LO) |
NV_FTR(MMFR1, HPDS) |
NV_FTR(MMFR1, VH) |
NV_FTR(MMFR1, VMIDBits));
break;
case SYS_ID_AA64MMFR2_EL1:
val &= ~(NV_FTR(MMFR2, EVT) |
NV_FTR(MMFR2, BBM) |
NV_FTR(MMFR2, TTL) |
GENMASK_ULL(47, 44) |
NV_FTR(MMFR2, ST) |
NV_FTR(MMFR2, CCIDX) |
NV_FTR(MMFR2, VARange));
/* Force TTL support */
val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
break;
case SYS_ID_AA64DFR0_EL1:
/* Only limited support for PMU, Debug, BPs and WPs */
val &= (NV_FTR(DFR0, PMUVer) |
NV_FTR(DFR0, WRPs) |
NV_FTR(DFR0, BRPs) |
NV_FTR(DFR0, DebugVer));
/* Cap Debug to ARMv8.1 */
tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
if (tmp > 0b0111) {
val &= ~NV_FTR(DFR0, DebugVer);
val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
}
break;
default:
/* Unknown register, just wipe it clean */
val = 0;
break;
}
p->regval = val;
}
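As a worked example of the two PAGE_SIZE switches above: on a 16K host, the
first switch sets TGRAN4_2 to 0b0001 (4K granule not supported at stage-2)
and the second sets TGRAN16_2 and TGRAN64_2 to 0b0010 (supported), so a
guest hypervisor never sees a stage-2 granule smaller than the host's own
page size.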

arch/arm64/kvm/reset.c

@@ -27,6 +27,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/virt.h>
/* Maximum phys_shift supported for any VM on this host */
@@ -38,6 +39,9 @@ static u32 __ro_after_init kvm_ipa_limit;
#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
#define VCPU_RESET_PSTATE_EL2 (PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
@@ -221,6 +225,10 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
if (kvm_has_mte(kvm) && is32bit)
return -EINVAL;
/* NV is incompatible with AArch32 */
if (vcpu_has_nv(vcpu) && is32bit)
return -EINVAL;
if (is32bit)
set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
@@ -273,6 +281,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (loaded)
kvm_arch_vcpu_put(vcpu);
/* Disallow NV+SVE for the time being */
if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
ret = -EINVAL;
goto out;
}
if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
ret = kvm_vcpu_enable_sve(vcpu);
@@ -295,6 +309,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
default:
if (vcpu_el1_is_32bit(vcpu)) {
pstate = VCPU_RESET_PSTATE_SVC;
} else if (vcpu_has_nv(vcpu)) {
pstate = VCPU_RESET_PSTATE_EL2;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
}

arch/arm64/kvm/sys_regs.c

@@ -25,6 +25,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/perf_event.h>
#include <asm/sysreg.h>
@@ -175,6 +176,18 @@ static int set_ccsidr(struct kvm_vcpu *vcpu, u32 csselr, u32 val)
return 0;
}
static bool access_rw(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
vcpu_write_sys_reg(vcpu, p->regval, r->reg);
else
p->regval = vcpu_read_sys_reg(vcpu, r->reg);
return true;
}
/*
* See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
*/
@@ -333,6 +346,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
static bool trap_undef(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
kvm_inject_undefined(vcpu);
return false;
}
/*
* ARMv8.1 mandates at least a trivial LORegion implementation, where all the
* RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
@@ -443,12 +464,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
vcpu_write_sys_reg(vcpu, p->regval, r->reg);
access_rw(vcpu, p, r);
if (p->is_write)
vcpu_set_flag(vcpu, DEBUG_DIRTY);
} else {
p->regval = vcpu_read_sys_reg(vcpu, r->reg);
}
trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
@@ -1286,6 +1304,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
return write_to_read_only(vcpu, p, r);
p->regval = read_id_reg(vcpu, r);
if (vcpu_has_nv(vcpu))
access_nested_id_reg(vcpu, p, r);
return true;
}
@@ -1583,6 +1604,44 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
.visibility = mte_visibility, \
}
static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
if (vcpu_has_nv(vcpu))
return 0;
return REG_HIDDEN;
}
#define EL2_REG(name, acc, rst, v) { \
SYS_DESC(SYS_##name), \
.access = acc, \
.reset = rst, \
.reg = name, \
.visibility = el2_visibility, \
.val = v, \
}
/*
* EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
* handling traps. Given that, they are always hidden from userspace.
*/
static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
return REG_HIDDEN_USER;
}
#define EL12_REG(name, acc, rst, v) { \
SYS_DESC(SYS_##name##_EL12), \
.access = acc, \
.reset = rst, \
.reg = name##_EL1, \
.val = v, \
.visibility = elx2_visibility, \
}
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
@@ -1627,6 +1686,42 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
.visibility = raz_visibility, \
}
static bool access_sp_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
__vcpu_sys_reg(vcpu, SP_EL1) = p->regval;
else
p->regval = __vcpu_sys_reg(vcpu, SP_EL1);
return true;
}
static bool access_elr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
vcpu_write_sys_reg(vcpu, p->regval, ELR_EL1);
else
p->regval = vcpu_read_sys_reg(vcpu, ELR_EL1);
return true;
}
static bool access_spsr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
__vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval;
else
p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1);
return true;
}
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -1783,6 +1878,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
PTRAUTH_KEY(APDB),
PTRAUTH_KEY(APGA),
{ SYS_DESC(SYS_SPSR_EL1), access_spsr},
{ SYS_DESC(SYS_ELR_EL1), access_elr},
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -1830,7 +1928,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_LORC_EL1), trap_loregion },
{ SYS_DESC(SYS_LORID_EL1), trap_loregion },
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
@@ -2052,9 +2150,67 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0),
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
EL2_REG(HCR_EL2, access_rw, reset_val, 0),
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT ),
EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
EL2_REG(HACR_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
{ SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG(ESR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG(FAR_EL2, access_rw, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
EL2_REG(AMAIR_EL2, access_rw, reset_val, 0),
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_RMR_EL2), trap_undef },
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078),
EL12_REG(CPACR, access_rw, reset_val, 0),
EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0),
EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0),
EL12_REG(TCR, access_vm_reg, reset_val, 0),
{ SYS_DESC(SYS_SPSR_EL12), access_spsr},
{ SYS_DESC(SYS_ELR_EL12), access_elr},
EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0),
EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0),
EL12_REG(ESR, access_vm_reg, reset_unknown, 0),
EL12_REG(FAR, access_vm_reg, reset_unknown, 0),
EL12_REG(MAIR, access_vm_reg, reset_unknown, 0),
EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0),
EL12_REG(VBAR, access_rw, reset_val, 0),
EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0),
EL12_REG(CNTKCTL, access_rw, reset_val, 0),
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
@@ -2976,7 +3132,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int ret;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
if (!r)
if (!r || sysreg_hidden_user(vcpu, r))
return -ENOENT;
if (r->get_user) {
@@ -3020,7 +3176,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
return -EFAULT;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
if (!r)
if (!r || sysreg_hidden_user(vcpu, r))
return -ENOENT;
if (sysreg_user_write_ignore(vcpu, r))
@@ -3106,7 +3262,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
if (sysreg_hidden(vcpu, rd))
if (sysreg_hidden_user(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))

arch/arm64/kvm/sys_regs.h

@@ -85,8 +85,9 @@ struct sys_reg_desc {
};
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
#define REG_USER_WI (1 << 2) /* WI from userspace only */
#define REG_HIDDEN_USER (1 << 1) /* hidden from userspace only */
#define REG_RAZ (1 << 2) /* RAZ from userspace and guest */
#define REG_USER_WI (1 << 3) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -152,6 +153,15 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
return false;
return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER);
}
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{

arch/arm64/kvm/trace_arm.h

@@ -2,6 +2,7 @@
#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ARM_ARM64_KVM_H
#include <asm/kvm_emulate.h>
#include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h>
@@ -301,6 +302,64 @@ TRACE_EVENT(kvm_timer_emulate,
__entry->timer_idx, __entry->should_fire)
);
TRACE_EVENT(kvm_nested_eret,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned long elr_el2,
unsigned long spsr_el2),
TP_ARGS(vcpu, elr_el2, spsr_el2),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(unsigned long, elr_el2)
__field(unsigned long, spsr_el2)
__field(unsigned long, target_mode)
__field(unsigned long, hcr_el2)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->elr_el2 = elr_el2;
__entry->spsr_el2 = spsr_el2;
__entry->target_mode = spsr_el2 & (PSR_MODE_MASK | PSR_MODE32_BIT);
__entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
),
TP_printk("elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx",
__entry->elr_el2, __entry->spsr_el2,
__print_symbolic(__entry->target_mode, kvm_mode_names),
__entry->hcr_el2)
);
TRACE_EVENT(kvm_inject_nested_exception,
TP_PROTO(struct kvm_vcpu *vcpu, u64 esr_el2, int type),
TP_ARGS(vcpu, esr_el2, type),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(unsigned long, esr_el2)
__field(int, type)
__field(unsigned long, spsr_el2)
__field(unsigned long, pc)
__field(unsigned long, source_mode)
__field(unsigned long, hcr_el2)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->esr_el2 = esr_el2;
__entry->type = type;
__entry->spsr_el2 = *vcpu_cpsr(vcpu);
__entry->pc = *vcpu_pc(vcpu);
__entry->source_mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
__entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
),
TP_printk("%s: esr_el2 0x%lx elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx",
__print_symbolic(__entry->type, kvm_exception_type_names),
__entry->esr_el2, __entry->pc, __entry->spsr_el2,
__print_symbolic(__entry->source_mode, kvm_mode_names),
__entry->hcr_el2)
);
#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH

arch/arm64/tools/cpucaps

@@ -31,6 +31,7 @@ HAS_GENERIC_AUTH_IMP_DEF
HAS_IRQ_PRIO_MASKING
HAS_LDAPR
HAS_LSE_ATOMICS
HAS_NESTED_VIRT
HAS_NO_FPSIMD
HAS_NO_HW_PREFETCH
HAS_PAN