mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 21:53:44 +00:00
KVM/arm64 updates for 6.2
- Enable the per-vcpu dirty-ring tracking mechanism, together with an option to keep the good old dirty log around for pages that are dirtied by something other than a vcpu. - Switch to the relaxed parallel fault handling, using RCU to delay page table reclaim and giving better performance under load. - Relax the MTE ABI, allowing a VMM to use the MAP_SHARED mapping option, which multi-process VMMs such as crosvm rely on. - Merge the pKVM shadow vcpu state tracking that allows the hypervisor to have its own view of a vcpu, keeping that state private. - Add support for the PMUv3p5 architecture revision, bringing support for 64bit counters on systems that support it, and fix the not-quite-compliant CHAIN-ed counter support for the machines that actually exist out there. - Fix a handful of minor issues around 52bit VA/PA support (64kB pages only) as a prefix of the oncoming support for 4kB and 16kB pages. - Add/Enable/Fix a bunch of selftests covering memslots, breakpoints, stage-2 faults and access tracking. You name it, we got it, we probably broke it. - Pick a small set of documentation and spelling fixes, because no good merge window would be complete without those. 
As a side effect, this tag also drags: - The 'kvmarm-fixes-6.1-3' tag as a dependency to the dirty-ring series - A shared branch with the arm64 tree that repaints all the system registers to match the ARM ARM's naming, and resulting in interesting conflicts -----BEGIN PGP SIGNATURE----- iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmOODb0PHG1hekBrZXJu ZWwub3JnAAoJECPQ0LrRPXpDztsQAInRnsgLl57/SpqhZzExNCllN6AT/bdeB3uz rnw3ScJOV174uNKp8lnPWoTvu2YUGiVtBp6tFHhDI8le7zHX438ZT8KE5mcs8p5i KfFKnb8SHV2DDpqkcy24c0Xl/6vsg1qkKrdfJb49yl5ZakRITDpynW/7tn6dXsxX wASeGFdCYeW4g2xMQzsCbtx6LgeQ8uomBmzRfPrOtZHYYxAn6+4Mj4595EC1sWxM AQnbp8tW3Vw46saEZAQvUEOGOW9q0Nls7G21YqQ52IA+ZVDK1LmAF2b1XY3edjkk pX8EsXOURfqdasBxfSfF3SgnUazoz9GHpSzp1cTVTktrPp40rrT7Ldtml0ktq69d 1malPj47KVMDsIq0kNJGnMxciXFgAHw+VaCQX+k4zhIatNwviMbSop2fEoxj22jc 4YGgGOxaGrnvmAJhreCIbr4CkZk5CJ8Zvmtfg+QM6npIp8BY8896nvORx/d4i6tT H4caadd8AAR56ANUyd3+KqF3x0WrkaU0PLHJLy1tKwOXJUUTjcpvIfahBAAeUlSR qEFrtb+EEMPgAwLfNOICcNkPZR/yyuYvM+FiUQNVy5cNiwFkpztpIctfOFaHySGF K07O2/a1F6xKL0OKRUg7hGKknF9ecmux4vHhiUMuIk9VOgNTWobHozBDorLKXMzC aWa6oGVC =iIPT -----END PGP SIGNATURE----- Merge tag 'kvmarm-6.2' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD KVM/arm64 updates for 6.2 - Enable the per-vcpu dirty-ring tracking mechanism, together with an option to keep the good old dirty log around for pages that are dirtied by something other than a vcpu. - Switch to the relaxed parallel fault handling, using RCU to delay page table reclaim and giving better performance under load. - Relax the MTE ABI, allowing a VMM to use the MAP_SHARED mapping option, which multi-process VMMs such as crosvm rely on. - Merge the pKVM shadow vcpu state tracking that allows the hypervisor to have its own view of a vcpu, keeping that state private. 
- Add support for the PMUv3p5 architecture revision, bringing support for 64bit counters on systems that support it, and fix the not-quite-compliant CHAIN-ed counter support for the machines that actually exist out there. - Fix a handful of minor issues around 52bit VA/PA support (64kB pages only) as a prefix of the oncoming support for 4kB and 16kB pages. - Add/Enable/Fix a bunch of selftests covering memslots, breakpoints, stage-2 faults and access tracking. You name it, we got it, we probably broke it. - Pick a small set of documentation and spelling fixes, because no good merge window would be complete without those. As a side effect, this tag also drags: - The 'kvmarm-fixes-6.1-3' tag as a dependency to the dirty-ring series - A shared branch with the arm64 tree that repaints all the system registers to match the ARM ARM's naming, and resulting in interesting conflicts
This commit is contained in:
commit
eb5618911a
@ -7418,8 +7418,9 @@ hibernation of the host; however the VMM needs to manually save/restore the
|
||||
tags as appropriate if the VM is migrated.
|
||||
|
||||
When this capability is enabled all memory in memslots must be mapped as
|
||||
not-shareable (no MAP_SHARED), attempts to create a memslot with a
|
||||
MAP_SHARED mmap will result in an -EINVAL return.
|
||||
``MAP_ANONYMOUS`` or with a RAM-based file mapping (``tmpfs``, ``memfd``),
|
||||
attempts to create a memslot with an invalid mmap will result in an
|
||||
-EINVAL return.
|
||||
|
||||
When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
|
||||
perform a bulk copy of tags to/from the guest.
|
||||
@ -7954,7 +7955,7 @@ regardless of what has actually been exposed through the CPUID leaf.
|
||||
8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
|
||||
----------------------------------------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Architectures: x86, arm64
|
||||
:Parameters: args[0] - size of the dirty log ring
|
||||
|
||||
KVM is capable of tracking dirty memory using ring buffers that are
|
||||
@ -8036,13 +8037,6 @@ flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one
|
||||
needs to kick the vcpu out of KVM_RUN using a signal. The resulting
|
||||
vmexit ensures that all dirty GFNs are flushed to the dirty rings.
|
||||
|
||||
NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding
|
||||
ioctl KVM_RESET_DIRTY_RINGS are mutual exclusive to the existing ioctls
|
||||
KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling
|
||||
KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
|
||||
machine will switch to ring-buffer dirty page tracking and further
|
||||
KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
|
||||
|
||||
NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
|
||||
should be exposed by weakly ordered architecture, in order to indicate
|
||||
the additional memory ordering requirements imposed on userspace when
|
||||
@ -8051,6 +8045,33 @@ Architecture with TSO-like ordering (such as x86) are allowed to
|
||||
expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
|
||||
to userspace.
|
||||
|
||||
After enabling the dirty rings, the userspace needs to detect the
|
||||
capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
|
||||
ring structures can be backed by per-slot bitmaps. With this capability
|
||||
advertised, it means the architecture can dirty guest pages without
|
||||
vcpu/ring context, so that some of the dirty information will still be
|
||||
maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
|
||||
can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
|
||||
hasn't been enabled, or if any memslot already exists.
|
||||
|
||||
Note that the bitmap here is only a backup of the ring structure. The
|
||||
use of the ring and bitmap combination is only beneficial if there is
|
||||
only a very small amount of memory that is dirtied out of vcpu/ring
|
||||
context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
|
||||
be considered.
|
||||
|
||||
To collect dirty bits in the backup bitmap, userspace can use the same
|
||||
KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
|
||||
the generation of the dirty bits is done in a single pass. Collecting
|
||||
the dirty bitmap should be the very last thing that the VMM does before
|
||||
considering the state as complete. VMM needs to ensure that the dirty
|
||||
state is final and avoid missing dirty pages from another ioctl ordered
|
||||
after the bitmap collection.
|
||||
|
||||
NOTE: One example of using the backup bitmap is saving arm64 vgic/its
|
||||
tables through KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} command on
|
||||
KVM device "kvm-arm-vgic-its" when dirty ring is enabled.
|
||||
|
||||
8.30 KVM_CAP_XEN_HVM
|
||||
--------------------
|
||||
|
||||
|
@ -23,21 +23,23 @@ the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1
|
||||
ARCH_FEATURES mechanism before calling it.
|
||||
|
||||
PV_TIME_FEATURES
|
||||
============= ======== ==========
|
||||
|
||||
============= ======== =================================================
|
||||
Function ID: (uint32) 0xC5000020
|
||||
PV_call_id: (uint32) The function to query for support.
|
||||
Currently only PV_TIME_ST is supported.
|
||||
Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
|
||||
PV-time feature is supported by the hypervisor.
|
||||
============= ======== ==========
|
||||
============= ======== =================================================
|
||||
|
||||
PV_TIME_ST
|
||||
============= ======== ==========
|
||||
|
||||
============= ======== ==============================================
|
||||
Function ID: (uint32) 0xC5000021
|
||||
Return value: (int64) IPA of the stolen time data structure for this
|
||||
VCPU. On failure:
|
||||
NOT_SUPPORTED (-1)
|
||||
============= ======== ==========
|
||||
============= ======== ==============================================
|
||||
|
||||
The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory
|
||||
with inner and outer write back caching attributes, in the inner shareable
|
||||
@ -76,5 +78,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of
|
||||
these structures and not used for other purposes, this enables the guest to map
|
||||
the region using 64k pages and avoids conflicting attributes with other memory.
|
||||
|
||||
For the user space interface see Documentation/virt/kvm/devices/vcpu.rst
|
||||
section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL".
|
||||
For the user space interface see
|
||||
:ref:`Documentation/virt/kvm/devices/vcpu.rst <kvm_arm_vcpu_pvtime_ctrl>`.
|
@ -52,7 +52,10 @@ KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
|
||||
KVM_DEV_ARM_ITS_SAVE_TABLES
|
||||
save the ITS table data into guest RAM, at the location provisioned
|
||||
by the guest in corresponding registers/table entries.
|
||||
by the guest in corresponding registers/table entries. Should userspace
|
||||
require a form of dirty tracking to identify which pages are modified
|
||||
by the saving process, it should use a bitmap even if using another
|
||||
mechanism to track the memory dirtied by the vCPUs.
|
||||
|
||||
The layout of the tables in guest memory defines an ABI. The entries
|
||||
are laid out in little endian format as described in the last paragraph.
|
||||
|
@ -171,6 +171,8 @@ configured values on other VCPUs. Userspace should configure the interrupt
|
||||
numbers on at least one VCPU after creating all VCPUs and before running any
|
||||
VCPUs.
|
||||
|
||||
.. _kvm_arm_vcpu_pvtime_ctrl:
|
||||
|
||||
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
||||
==================================
|
||||
|
||||
|
@ -1965,6 +1965,7 @@ config ARM64_MTE
|
||||
depends on ARM64_PAN
|
||||
select ARCH_HAS_SUBPAGE_FAULTS
|
||||
select ARCH_USES_HIGH_VMA_FLAGS
|
||||
select ARCH_USES_PG_ARCH_X
|
||||
help
|
||||
Memory Tagging (part of the ARMv8.5 Extensions) provides
|
||||
architectural support for run-time, always-on detection of
|
||||
|
@ -135,7 +135,7 @@
|
||||
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
|
||||
* not known to exist and will break with this configuration.
|
||||
*
|
||||
* The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
|
||||
* The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
|
||||
*
|
||||
* Note that when using 4K pages, we concatenate two first level page tables
|
||||
* together. With 16K pages, we concatenate 16 first level page tables.
|
||||
@ -340,9 +340,13 @@
|
||||
* We have
|
||||
* PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
|
||||
* HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
|
||||
*
|
||||
* Always assume 52 bit PA since at this point, we don't know how many PA bits
|
||||
* the page table has been set up for. This should be safe since unused address
|
||||
* bits in PAR are res0.
|
||||
*/
|
||||
#define PAR_TO_HPFAR(par) \
|
||||
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
|
||||
(((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
|
||||
|
||||
#define ECN(x) { ESR_ELx_EC_##x, #x }
|
||||
|
||||
|
@ -76,6 +76,9 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
|
||||
};
|
||||
|
||||
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
|
||||
@ -106,7 +109,7 @@ enum __kvm_host_smccc_func {
|
||||
#define per_cpu_ptr_nvhe_sym(sym, cpu) \
|
||||
({ \
|
||||
unsigned long base, off; \
|
||||
base = kvm_arm_hyp_percpu_base[cpu]; \
|
||||
base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \
|
||||
off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
|
||||
(unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
|
||||
base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
|
||||
@ -211,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
|
||||
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
|
||||
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
|
||||
|
||||
extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
|
||||
extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
|
||||
DECLARE_KVM_NVHE_SYM(__per_cpu_start);
|
||||
DECLARE_KVM_NVHE_SYM(__per_cpu_end);
|
||||
|
||||
|
@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void);
|
||||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
 * Bookkeeping for a stack of entries chained through physical addresses.
 *
 * @head:     physical address of the most recently pushed entry; each entry
 *            stores the previous @head in its first phys_addr_t-sized word
 *            (see push_hyp_memcache()/pop_hyp_memcache()).
 * @nr_pages: number of entries currently held; zero means the cache is empty
 *            and @head is not meaningful.
 */
struct kvm_hyp_memcache {
|
||||
phys_addr_t head;
|
||||
unsigned long nr_pages;
|
||||
};
|
||||
|
||||
/*
 * push_hyp_memcache() - Push the entry at @p onto the top of @mc.
 * @mc:    Cache to push onto.
 * @p:     Virtual address of the entry; its first word is overwritten with
 *         the previous head so the entries form a singly linked stack.
 * @to_pa: Translates the caller's virtual address into the physical address
 *         that is recorded as the new head.
 */
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
|
||||
phys_addr_t *p,
|
||||
phys_addr_t (*to_pa)(void *virt))
|
||||
{
|
||||
*p = mc->head;
|
||||
mc->head = to_pa(p);
|
||||
mc->nr_pages++;
|
||||
}
|
||||
|
||||
/*
 * pop_hyp_memcache() - Remove and return the entry on top of @mc.
 * @mc:    Cache to pop from.
 * @to_va: Translates the physical address stored in @mc->head into an
 *         address the caller can dereference.
 *
 * Note that @to_va is invoked on @mc->head before the emptiness check; the
 * translated pointer is only dereferenced when @mc->nr_pages is non-zero.
 *
 * Return: the popped entry, or NULL if the cache holds no entries.
 */
static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
|
||||
void *(*to_va)(phys_addr_t phys))
|
||||
{
|
||||
phys_addr_t *p = to_va(mc->head);
|
||||
|
||||
if (!mc->nr_pages)
|
||||
return NULL;
|
||||
|
||||
mc->head = *p;
|
||||
mc->nr_pages--;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * __topup_hyp_memcache() - Fill @mc until it holds at least @min_pages entries.
 * @mc:        Cache to fill.
 * @min_pages: Target number of entries; nothing is allocated if @mc already
 *             holds this many.
 * @alloc_fn:  Allocator called once per missing entry with @arg.
 * @to_pa:     Virtual-to-physical translation handed to push_hyp_memcache().
 * @arg:       Opaque argument forwarded to @alloc_fn.
 *
 * Return: 0 on success, -ENOMEM as soon as @alloc_fn returns NULL. Entries
 * pushed before a failure are left in @mc; this helper does not unwind them.
 */
static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
|
||||
unsigned long min_pages,
|
||||
void *(*alloc_fn)(void *arg),
|
||||
phys_addr_t (*to_pa)(void *virt),
|
||||
void *arg)
|
||||
{
|
||||
while (mc->nr_pages < min_pages) {
|
||||
phys_addr_t *p = alloc_fn(arg);
|
||||
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
push_hyp_memcache(mc, p, to_pa);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * __free_hyp_memcache() - Drain @mc, releasing every entry it holds.
 * @mc:      Cache to empty; @mc->nr_pages is zero on return.
 * @free_fn: Called once per popped entry with the entry and @arg.
 * @to_va:   Physical-to-virtual translation handed to pop_hyp_memcache().
 * @arg:     Opaque argument forwarded to @free_fn.
 */
static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
|
||||
void (*free_fn)(void *virt, void *arg),
|
||||
void *(*to_va)(phys_addr_t phys),
|
||||
void *arg)
|
||||
{
|
||||
while (mc->nr_pages)
|
||||
free_fn(pop_hyp_memcache(mc, to_va), arg);
|
||||
}
|
||||
|
||||
void free_hyp_memcache(struct kvm_hyp_memcache *mc);
|
||||
int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages);
|
||||
|
||||
struct kvm_vmid {
|
||||
atomic64_t id;
|
||||
};
|
||||
@ -115,6 +172,13 @@ struct kvm_smccc_features {
|
||||
unsigned long vendor_hyp_bmap;
|
||||
};
|
||||
|
||||
typedef unsigned int pkvm_handle_t;
|
||||
|
||||
/*
 * Per-VM pKVM state.
 *
 * @handle:      Handle identifying this VM (a pkvm_handle_t, i.e. an
 *               unsigned int).
 * @teardown_mc: Memcache associated with this VM.
 *               NOTE(review): presumably collects pages handed back by the
 *               hypervisor at VM teardown - confirm against the users.
 */
struct kvm_protected_vm {
|
||||
pkvm_handle_t handle;
|
||||
struct kvm_hyp_memcache teardown_mc;
|
||||
};
|
||||
|
||||
struct kvm_arch {
|
||||
struct kvm_s2_mmu mmu;
|
||||
|
||||
@ -163,9 +227,19 @@ struct kvm_arch {
|
||||
|
||||
u8 pfr0_csv2;
|
||||
u8 pfr0_csv3;
|
||||
struct {
|
||||
u8 imp:4;
|
||||
u8 unimp:4;
|
||||
} dfr0_pmuver;
|
||||
|
||||
/* Hypercall features firmware registers' descriptor */
|
||||
struct kvm_smccc_features smccc_feat;
|
||||
|
||||
/*
|
||||
* For an untrusted host VM, 'pkvm.handle' is used to lookup
|
||||
* the associated pKVM instance in the hypervisor.
|
||||
*/
|
||||
struct kvm_protected_vm pkvm;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_fault_info {
|
||||
@ -915,8 +989,6 @@ int kvm_set_ipa_limit(void);
|
||||
#define __KVM_HAVE_ARCH_VM_ALLOC
|
||||
struct kvm *kvm_arch_alloc_vm(void);
|
||||
|
||||
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
|
||||
|
||||
static inline bool kvm_vm_is_protected(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
|
@ -123,4 +123,7 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
|
||||
|
||||
extern unsigned long kvm_nvhe_sym(__icache_flags);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_H__ */
|
||||
|
@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
|
||||
void free_hyp_pgds(void);
|
||||
|
||||
void stage2_unmap_vm(struct kvm *kvm);
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
|
||||
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
|
||||
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
||||
phys_addr_t pa, unsigned long size, bool writable);
|
||||
|
@ -42,6 +42,8 @@ typedef u64 kvm_pte_t;
|
||||
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
|
||||
|
||||
#define KVM_PHYS_INVALID (-1ULL)
|
||||
|
||||
static inline bool kvm_pte_valid(kvm_pte_t pte)
|
||||
{
|
||||
return pte & KVM_PTE_VALID;
|
||||
@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
|
||||
return pa;
|
||||
}
|
||||
|
||||
/*
 * kvm_phys_to_pte() - Encode a physical address into PTE address bits.
 * @pa: Physical address to encode.
 *
 * PA bits [47:PAGE_SHIFT] are copied in place through KVM_PTE_ADDR_MASK.
 * When PAGE_SHIFT is 16 (64kB pages), PA bits [51:48] are additionally
 * folded into the KVM_PTE_ADDR_51_48 field, i.e. PTE bits [15:12].
 *
 * Return: a kvm_pte_t containing only the address bits of @pa.
 */
static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
|
||||
{
|
||||
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
|
||||
|
||||
if (PAGE_SHIFT == 16) {
|
||||
pa &= GENMASK(51, 48);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
|
||||
}
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_shift(u32 level)
|
||||
{
|
||||
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
|
||||
@ -85,6 +99,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
* allocation is physically contiguous.
|
||||
* @free_pages_exact: Free an exact number of memory pages previously
|
||||
* allocated by zalloc_pages_exact.
|
||||
* @free_removed_table: Free a removed paging structure by unlinking and
|
||||
* dropping references.
|
||||
* @get_page: Increment the refcount on a page.
|
||||
* @put_page: Decrement the refcount on a page. When the
|
||||
* refcount reaches 0 the page is automatically
|
||||
@ -103,6 +119,7 @@ struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*free_removed_table)(void *addr, u32 level);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
@ -161,6 +178,121 @@ enum kvm_pgtable_prot {
|
||||
typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
|
||||
enum kvm_pgtable_prot prot);
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
|
||||
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
|
||||
* entries.
|
||||
* @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their
|
||||
* children.
|
||||
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
|
||||
* children.
|
||||
* @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
|
||||
* with other software walkers.
|
||||
*/
|
||||
enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_LEAF = BIT(0),
|
||||
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
|
||||
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
|
||||
KVM_PGTABLE_WALK_SHARED = BIT(3),
|
||||
};
|
||||
|
||||
/*
 * Context handed (as a const pointer) to a kvm_pgtable_visitor_fn_t callback
 * for the entry being visited.
 *
 * NOTE(review): the field descriptions below are inferred from names and
 * types only - confirm against the walker implementation.
 *
 * @ptep:   pointer to the entry being visited.
 * @old:    presumably the entry's value as read by the walker.
 * @arg:    opaque argument, presumably the walker's @arg.
 * @mm_ops: memory management callbacks.
 * @addr:   presumably the input address covered by the entry.
 * @end:    presumably the end of the range being walked.
 * @level:  presumably the page-table level of the entry.
 * @flags:  walk flags; kvm_pgtable_walk_shared() tests
 *          KVM_PGTABLE_WALK_SHARED in this field.
 */
struct kvm_pgtable_visit_ctx {
|
||||
kvm_pte_t *ptep;
|
||||
kvm_pte_t old;
|
||||
void *arg;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
u64 addr;
|
||||
u64 end;
|
||||
u32 level;
|
||||
enum kvm_pgtable_walk_flags flags;
|
||||
};
|
||||
|
||||
typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit);
|
||||
|
||||
/*
 * kvm_pgtable_walk_shared() - Test whether a visit belongs to a shared walk.
 * @ctx: Visit context supplied to the visitor callback.
 *
 * Return: true if KVM_PGTABLE_WALK_SHARED is set in @ctx->flags.
 */
static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
{
|
||||
return ctx->flags & KVM_PGTABLE_WALK_SHARED;
|
||||
}
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable_walker - Hook into a page-table walk.
|
||||
* @cb: Callback function to invoke during the walk.
|
||||
* @arg: Argument passed to the callback function.
|
||||
* @flags: Bitwise-OR of flags to identify the entry types on which to
|
||||
* invoke the callback function.
|
||||
*/
|
||||
struct kvm_pgtable_walker {
|
||||
const kvm_pgtable_visitor_fn_t cb;
|
||||
void * const arg;
|
||||
const enum kvm_pgtable_walk_flags flags;
|
||||
};
|
||||
|
||||
/*
|
||||
* RCU cannot be used in a non-kernel context such as the hyp. As such, page
|
||||
* table walkers used in hyp do not call into RCU and instead use other
|
||||
* synchronization mechanisms (such as a spinlock).
|
||||
*/
|
||||
#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
|
||||
|
||||
typedef kvm_pte_t *kvm_pteref_t;
|
||||
|
||||
/*
 * Hypervisor variant: kvm_pteref_t is a plain kvm_pte_t pointer in this
 * configuration, so the reference is returned unchanged and @walker is unused.
 */
static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
|
||||
kvm_pteref_t pteref)
|
||||
{
|
||||
return pteref;
|
||||
}
|
||||
|
||||
/*
 * Hypervisor variant of kvm_pgtable_walk_begin().
 *
 * Return: -EPERM if @walker requests KVM_PGTABLE_WALK_SHARED, 0 otherwise.
 * No lock is taken here.
 */
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
/*
|
||||
* Due to the lack of RCU (or a similar protection scheme), only
|
||||
* non-shared table walkers are allowed in the hypervisor.
|
||||
*/
|
||||
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hypervisor variant: kvm_pgtable_walk_begin() takes no lock here, so there is nothing to undo. */
static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
|
||||
|
||||
/*
 * Hypervisor variant: always reports true, as no RCU state exists to query in
 * this configuration.
 */
static inline bool kvm_pgtable_walk_lock_held(void)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
typedef kvm_pte_t __rcu *kvm_pteref_t;
|
||||
|
||||
/*
 * Kernel variant: kvm_pteref_t is an __rcu-annotated pointer here, so it is
 * unwrapped with rcu_dereference_check(). The extra condition passed to the
 * check is "this is not a shared walk", i.e. non-shared walkers are allowed to
 * dereference without holding the RCU read lock.
 */
static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
|
||||
kvm_pteref_t pteref)
|
||||
{
|
||||
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
|
||||
}
|
||||
|
||||
/*
 * Kernel variant of kvm_pgtable_walk_begin(): takes the RCU read lock when
 * @walker requests KVM_PGTABLE_WALK_SHARED; must be paired with
 * kvm_pgtable_walk_end() on the same walker.
 *
 * Return: always 0.
 */
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
|
||||
rcu_read_lock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Kernel variant of kvm_pgtable_walk_end(): drops the RCU read lock taken by
 * kvm_pgtable_walk_begin() when @walker requests KVM_PGTABLE_WALK_SHARED.
 */
static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/* Kernel variant: reports whatever rcu_read_lock_held() returns. */
static inline bool kvm_pgtable_walk_lock_held(void)
|
||||
{
|
||||
return rcu_read_lock_held();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable - KVM page-table.
|
||||
* @ia_bits: Maximum input address size, in bits.
|
||||
@ -175,7 +307,7 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
|
||||
struct kvm_pgtable {
|
||||
u32 ia_bits;
|
||||
u32 start_level;
|
||||
kvm_pte_t *pgd;
|
||||
kvm_pteref_t pgd;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
|
||||
/* Stage-2 only */
|
||||
@ -184,39 +316,6 @@ struct kvm_pgtable {
|
||||
kvm_pgtable_force_pte_cb_t force_pte_cb;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
|
||||
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
|
||||
* entries.
|
||||
* @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their
|
||||
* children.
|
||||
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
|
||||
* children.
|
||||
*/
|
||||
enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_LEAF = BIT(0),
|
||||
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
|
||||
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
|
||||
};
|
||||
|
||||
typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg);
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable_walker - Hook into a page-table walk.
|
||||
* @cb: Callback function to invoke during the walk.
|
||||
* @arg: Argument passed to the callback function.
|
||||
* @flags: Bitwise-OR of flags to identify the entry types on which to
|
||||
* invoke the callback function.
|
||||
*/
|
||||
struct kvm_pgtable_walker {
|
||||
const kvm_pgtable_visitor_fn_t cb;
|
||||
void * const arg;
|
||||
const enum kvm_pgtable_walk_flags flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
|
||||
* @pgt: Uninitialised page-table structure to initialise.
|
||||
@ -296,6 +395,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
||||
*/
|
||||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
|
||||
* @vtcr: Content of the VTCR register.
|
||||
*
|
||||
* Return: the size (in bytes) of the stage-2 PGD
|
||||
*/
|
||||
size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
|
||||
|
||||
/**
|
||||
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
|
||||
* @pgt: Uninitialised page-table structure to initialise.
|
||||
@ -324,6 +431,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
*/
|
||||
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @pgtable: Unlinked stage-2 paging structure to be freed.
|
||||
* @level: Level of the stage-2 paging structure to be freed.
|
||||
*
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior to
|
||||
* freeing and therefore no TLB invalidation is performed.
|
||||
*/
|
||||
void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
@ -333,6 +451,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* @prot: Permissions and attributes for the mapping.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
* @flags: Flags to control the page-table walk (ex. a shared walk)
|
||||
*
|
||||
* The offset of @addr within a page is ignored, @size is rounded-up to
|
||||
* the next page boundary and @phys is rounded-down to the previous page
|
||||
@ -354,7 +473,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
*/
|
||||
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
u64 phys, enum kvm_pgtable_prot prot,
|
||||
void *mc);
|
||||
void *mc, enum kvm_pgtable_walk_flags flags);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
|
||||
|
@ -9,11 +9,49 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
/* Maximum number of VMs that can co-exist under pKVM. */
|
||||
#define KVM_MAX_PVMS 255
|
||||
|
||||
#define HYP_MEMBLOCK_REGIONS 128
|
||||
|
||||
int pkvm_init_host_vm(struct kvm *kvm);
|
||||
int pkvm_create_hyp_vm(struct kvm *kvm);
|
||||
void pkvm_destroy_hyp_vm(struct kvm *kvm);
|
||||
|
||||
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
|
||||
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
|
||||
|
||||
/*
 * hyp_vmemmap_memblock_size() - Bytes of vmemmap needed to describe @reg.
 * @reg:                Memory region to cover.
 * @vmemmap_entry_size: Size in bytes of one per-page vmemmap entry.
 *
 * The byte offsets of the entries for the region's first page and one past
 * its last page are computed as (page frame number * entry size), then
 * rounded outwards to page boundaries.
 *
 * Return: size in bytes, a multiple of PAGE_SIZE.
 */
static inline unsigned long
|
||||
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
|
||||
{
|
||||
unsigned long nr_pages = reg->size >> PAGE_SHIFT;
|
||||
unsigned long start, end;
|
||||
|
||||
start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
|
||||
end = start + nr_pages * vmemmap_entry_size;
|
||||
start = ALIGN_DOWN(start, PAGE_SIZE);
|
||||
end = ALIGN(end, PAGE_SIZE);
|
||||
|
||||
return end - start;
|
||||
}
|
||||
|
||||
/*
 * hyp_vmemmap_pages() - Pages of vmemmap needed for all hyp memory regions.
 * @vmemmap_entry_size: Size in bytes of one per-page vmemmap entry.
 *
 * Sums hyp_vmemmap_memblock_size() over the first hyp_memblock_nr entries of
 * the hyp_memory[] array. Each region is rounded to page boundaries
 * independently before summing.
 *
 * Return: the total expressed in pages.
 */
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
|
||||
{
|
||||
unsigned long res = 0, i;
|
||||
|
||||
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
|
||||
res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
|
||||
vmemmap_entry_size);
|
||||
}
|
||||
|
||||
return res >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
/*
 * hyp_vm_table_pages() - Pages needed for a table of KVM_MAX_PVMS pointers.
 *
 * Return: KVM_MAX_PVMS * sizeof(void *) rounded up to whole pages, in pages.
 */
static inline unsigned long hyp_vm_table_pages(void)
|
||||
{
|
||||
return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
|
||||
{
|
||||
unsigned long total = 0, i;
|
||||
|
@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from,
|
||||
unsigned long n);
|
||||
int mte_save_tags(struct page *page);
|
||||
void mte_save_page_tags(const void *page_addr, void *tag_storage);
|
||||
bool mte_restore_tags(swp_entry_t entry, struct page *page);
|
||||
void mte_restore_tags(swp_entry_t entry, struct page *page);
|
||||
void mte_restore_page_tags(void *page_addr, const void *tag_storage);
|
||||
void mte_invalidate_tags(int type, pgoff_t offset);
|
||||
void mte_invalidate_tags_area(int type);
|
||||
@ -36,6 +36,58 @@ void mte_free_tag_storage(char *storage);
|
||||
|
||||
/* track which pages have valid allocation tags */
|
||||
#define PG_mte_tagged PG_arch_2
|
||||
/* simple lock to avoid multiple threads tagging the same page */
|
||||
#define PG_mte_lock PG_arch_3
|
||||
|
||||
/*
 * set_page_mte_tagged() - Mark @page as having valid allocation tags.
 * @page: Page whose tags have just been written.
 *
 * Sets PG_mte_tagged in @page->flags after a write barrier; the barrier pairs
 * with the smp_rmb() in page_mte_tagged().
 */
static inline void set_page_mte_tagged(struct page *page)
|
||||
{
|
||||
/*
|
||||
* Ensure that the tags written prior to this function are visible
|
||||
* before the page flags update.
|
||||
*/
|
||||
smp_wmb();
|
||||
set_bit(PG_mte_tagged, &page->flags);
|
||||
}
|
||||
|
||||
/*
 * page_mte_tagged() - Test whether @page has valid allocation tags.
 * @page: Page to test.
 *
 * Return: true if PG_mte_tagged is set in @page->flags. In that case a read
 * barrier is issued before returning so that later tag reads are ordered
 * after the flag test; no barrier is issued when the flag is clear.
 */
static inline bool page_mte_tagged(struct page *page)
|
||||
{
|
||||
bool ret = test_bit(PG_mte_tagged, &page->flags);
|
||||
|
||||
/*
|
||||
* If the page is tagged, ensure ordering with a likely subsequent
|
||||
* read of the tags.
|
||||
*/
|
||||
if (ret)
|
||||
smp_rmb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock the page for tagging and return 'true' if the page can be tagged,
|
||||
* 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
|
||||
* locking only happens once for page initialisation.
|
||||
*
|
||||
* The page MTE lock state:
|
||||
*
|
||||
* Locked: PG_mte_lock && !PG_mte_tagged
|
||||
* Unlocked: !PG_mte_lock || PG_mte_tagged
|
||||
*
|
||||
* Acquire semantics only if the page is tagged (returning 'false').
|
||||
*/
|
||||
/*
 * try_page_mte_tagging() - Claim @page for tag initialisation.
 * @page: Page to claim.
 *
 * Return: true if this caller set PG_mte_lock and therefore owns the tag
 * initialisation; false if another caller got there first, in which case this
 * function has waited until PG_mte_tagged was observed set.
 *
 * NOTE(review): since losers spin until PG_mte_tagged is set, a caller that
 * receives true presumably must follow up with set_page_mte_tagged() -
 * confirm against the callers.
 */
static inline bool try_page_mte_tagging(struct page *page)
|
||||
{
|
||||
if (!test_and_set_bit(PG_mte_lock, &page->flags))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* The tags are either being initialised or may have been initialised
|
||||
* already. Check if the PG_mte_tagged flag has been set or wait
|
||||
* otherwise.
|
||||
*/
|
||||
smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void mte_zero_clear_page_tags(void *addr);
|
||||
void mte_sync_tags(pte_t old_pte, pte_t pte);
|
||||
@ -56,6 +108,17 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size);
|
||||
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
|
||||
#define PG_mte_tagged 0
|
||||
|
||||
/* !CONFIG_ARM64_MTE stub: does nothing. */
static inline void set_page_mte_tagged(struct page *page)
|
||||
{
|
||||
}
|
||||
/* !CONFIG_ARM64_MTE stub: no page is ever reported as tagged. */
static inline bool page_mte_tagged(struct page *page)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
/* !CONFIG_ARM64_MTE stub: always returns false, so no caller is ever told to tag the page. */
static inline bool try_page_mte_tagging(struct page *page)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void mte_zero_clear_page_tags(void *addr)
|
||||
{
|
||||
}
|
||||
|
@ -1049,8 +1049,8 @@ static inline void arch_swap_invalidate_area(int type)
|
||||
#define __HAVE_ARCH_SWAP_RESTORE
|
||||
static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
|
||||
{
|
||||
if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
|
||||
set_bit(PG_mte_tagged, &folio->flags);
|
||||
if (system_supports_mte())
|
||||
mte_restore_tags(entry, &folio->page);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_ARM64_MTE */
|
||||
|
@ -165,31 +165,6 @@
|
||||
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
|
||||
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
|
||||
|
||||
#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
|
||||
#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
|
||||
#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4)
|
||||
#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
|
||||
#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5)
|
||||
#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3)
|
||||
#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
|
||||
#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
|
||||
#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
|
||||
#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
|
||||
#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
|
||||
#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6)
|
||||
|
||||
#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
|
||||
#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
|
||||
#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
|
||||
#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
|
||||
#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
|
||||
#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
|
||||
#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7)
|
||||
|
||||
#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
|
||||
#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
|
||||
#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
|
||||
|
||||
#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
|
||||
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
|
||||
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
|
||||
@ -692,112 +667,6 @@
|
||||
#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48
|
||||
#endif
|
||||
|
||||
#define ID_DFR0_PERFMON_SHIFT 24
|
||||
|
||||
#define ID_DFR0_PERFMON_8_0 0x3
|
||||
#define ID_DFR0_PERFMON_8_1 0x4
|
||||
#define ID_DFR0_PERFMON_8_4 0x5
|
||||
#define ID_DFR0_PERFMON_8_5 0x6
|
||||
|
||||
#define ID_ISAR4_SWP_FRAC_SHIFT 28
|
||||
#define ID_ISAR4_PSR_M_SHIFT 24
|
||||
#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20
|
||||
#define ID_ISAR4_BARRIER_SHIFT 16
|
||||
#define ID_ISAR4_SMC_SHIFT 12
|
||||
#define ID_ISAR4_WRITEBACK_SHIFT 8
|
||||
#define ID_ISAR4_WITHSHIFTS_SHIFT 4
|
||||
#define ID_ISAR4_UNPRIV_SHIFT 0
|
||||
|
||||
#define ID_DFR1_MTPMU_SHIFT 0
|
||||
|
||||
#define ID_ISAR0_DIVIDE_SHIFT 24
|
||||
#define ID_ISAR0_DEBUG_SHIFT 20
|
||||
#define ID_ISAR0_COPROC_SHIFT 16
|
||||
#define ID_ISAR0_CMPBRANCH_SHIFT 12
|
||||
#define ID_ISAR0_BITFIELD_SHIFT 8
|
||||
#define ID_ISAR0_BITCOUNT_SHIFT 4
|
||||
#define ID_ISAR0_SWAP_SHIFT 0
|
||||
|
||||
#define ID_ISAR5_RDM_SHIFT 24
|
||||
#define ID_ISAR5_CRC32_SHIFT 16
|
||||
#define ID_ISAR5_SHA2_SHIFT 12
|
||||
#define ID_ISAR5_SHA1_SHIFT 8
|
||||
#define ID_ISAR5_AES_SHIFT 4
|
||||
#define ID_ISAR5_SEVL_SHIFT 0
|
||||
|
||||
#define ID_ISAR6_I8MM_SHIFT 24
|
||||
#define ID_ISAR6_BF16_SHIFT 20
|
||||
#define ID_ISAR6_SPECRES_SHIFT 16
|
||||
#define ID_ISAR6_SB_SHIFT 12
|
||||
#define ID_ISAR6_FHM_SHIFT 8
|
||||
#define ID_ISAR6_DP_SHIFT 4
|
||||
#define ID_ISAR6_JSCVT_SHIFT 0
|
||||
|
||||
#define ID_MMFR0_INNERSHR_SHIFT 28
|
||||
#define ID_MMFR0_FCSE_SHIFT 24
|
||||
#define ID_MMFR0_AUXREG_SHIFT 20
|
||||
#define ID_MMFR0_TCM_SHIFT 16
|
||||
#define ID_MMFR0_SHARELVL_SHIFT 12
|
||||
#define ID_MMFR0_OUTERSHR_SHIFT 8
|
||||
#define ID_MMFR0_PMSA_SHIFT 4
|
||||
#define ID_MMFR0_VMSA_SHIFT 0
|
||||
|
||||
#define ID_MMFR4_EVT_SHIFT 28
|
||||
#define ID_MMFR4_CCIDX_SHIFT 24
|
||||
#define ID_MMFR4_LSM_SHIFT 20
|
||||
#define ID_MMFR4_HPDS_SHIFT 16
|
||||
#define ID_MMFR4_CNP_SHIFT 12
|
||||
#define ID_MMFR4_XNX_SHIFT 8
|
||||
#define ID_MMFR4_AC2_SHIFT 4
|
||||
#define ID_MMFR4_SPECSEI_SHIFT 0
|
||||
|
||||
#define ID_MMFR5_ETS_SHIFT 0
|
||||
|
||||
#define ID_PFR0_DIT_SHIFT 24
|
||||
#define ID_PFR0_CSV2_SHIFT 16
|
||||
#define ID_PFR0_STATE3_SHIFT 12
|
||||
#define ID_PFR0_STATE2_SHIFT 8
|
||||
#define ID_PFR0_STATE1_SHIFT 4
|
||||
#define ID_PFR0_STATE0_SHIFT 0
|
||||
|
||||
#define ID_DFR0_PERFMON_SHIFT 24
|
||||
#define ID_DFR0_MPROFDBG_SHIFT 20
|
||||
#define ID_DFR0_MMAPTRC_SHIFT 16
|
||||
#define ID_DFR0_COPTRC_SHIFT 12
|
||||
#define ID_DFR0_MMAPDBG_SHIFT 8
|
||||
#define ID_DFR0_COPSDBG_SHIFT 4
|
||||
#define ID_DFR0_COPDBG_SHIFT 0
|
||||
|
||||
#define ID_PFR2_SSBS_SHIFT 4
|
||||
#define ID_PFR2_CSV3_SHIFT 0
|
||||
|
||||
#define MVFR0_FPROUND_SHIFT 28
|
||||
#define MVFR0_FPSHVEC_SHIFT 24
|
||||
#define MVFR0_FPSQRT_SHIFT 20
|
||||
#define MVFR0_FPDIVIDE_SHIFT 16
|
||||
#define MVFR0_FPTRAP_SHIFT 12
|
||||
#define MVFR0_FPDP_SHIFT 8
|
||||
#define MVFR0_FPSP_SHIFT 4
|
||||
#define MVFR0_SIMD_SHIFT 0
|
||||
|
||||
#define MVFR1_SIMDFMAC_SHIFT 28
|
||||
#define MVFR1_FPHP_SHIFT 24
|
||||
#define MVFR1_SIMDHP_SHIFT 20
|
||||
#define MVFR1_SIMDSP_SHIFT 16
|
||||
#define MVFR1_SIMDINT_SHIFT 12
|
||||
#define MVFR1_SIMDLS_SHIFT 8
|
||||
#define MVFR1_FPDNAN_SHIFT 4
|
||||
#define MVFR1_FPFTZ_SHIFT 0
|
||||
|
||||
#define ID_PFR1_GIC_SHIFT 28
|
||||
#define ID_PFR1_VIRT_FRAC_SHIFT 24
|
||||
#define ID_PFR1_SEC_FRAC_SHIFT 20
|
||||
#define ID_PFR1_GENTIMER_SHIFT 16
|
||||
#define ID_PFR1_VIRTUALIZATION_SHIFT 12
|
||||
#define ID_PFR1_MPROGMOD_SHIFT 8
|
||||
#define ID_PFR1_SECURITY_SHIFT 4
|
||||
#define ID_PFR1_PROGMOD_SHIFT 0
|
||||
|
||||
#if defined(CONFIG_ARM64_4K_PAGES)
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
|
||||
@ -815,9 +684,6 @@
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT
|
||||
#endif
|
||||
|
||||
#define MVFR2_FPMISC_SHIFT 4
|
||||
#define MVFR2_SIMDMISC_SHIFT 0
|
||||
|
||||
#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
|
||||
#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
|
||||
|
||||
@ -851,10 +717,6 @@
|
||||
#define SYS_RGSR_EL1_SEED_SHIFT 8
|
||||
#define SYS_RGSR_EL1_SEED_MASK 0xffffUL
|
||||
|
||||
/* GMID_EL1 field definitions */
|
||||
#define GMID_EL1_BS_SHIFT 0
|
||||
#define GMID_EL1_BS_SIZE 4
|
||||
|
||||
/* TFSR{,E0}_EL1 bit definitions */
|
||||
#define SYS_TFSR_EL1_TF0_SHIFT 0
|
||||
#define SYS_TFSR_EL1_TF1_SHIFT 1
|
||||
|
@ -43,6 +43,7 @@
|
||||
#define __KVM_HAVE_VCPU_EVENTS
|
||||
|
||||
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
|
||||
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
|
||||
|
||||
#define KVM_REG_SIZE(id) \
|
||||
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
|
||||
|
@ -402,14 +402,14 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0),
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0),
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_InnerShr_SHIFT, 4, 0xf),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_FCSE_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_AuxReg_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_TCM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_ShareLvl_SHIFT, 4, 0),
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_OuterShr_SHIFT, 4, 0xf),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_PMSA_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_VMSA_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
@ -429,32 +429,32 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_mvfr0[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPROUND_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSHVEC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSQRT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDIVIDE_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPTRAP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_SIMD_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPRound_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPShVec_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSqrt_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDivide_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPTrap_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_SIMDReg_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_mvfr1[] = {
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDFMAC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPHP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDHP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDSP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDINT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDLS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPDNAN_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPFTZ_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDFMAC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPHP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDHP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDSP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDInt_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDLS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPDNaN_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPFtZ_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_mvfr2[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_FPMisc_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_SIMDMisc_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
@ -470,34 +470,34 @@ static const struct arm64_ftr_bits ftr_gmid[] = {
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_isar0[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_COPROC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_CMPBRANCH_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITFIELD_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITCOUNT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_SWAP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Divide_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Debug_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Coproc_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_CmpBranch_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitField_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitCount_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Swap_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_isar5[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_RDM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_CRC32_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA1_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_AES_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SEVL_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EVT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CCIDX_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_LSM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_EVT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CCIDX_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_LSM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_HPDS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CnP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_XNX_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_AC2_SHIFT, 4, 0),
|
||||
|
||||
/*
|
||||
* SpecSEI = 1 indicates that the PE might generate an SError on an
|
||||
@ -505,80 +505,80 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
|
||||
* SError might be generated than it will not be. Hence it has been
|
||||
* classified as FTR_HIGHER_SAFE.
|
||||
*/
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_SPECSEI_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_EL1_SpecSEI_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_isar4[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SWP_FRAC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_PSR_M_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_BARRIER_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SMC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WRITEBACK_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WITHSHIFTS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_UNPRIV_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SWP_frac_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_PSR_M_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SynchPrim_frac_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Barrier_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SMC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Writeback_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_WithShifts_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Unpriv_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_mmfr5[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_ETS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_EL1_ETS_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_isar6[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SPECRES_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SB_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_FHM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_DP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_JSCVT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_I8MM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_BF16_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SPECRES_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SB_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_FHM_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_DP_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_JSCVT_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_DIT_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_CSV2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State3_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State2_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State1_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State0_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_pfr1[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GIC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRT_FRAC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SEC_FRAC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GENTIMER_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRTUALIZATION_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_MPROGMOD_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SECURITY_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_PROGMOD_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GIC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virt_frac_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Sec_frac_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GenTimer_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virtualization_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_MProgMod_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Security_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_ProgMod_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_pfr2[] = {
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_SSBS_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_CSV3_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
|
||||
/* [31:28] TraceFilt */
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0),
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_EL1_PerfMon_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MProfDbg_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapTrc_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopTrc_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapDbg_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopSDbg_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopDbg_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_dfr1[] = {
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_MTPMU_SHIFT, 4, 0),
|
||||
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_EL1_MTPMU_SHIFT, 4, 0),
|
||||
ARM64_FTR_END,
|
||||
};
|
||||
|
||||
@ -1119,12 +1119,12 @@ static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info,
|
||||
* EL1-dependent register fields to avoid spurious sanity check fails.
|
||||
*/
|
||||
if (!id_aa64pfr0_32bit_el1(pfr0)) {
|
||||
relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SECURITY_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_PROGMOD_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_EL1_SMC_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virt_frac_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Sec_frac_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virtualization_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Security_SHIFT);
|
||||
relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_ProgMod_SHIFT);
|
||||
}
|
||||
|
||||
taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
|
||||
@ -2074,8 +2074,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
|
||||
* Clear the tags in the zero page. This needs to be done via the
|
||||
* linear map which has the Tagged attribute.
|
||||
*/
|
||||
if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags))
|
||||
if (try_page_mte_tagging(ZERO_PAGE(0))) {
|
||||
mte_clear_page_tags(lm_alias(empty_zero_page));
|
||||
set_page_mte_tagged(ZERO_PAGE(0));
|
||||
}
|
||||
|
||||
kasan_init_hw_tags_cpu();
|
||||
}
|
||||
@ -2829,24 +2831,24 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
|
||||
else
|
||||
mvfr1 = read_sysreg_s(SYS_MVFR1_EL1);
|
||||
|
||||
return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) &&
|
||||
cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) &&
|
||||
cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT);
|
||||
return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDSP_SHIFT) &&
|
||||
cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDInt_SHIFT) &&
|
||||
cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDLS_SHIFT);
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
|
||||
#ifdef CONFIG_COMPAT
|
||||
HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
|
||||
HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
|
||||
HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_EL1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
|
||||
/* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
|
||||
HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
|
||||
HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
|
||||
HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
|
||||
HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_EL1_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
|
||||
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_EL1_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
|
||||
#endif
|
||||
{},
|
||||
};
|
||||
|
@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
|
||||
* Pages mapped in user space as !pte_access_permitted() (e.g.
|
||||
* PROT_EXEC only) may not have the PG_mte_tagged flag set.
|
||||
*/
|
||||
if (!test_bit(PG_mte_tagged, &page->flags)) {
|
||||
if (!page_mte_tagged(page)) {
|
||||
put_page(page);
|
||||
dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
|
||||
continue;
|
||||
|
@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void)
|
||||
if (!page)
|
||||
continue;
|
||||
|
||||
if (!test_bit(PG_mte_tagged, &page->flags))
|
||||
if (!page_mte_tagged(page))
|
||||
continue;
|
||||
|
||||
ret = save_tags(page, pfn);
|
||||
|
@ -71,12 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
|
||||
/* Vectors installed by hyp-init on reset HVC. */
|
||||
KVM_NVHE_ALIAS(__hyp_stub_vectors);
|
||||
|
||||
/* Kernel symbol used by icache_is_vpipt(). */
|
||||
KVM_NVHE_ALIAS(__icache_flags);
|
||||
|
||||
/* VMID bits set by the KVM VMID allocator */
|
||||
KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
|
||||
|
||||
/* Static keys which are set if a vGIC trap should be handled in hyp. */
|
||||
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
|
||||
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
|
||||
@ -92,9 +86,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
|
||||
KVM_NVHE_ALIAS(__start___kvm_ex_table);
|
||||
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
|
||||
|
||||
/* Array containing bases of nVHE per-CPU memory regions. */
|
||||
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
|
||||
|
||||
/* PMU available static key */
|
||||
#ifdef CONFIG_HW_PERF_EVENTS
|
||||
KVM_NVHE_ALIAS(kvm_arm_pmu_available);
|
||||
@ -111,12 +102,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy);
|
||||
KVM_NVHE_ALIAS_HYP(__memset, __pi_memset);
|
||||
#endif
|
||||
|
||||
/* Kernel memory sections */
|
||||
KVM_NVHE_ALIAS(__start_rodata);
|
||||
KVM_NVHE_ALIAS(__end_rodata);
|
||||
KVM_NVHE_ALIAS(__bss_start);
|
||||
KVM_NVHE_ALIAS(__bss_stop);
|
||||
|
||||
/* Hyp memory sections */
|
||||
KVM_NVHE_ALIAS(__hyp_idmap_text_start);
|
||||
KVM_NVHE_ALIAS(__hyp_idmap_text_end);
|
||||
|
@ -41,19 +41,17 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
|
||||
if (check_swap && is_swap_pte(old_pte)) {
|
||||
swp_entry_t entry = pte_to_swp_entry(old_pte);
|
||||
|
||||
if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
|
||||
return;
|
||||
if (!non_swap_entry(entry))
|
||||
mte_restore_tags(entry, page);
|
||||
}
|
||||
|
||||
if (!pte_is_tagged)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Test PG_mte_tagged again in case it was racing with another
|
||||
* set_pte_at().
|
||||
*/
|
||||
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
|
||||
if (try_page_mte_tagging(page)) {
|
||||
mte_clear_page_tags(page_address(page));
|
||||
set_page_mte_tagged(page);
|
||||
}
|
||||
}
|
||||
|
||||
void mte_sync_tags(pte_t old_pte, pte_t pte)
|
||||
@ -69,9 +67,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
|
||||
|
||||
/* if PG_mte_tagged is set, tags have already been initialised */
|
||||
for (i = 0; i < nr_pages; i++, page++) {
|
||||
if (!test_bit(PG_mte_tagged, &page->flags))
|
||||
if (!page_mte_tagged(page)) {
|
||||
mte_sync_page_tags(page, old_pte, check_swap,
|
||||
pte_is_tagged);
|
||||
set_page_mte_tagged(page);
|
||||
}
|
||||
}
|
||||
|
||||
/* ensure the tags are visible before the PTE is set */
|
||||
@ -96,8 +96,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
|
||||
* pages is tagged, set_pte_at() may zero or change the tags of the
|
||||
* other page via mte_sync_tags().
|
||||
*/
|
||||
if (test_bit(PG_mte_tagged, &page1->flags) ||
|
||||
test_bit(PG_mte_tagged, &page2->flags))
|
||||
if (page_mte_tagged(page1) || page_mte_tagged(page2))
|
||||
return addr1 != addr2;
|
||||
|
||||
return ret;
|
||||
@ -454,7 +453,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
|
||||
WARN_ON_ONCE(!page_mte_tagged(page));
|
||||
|
||||
/* limit access to the end of the page */
|
||||
offset = offset_in_page(addr);
|
||||
|
@ -32,6 +32,8 @@ menuconfig KVM
|
||||
select KVM_VFIO
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_IRQFD
|
||||
select HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
select NEED_KVM_DIRTY_RING_WITH_BITMAP
|
||||
select HAVE_KVM_MSI
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQ_ROUTING
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
@ -50,7 +51,6 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
|
||||
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
|
||||
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
|
||||
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
||||
static bool vgic_present;
|
||||
@ -138,24 +138,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = kvm_arm_setup_stage2(kvm, type);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = kvm_share_hyp(kvm, kvm + 1);
|
||||
if (ret)
|
||||
goto out_free_stage2_pgd;
|
||||
return ret;
|
||||
|
||||
ret = pkvm_init_host_vm(kvm);
|
||||
if (ret)
|
||||
goto err_unshare_kvm;
|
||||
|
||||
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_stage2_pgd;
|
||||
goto err_unshare_kvm;
|
||||
}
|
||||
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
|
||||
|
||||
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type);
|
||||
if (ret)
|
||||
goto err_free_cpumask;
|
||||
|
||||
kvm_vgic_early_init(kvm);
|
||||
|
||||
/* The maximum number of VCPUs is limited by the host's GIC model */
|
||||
@ -164,9 +164,18 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
set_default_spectre(kvm);
|
||||
kvm_arm_init_hypercalls(kvm);
|
||||
|
||||
return ret;
|
||||
out_free_stage2_pgd:
|
||||
kvm_free_stage2_pgd(&kvm->arch.mmu);
|
||||
/*
|
||||
* Initialise the default PMUver before there is a chance to
|
||||
* create an actual PMU.
|
||||
*/
|
||||
kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_cpumask:
|
||||
free_cpumask_var(kvm->arch.supported_cpus);
|
||||
err_unshare_kvm:
|
||||
kvm_unshare_hyp(kvm, kvm + 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -187,6 +196,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
|
||||
kvm_vgic_destroy(kvm);
|
||||
|
||||
if (is_protected_kvm_enabled())
|
||||
pkvm_destroy_hyp_vm(kvm);
|
||||
|
||||
kvm_destroy_vcpus(kvm);
|
||||
|
||||
kvm_unshare_hyp(kvm, kvm + 1);
|
||||
@ -569,6 +581,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
ret = pkvm_create_hyp_vm(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!irqchip_in_kernel(kvm)) {
|
||||
/*
|
||||
* Tell the rest of the code that there are userspace irqchip
|
||||
@ -746,6 +764,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
|
||||
return kvm_vcpu_suspend(vcpu);
|
||||
|
||||
if (kvm_dirty_ring_check_request(vcpu))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
@ -1518,7 +1539,7 @@ static int kvm_init_vector_slots(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cpu_prepare_hyp_mode(int cpu)
|
||||
static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
|
||||
unsigned long tcr;
|
||||
@ -1534,23 +1555,9 @@ static void cpu_prepare_hyp_mode(int cpu)
|
||||
|
||||
params->mair_el2 = read_sysreg(mair_el1);
|
||||
|
||||
/*
|
||||
* The ID map may be configured to use an extended virtual address
|
||||
* range. This is only the case if system RAM is out of range for the
|
||||
* currently configured page size and VA_BITS, in which case we will
|
||||
* also need the extended virtual range for the HYP ID map, or we won't
|
||||
* be able to enable the EL2 MMU.
|
||||
*
|
||||
* However, at EL2, there is only one TTBR register, and we can't switch
|
||||
* between translation tables *and* update TCR_EL2.T0SZ at the same
|
||||
* time. Bottom line: we need to use the extended range with *both* our
|
||||
* translation tables.
|
||||
*
|
||||
* So use the same T0SZ value we use for the ID map.
|
||||
*/
|
||||
tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
|
||||
tcr &= ~TCR_T0SZ_MASK;
|
||||
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
|
||||
tcr |= TCR_T0SZ(hyp_va_bits);
|
||||
params->tcr_el2 = tcr;
|
||||
|
||||
params->pgd_pa = kvm_mmu_get_httbr();
|
||||
@ -1844,13 +1851,13 @@ static void teardown_hyp_mode(void)
|
||||
free_hyp_pgds();
|
||||
for_each_possible_cpu(cpu) {
|
||||
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
|
||||
free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
|
||||
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
|
||||
}
|
||||
}
|
||||
|
||||
static int do_pkvm_init(u32 hyp_va_bits)
|
||||
{
|
||||
void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
|
||||
void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base));
|
||||
int ret;
|
||||
|
||||
preempt_disable();
|
||||
@ -1870,11 +1877,8 @@ static int do_pkvm_init(u32 hyp_va_bits)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_hyp_init_protection(u32 hyp_va_bits)
|
||||
static void kvm_hyp_init_symbols(void)
|
||||
{
|
||||
void *addr = phys_to_virt(hyp_mem_base);
|
||||
int ret;
|
||||
|
||||
kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
|
||||
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
|
||||
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
|
||||
@ -1883,6 +1887,14 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
|
||||
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
|
||||
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
|
||||
kvm_nvhe_sym(__icache_flags) = __icache_flags;
|
||||
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
|
||||
}
|
||||
|
||||
static int kvm_hyp_init_protection(u32 hyp_va_bits)
|
||||
{
|
||||
void *addr = phys_to_virt(hyp_mem_base);
|
||||
int ret;
|
||||
|
||||
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
|
||||
if (ret)
|
||||
@ -1950,7 +1962,7 @@ static int init_hyp_mode(void)
|
||||
|
||||
page_addr = page_address(page);
|
||||
memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
|
||||
kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
|
||||
kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2043,7 +2055,7 @@ static int init_hyp_mode(void)
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
|
||||
char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu];
|
||||
char *percpu_end = percpu_begin + nvhe_percpu_size();
|
||||
|
||||
/* Map Hyp percpu pages */
|
||||
@ -2054,9 +2066,11 @@ static int init_hyp_mode(void)
|
||||
}
|
||||
|
||||
/* Prepare the CPU initialization parameters */
|
||||
cpu_prepare_hyp_mode(cpu);
|
||||
cpu_prepare_hyp_mode(cpu, hyp_va_bits);
|
||||
}
|
||||
|
||||
kvm_hyp_init_symbols();
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
init_cpu_logical_map();
|
||||
|
||||
@ -2064,9 +2078,7 @@ static int init_hyp_mode(void)
|
||||
err = -ENODEV;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
err = kvm_hyp_init_protection(hyp_va_bits);
|
||||
if (err) {
|
||||
kvm_err("Failed to init hyp memory protection\n");
|
||||
|
@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
|
||||
maddr = page_address(page);
|
||||
|
||||
if (!write) {
|
||||
if (test_bit(PG_mte_tagged, &page->flags))
|
||||
if (page_mte_tagged(page))
|
||||
num_tags = mte_copy_tags_to_user(tags, maddr,
|
||||
MTE_GRANULES_PER_PAGE);
|
||||
else
|
||||
@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
|
||||
clear_user(tags, MTE_GRANULES_PER_PAGE);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
} else {
|
||||
/*
|
||||
* Only locking to serialise with a concurrent
|
||||
* set_pte_at() in the VMM but still overriding the
|
||||
* tags, hence ignoring the return value.
|
||||
*/
|
||||
try_page_mte_tagging(page);
|
||||
num_tags = mte_copy_tags_from_user(maddr, tags,
|
||||
MTE_GRANULES_PER_PAGE);
|
||||
|
||||
/*
|
||||
* Set the flag after checking the write
|
||||
* completed fully
|
||||
*/
|
||||
if (num_tags == MTE_GRANULES_PER_PAGE)
|
||||
set_bit(PG_mte_tagged, &page->flags);
|
||||
/* uaccess failed, don't leave stale tags */
|
||||
if (num_tags != MTE_GRANULES_PER_PAGE)
|
||||
mte_clear_page_tags(page);
|
||||
set_page_mte_tagged(page);
|
||||
|
||||
kvm_release_pfn_dirty(pfn);
|
||||
}
|
||||
|
@ -2,9 +2,12 @@
|
||||
|
||||
#include <linux/kbuild.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
|
||||
DEFINE(PKVM_HYP_VM_SIZE, sizeof(struct pkvm_hyp_vm));
|
||||
DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu));
|
||||
return 0;
|
||||
}
|
||||
|
@ -8,8 +8,10 @@
|
||||
#define __KVM_NVHE_MEM_PROTECT__
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/virt.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
/*
|
||||
@ -43,30 +45,45 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
|
||||
return prot & PKVM_PAGE_STATE_PROT_MASK;
|
||||
}
|
||||
|
||||
struct host_kvm {
|
||||
struct host_mmu {
|
||||
struct kvm_arch arch;
|
||||
struct kvm_pgtable pgt;
|
||||
struct kvm_pgtable_mm_ops mm_ops;
|
||||
hyp_spinlock_t lock;
|
||||
};
|
||||
extern struct host_kvm host_kvm;
|
||||
extern struct host_mmu host_mmu;
|
||||
|
||||
extern const u8 pkvm_hyp_id;
|
||||
/* This corresponds to page-table locking order */
|
||||
enum pkvm_component_id {
|
||||
PKVM_ID_HOST,
|
||||
PKVM_ID_HYP,
|
||||
};
|
||||
|
||||
extern unsigned long hyp_nr_cpus;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_host_share_hyp(u64 pfn);
|
||||
int __pkvm_host_unshare_hyp(u64 pfn);
|
||||
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
|
||||
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
|
||||
|
||||
bool addr_is_memory(phys_addr_t phys);
|
||||
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
|
||||
int kvm_host_prepare_stage2(void *pgt_pool_base);
|
||||
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd);
|
||||
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
int hyp_pin_shared_mem(void *from, void *to);
|
||||
void hyp_unpin_shared_mem(void *from, void *to);
|
||||
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
|
||||
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
|
||||
struct kvm_hyp_memcache *host_mc);
|
||||
|
||||
static __always_inline void __load_host_stage2(void)
|
||||
{
|
||||
if (static_branch_likely(&kvm_protected_mode_initialized))
|
||||
__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
|
||||
__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
|
||||
else
|
||||
write_sysreg(0, vttbr_el2);
|
||||
}
|
||||
|
@ -38,6 +38,10 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
|
||||
#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page))
|
||||
#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool)
|
||||
|
||||
/*
|
||||
* Refcounting for 'struct hyp_page'.
|
||||
* hyp_pool::lock must be held if atomic access to the refcount is required.
|
||||
*/
|
||||
static inline int hyp_page_count(void *addr)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(addr);
|
||||
@ -45,4 +49,27 @@ static inline int hyp_page_count(void *addr)
|
||||
return p->refcount;
|
||||
}
|
||||
|
||||
static inline void hyp_page_ref_inc(struct hyp_page *p)
|
||||
{
|
||||
BUG_ON(p->refcount == USHRT_MAX);
|
||||
p->refcount++;
|
||||
}
|
||||
|
||||
static inline void hyp_page_ref_dec(struct hyp_page *p)
|
||||
{
|
||||
BUG_ON(!p->refcount);
|
||||
p->refcount--;
|
||||
}
|
||||
|
||||
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
|
||||
{
|
||||
hyp_page_ref_dec(p);
|
||||
return (p->refcount == 0);
|
||||
}
|
||||
|
||||
static inline void hyp_set_page_refcounted(struct hyp_page *p)
|
||||
{
|
||||
BUG_ON(p->refcount);
|
||||
p->refcount = 1;
|
||||
}
|
||||
#endif /* __KVM_HYP_MEMORY_H */
|
||||
|
@ -13,9 +13,13 @@
|
||||
extern struct kvm_pgtable pkvm_pgtable;
|
||||
extern hyp_spinlock_t pkvm_pgd_lock;
|
||||
|
||||
int hyp_create_pcpu_fixmap(void);
|
||||
void *hyp_fixmap_map(phys_addr_t phys);
|
||||
void hyp_fixmap_unmap(void);
|
||||
|
||||
int hyp_create_idmap(u32 hyp_va_bits);
|
||||
int hyp_map_vectors(void);
|
||||
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
|
||||
int hyp_back_vmemmap(phys_addr_t back);
|
||||
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
@ -24,16 +28,4 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
unsigned long *haddr);
|
||||
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
|
||||
|
||||
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
|
||||
unsigned long *start, unsigned long *end)
|
||||
{
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
struct hyp_page *p = hyp_phys_to_page(phys);
|
||||
|
||||
*start = (unsigned long)p;
|
||||
*end = *start + nr_pages * sizeof(struct hyp_page);
|
||||
*start = ALIGN_DOWN(*start, PAGE_SIZE);
|
||||
*end = ALIGN(*end, PAGE_SIZE);
|
||||
}
|
||||
|
||||
#endif /* __KVM_HYP_MM_H */
|
||||
|
68
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
Normal file
68
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
Normal file
@ -0,0 +1,68 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2021 Google LLC
|
||||
* Author: Fuad Tabba <tabba@google.com>
|
||||
*/
|
||||
|
||||
#ifndef __ARM64_KVM_NVHE_PKVM_H__
|
||||
#define __ARM64_KVM_NVHE_PKVM_H__
|
||||
|
||||
#include <asm/kvm_pkvm.h>
|
||||
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
/*
|
||||
* Holds the relevant data for maintaining the vcpu state completely at hyp.
|
||||
*/
|
||||
struct pkvm_hyp_vcpu {
|
||||
struct kvm_vcpu vcpu;
|
||||
|
||||
/* Backpointer to the host's (untrusted) vCPU instance. */
|
||||
struct kvm_vcpu *host_vcpu;
|
||||
};
|
||||
|
||||
/*
|
||||
* Holds the relevant data for running a protected vm.
|
||||
*/
|
||||
struct pkvm_hyp_vm {
|
||||
struct kvm kvm;
|
||||
|
||||
/* Backpointer to the host's (untrusted) KVM instance. */
|
||||
struct kvm *host_kvm;
|
||||
|
||||
/* The guest's stage-2 page-table managed by the hypervisor. */
|
||||
struct kvm_pgtable pgt;
|
||||
struct kvm_pgtable_mm_ops mm_ops;
|
||||
struct hyp_pool pool;
|
||||
hyp_spinlock_t lock;
|
||||
|
||||
/*
|
||||
* The number of vcpus initialized and ready to run.
|
||||
* Modifying this is protected by 'vm_table_lock'.
|
||||
*/
|
||||
unsigned int nr_vcpus;
|
||||
|
||||
/* Array of the hyp vCPU structures for this VM. */
|
||||
struct pkvm_hyp_vcpu *vcpus[];
|
||||
};
|
||||
|
||||
static inline struct pkvm_hyp_vm *
|
||||
pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
{
|
||||
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
|
||||
}
|
||||
|
||||
void pkvm_hyp_vm_table_init(void *tbl);
|
||||
|
||||
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
|
||||
unsigned long pgd_hva);
|
||||
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
|
||||
unsigned long vcpu_hva);
|
||||
int __pkvm_teardown_vm(pkvm_handle_t handle);
|
||||
|
||||
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
|
||||
unsigned int vcpu_idx);
|
||||
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
|
||||
|
||||
#endif /* __ARM64_KVM_NVHE_PKVM_H__ */
|
@ -28,9 +28,17 @@ typedef union hyp_spinlock {
|
||||
};
|
||||
} hyp_spinlock_t;
|
||||
|
||||
#define __HYP_SPIN_LOCK_INITIALIZER \
|
||||
{ .__val = 0 }
|
||||
|
||||
#define __HYP_SPIN_LOCK_UNLOCKED \
|
||||
((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER)
|
||||
|
||||
#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED
|
||||
|
||||
#define hyp_spin_lock_init(l) \
|
||||
do { \
|
||||
*(l) = (hyp_spinlock_t){ .__val = 0 }; \
|
||||
*(l) = __HYP_SPIN_LOCK_UNLOCKED; \
|
||||
} while (0)
|
||||
|
||||
static inline void hyp_spin_lock(hyp_spinlock_t *lock)
|
||||
|
@ -12,3 +12,14 @@ SYM_FUNC_START(__pi_dcache_clean_inval_poc)
|
||||
ret
|
||||
SYM_FUNC_END(__pi_dcache_clean_inval_poc)
|
||||
SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
|
||||
|
||||
SYM_FUNC_START(__pi_icache_inval_pou)
|
||||
alternative_if ARM64_HAS_CACHE_DIC
|
||||
isb
|
||||
ret
|
||||
alternative_else_nop_endif
|
||||
|
||||
invalidate_icache_by_line x0, x1, x2, x3
|
||||
ret
|
||||
SYM_FUNC_END(__pi_icache_inval_pou)
|
||||
SYM_FUNC_ALIAS(icache_inval_pou, __pi_icache_inval_pou)
|
||||
|
@ -15,17 +15,93 @@
|
||||
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
||||
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
{
|
||||
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
|
||||
|
||||
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
|
||||
|
||||
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
|
||||
hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
|
||||
|
||||
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
|
||||
|
||||
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
|
||||
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
|
||||
hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
|
||||
|
||||
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
|
||||
hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state;
|
||||
|
||||
hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
|
||||
hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
|
||||
|
||||
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
|
||||
|
||||
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
|
||||
}
|
||||
|
||||
static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
{
|
||||
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
|
||||
struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3;
|
||||
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
|
||||
unsigned int i;
|
||||
|
||||
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
|
||||
|
||||
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
|
||||
host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
|
||||
|
||||
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
|
||||
|
||||
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
|
||||
host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state;
|
||||
|
||||
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
|
||||
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
|
||||
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
|
||||
}
|
||||
|
||||
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
|
||||
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
|
||||
int ret;
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
|
||||
host_vcpu = kern_hyp_va(host_vcpu);
|
||||
|
||||
if (unlikely(is_protected_kvm_enabled())) {
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
struct kvm *host_kvm;
|
||||
|
||||
host_kvm = kern_hyp_va(host_vcpu->kvm);
|
||||
hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
|
||||
host_vcpu->vcpu_idx);
|
||||
if (!hyp_vcpu) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
flush_hyp_vcpu(hyp_vcpu);
|
||||
|
||||
ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
|
||||
|
||||
sync_hyp_vcpu(hyp_vcpu);
|
||||
pkvm_put_hyp_vcpu(hyp_vcpu);
|
||||
} else {
|
||||
/* The host is fully trusted, run its vCPU directly. */
|
||||
ret = __kvm_vcpu_run(host_vcpu);
|
||||
}
|
||||
|
||||
out:
|
||||
cpu_reg(host_ctxt, 1) = ret;
|
||||
}
|
||||
|
||||
static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
|
||||
@ -191,6 +267,33 @@ static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
|
||||
__pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
|
||||
}
|
||||
|
||||
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
|
||||
DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3);
|
||||
|
||||
host_kvm = kern_hyp_va(host_kvm);
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
|
||||
}
|
||||
|
||||
static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3);
|
||||
|
||||
host_vcpu = kern_hyp_va(host_vcpu);
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
|
||||
}
|
||||
|
||||
static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
|
||||
}
|
||||
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
|
||||
@ -220,6 +323,9 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__vgic_v3_save_aprs),
|
||||
HANDLE_FUNC(__vgic_v3_restore_aprs),
|
||||
HANDLE_FUNC(__pkvm_vcpu_init_traps),
|
||||
HANDLE_FUNC(__pkvm_init_vm),
|
||||
HANDLE_FUNC(__pkvm_init_vcpu),
|
||||
HANDLE_FUNC(__pkvm_teardown_vm),
|
||||
};
|
||||
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
|
@ -23,6 +23,8 @@ u64 cpu_logical_map(unsigned int cpu)
|
||||
return hyp_cpu_logical_map[cpu];
|
||||
}
|
||||
|
||||
unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS];
|
||||
|
||||
unsigned long __hyp_per_cpu_offset(unsigned int cpu)
|
||||
{
|
||||
unsigned long *cpu_base_array;
|
||||
|
@ -21,21 +21,33 @@
|
||||
|
||||
#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
|
||||
|
||||
extern unsigned long hyp_nr_cpus;
|
||||
struct host_kvm host_kvm;
|
||||
struct host_mmu host_mmu;
|
||||
|
||||
static struct hyp_pool host_s2_pool;
|
||||
|
||||
const u8 pkvm_hyp_id = 1;
|
||||
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
|
||||
#define current_vm (*this_cpu_ptr(&__current_vm))
|
||||
|
||||
static void guest_lock_component(struct pkvm_hyp_vm *vm)
|
||||
{
|
||||
hyp_spin_lock(&vm->lock);
|
||||
current_vm = vm;
|
||||
}
|
||||
|
||||
static void guest_unlock_component(struct pkvm_hyp_vm *vm)
|
||||
{
|
||||
current_vm = NULL;
|
||||
hyp_spin_unlock(&vm->lock);
|
||||
}
|
||||
|
||||
static void host_lock_component(void)
|
||||
{
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
hyp_spin_lock(&host_mmu.lock);
|
||||
}
|
||||
|
||||
static void host_unlock_component(void)
|
||||
{
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
hyp_spin_unlock(&host_mmu.lock);
|
||||
}
|
||||
|
||||
static void hyp_lock_component(void)
|
||||
@ -79,6 +91,11 @@ static void host_s2_put_page(void *addr)
|
||||
hyp_put_page(&host_s2_pool, addr);
|
||||
}
|
||||
|
||||
static void host_s2_free_removed_table(void *addr, u32 level)
|
||||
{
|
||||
kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
|
||||
}
|
||||
|
||||
static int prepare_s2_pool(void *pgt_pool_base)
|
||||
{
|
||||
unsigned long nr_pages, pfn;
|
||||
@ -90,9 +107,10 @@ static int prepare_s2_pool(void *pgt_pool_base)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_pages_exact = host_s2_zalloc_pages_exact,
|
||||
.zalloc_page = host_s2_zalloc_page,
|
||||
.free_removed_table = host_s2_free_removed_table,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
.virt_to_phys = hyp_virt_to_phys,
|
||||
.page_count = hyp_page_count,
|
||||
@ -111,7 +129,7 @@ static void prepare_host_vtcr(void)
|
||||
parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
|
||||
phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
|
||||
|
||||
host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
|
||||
host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
|
||||
id_aa64mmfr1_el1_sys_val, phys_shift);
|
||||
}
|
||||
|
||||
@ -119,45 +137,170 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr
|
||||
|
||||
int kvm_host_prepare_stage2(void *pgt_pool_base)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
|
||||
struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
|
||||
int ret;
|
||||
|
||||
prepare_host_vtcr();
|
||||
hyp_spin_lock_init(&host_kvm.lock);
|
||||
mmu->arch = &host_kvm.arch;
|
||||
hyp_spin_lock_init(&host_mmu.lock);
|
||||
mmu->arch = &host_mmu.arch;
|
||||
|
||||
ret = prepare_s2_pool(pgt_pool_base);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
|
||||
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
|
||||
ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
|
||||
&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
|
||||
host_stage2_force_pte_cb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
|
||||
mmu->pgt = &host_kvm.pgt;
|
||||
mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
|
||||
mmu->pgt = &host_mmu.pgt;
|
||||
atomic64_set(&mmu->vmid.id, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *guest_s2_zalloc_pages_exact(size_t size)
|
||||
{
|
||||
void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
|
||||
|
||||
WARN_ON(size != (PAGE_SIZE << get_order(size)));
|
||||
hyp_split_page(hyp_virt_to_page(addr));
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
static void guest_s2_free_pages_exact(void *addr, unsigned long size)
|
||||
{
|
||||
u8 order = get_order(size);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < (1 << order); i++)
|
||||
hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
|
||||
}
|
||||
|
||||
static void *guest_s2_zalloc_page(void *mc)
|
||||
{
|
||||
struct hyp_page *p;
|
||||
void *addr;
|
||||
|
||||
addr = hyp_alloc_pages(&current_vm->pool, 0);
|
||||
if (addr)
|
||||
return addr;
|
||||
|
||||
addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
|
||||
if (!addr)
|
||||
return addr;
|
||||
|
||||
memset(addr, 0, PAGE_SIZE);
|
||||
p = hyp_virt_to_page(addr);
|
||||
memset(p, 0, sizeof(*p));
|
||||
p->refcount = 1;
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
static void guest_s2_get_page(void *addr)
|
||||
{
|
||||
hyp_get_page(&current_vm->pool, addr);
|
||||
}
|
||||
|
||||
static void guest_s2_put_page(void *addr)
|
||||
{
|
||||
hyp_put_page(&current_vm->pool, addr);
|
||||
}
|
||||
|
||||
static void clean_dcache_guest_page(void *va, size_t size)
|
||||
{
|
||||
__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
|
||||
hyp_fixmap_unmap();
|
||||
}
|
||||
|
||||
static void invalidate_icache_guest_page(void *va, size_t size)
|
||||
{
|
||||
__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
|
||||
hyp_fixmap_unmap();
|
||||
}
|
||||
|
||||
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
|
||||
unsigned long nr_pages;
|
||||
int ret;
|
||||
|
||||
nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
|
||||
ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
hyp_spin_lock_init(&vm->lock);
|
||||
vm->mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
|
||||
.free_pages_exact = guest_s2_free_pages_exact,
|
||||
.zalloc_page = guest_s2_zalloc_page,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
.virt_to_phys = hyp_virt_to_phys,
|
||||
.page_count = hyp_page_count,
|
||||
.get_page = guest_s2_get_page,
|
||||
.put_page = guest_s2_put_page,
|
||||
.dcache_clean_inval_poc = clean_dcache_guest_page,
|
||||
.icache_inval_pou = invalidate_icache_guest_page,
|
||||
};
|
||||
|
||||
guest_lock_component(vm);
|
||||
ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
|
||||
guest_stage2_force_pte_cb);
|
||||
guest_unlock_component(vm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
|
||||
{
|
||||
void *addr;
|
||||
|
||||
/* Dump all pgtable pages in the hyp_pool */
|
||||
guest_lock_component(vm);
|
||||
kvm_pgtable_stage2_destroy(&vm->pgt);
|
||||
vm->kvm.arch.mmu.pgd_phys = 0ULL;
|
||||
guest_unlock_component(vm);
|
||||
|
||||
/* Drain the hyp_pool into the memcache */
|
||||
addr = hyp_alloc_pages(&vm->pool, 0);
|
||||
while (addr) {
|
||||
memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
|
||||
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
|
||||
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
|
||||
addr = hyp_alloc_pages(&vm->pool, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int __pkvm_prot_finalize(void)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
|
||||
struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
|
||||
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
|
||||
|
||||
if (params->hcr_el2 & HCR_VM)
|
||||
return -EPERM;
|
||||
|
||||
params->vttbr = kvm_get_vttbr(mmu);
|
||||
params->vtcr = host_kvm.arch.vtcr;
|
||||
params->vtcr = host_mmu.arch.vtcr;
|
||||
params->hcr_el2 |= HCR_VM;
|
||||
kvm_flush_dcache_to_poc(params, sizeof(*params));
|
||||
|
||||
write_sysreg(params->hcr_el2, hcr_el2);
|
||||
__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
|
||||
__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
|
||||
|
||||
/*
|
||||
* Make sure to have an ISB before the TLB maintenance below but only
|
||||
@ -175,7 +318,7 @@ int __pkvm_prot_finalize(void)
|
||||
|
||||
static int host_stage2_unmap_dev_all(void)
|
||||
{
|
||||
struct kvm_pgtable *pgt = &host_kvm.pgt;
|
||||
struct kvm_pgtable *pgt = &host_mmu.pgt;
|
||||
struct memblock_region *reg;
|
||||
u64 addr = 0;
|
||||
int i, ret;
|
||||
@ -195,7 +338,7 @@ struct kvm_mem_range {
|
||||
u64 end;
|
||||
};
|
||||
|
||||
static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
|
||||
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
|
||||
{
|
||||
int cur, left = 0, right = hyp_memblock_nr;
|
||||
struct memblock_region *reg;
|
||||
@ -218,18 +361,28 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
|
||||
} else {
|
||||
range->start = reg->base;
|
||||
range->end = end;
|
||||
return true;
|
||||
return reg;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool addr_is_memory(phys_addr_t phys)
|
||||
{
|
||||
struct kvm_mem_range range;
|
||||
|
||||
return find_mem_range(phys, &range);
|
||||
return !!find_mem_range(phys, &range);
|
||||
}
|
||||
|
||||
/*
 * Return true when @phys falls in a memblock region found by
 * find_mem_range() and that region is not flagged MEMBLOCK_NOMAP.
 */
static bool addr_is_allowed_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;
	struct memblock_region *reg = find_mem_range(phys, &range);

	if (!reg)
		return false;

	return !(reg->flags & MEMBLOCK_NOMAP);
}
|
||||
|
||||
static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
|
||||
@ -250,8 +403,8 @@ static bool range_is_memory(u64 start, u64 end)
|
||||
static inline int __host_stage2_idmap(u64 start, u64 end,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
|
||||
prot, &host_s2_pool);
|
||||
return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
|
||||
prot, &host_s2_pool, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -263,7 +416,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
|
||||
#define host_stage2_try(fn, ...) \
|
||||
({ \
|
||||
int __ret; \
|
||||
hyp_assert_lock_held(&host_kvm.lock); \
|
||||
hyp_assert_lock_held(&host_mmu.lock); \
|
||||
__ret = fn(__VA_ARGS__); \
|
||||
if (__ret == -ENOMEM) { \
|
||||
__ret = host_stage2_unmap_dev_all(); \
|
||||
@ -286,8 +439,8 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
u32 level;
|
||||
int ret;
|
||||
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
|
||||
hyp_assert_lock_held(&host_mmu.lock);
|
||||
ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -319,7 +472,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
|
||||
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
|
||||
{
|
||||
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
|
||||
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
|
||||
addr, size, &host_s2_pool, owner_id);
|
||||
}
|
||||
|
||||
@ -348,7 +501,7 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr
|
||||
static int host_stage2_idmap(u64 addr)
|
||||
{
|
||||
struct kvm_mem_range range;
|
||||
bool is_memory = find_mem_range(addr, &range);
|
||||
bool is_memory = !!find_mem_range(addr, &range);
|
||||
enum kvm_pgtable_prot prot;
|
||||
int ret;
|
||||
|
||||
@ -380,12 +533,6 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
BUG_ON(ret && ret != -EAGAIN);
|
||||
}
|
||||
|
||||
/*
 * Identifiers used for the initiator and completer of a memory transition.
 * The enumerator order corresponds to locking order.
 */
|
||||
enum pkvm_component_id {
|
||||
PKVM_ID_HOST,
|
||||
|
||||
PKVM_ID_HYP,
|
||||
};
|
||||
|
||||
struct pkvm_mem_transition {
|
||||
u64 nr_pages;
|
||||
|
||||
@ -399,6 +546,9 @@ struct pkvm_mem_transition {
|
||||
/* Address in the completer's address space */
|
||||
u64 completer_addr;
|
||||
} host;
|
||||
struct {
|
||||
u64 completer_addr;
|
||||
} hyp;
|
||||
};
|
||||
} initiator;
|
||||
|
||||
@ -412,23 +562,24 @@ struct pkvm_mem_share {
|
||||
const enum kvm_pgtable_prot completer_prot;
|
||||
};
|
||||
|
||||
/* A donation request: wraps the memory transition describing it. */
struct pkvm_mem_donation {
|
||||
const struct pkvm_mem_transition tx;
|
||||
};
|
||||
|
||||
/*
 * Argument for the page-state checking walker: the state every visited PTE
 * must be in, and the callback that derives a page state from a PTE.
 */
struct check_walk_data {
|
||||
enum pkvm_page_state desired;
|
||||
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
|
||||
};
|
||||
|
||||
static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
{
|
||||
struct check_walk_data *d = arg;
|
||||
kvm_pte_t pte = *ptep;
|
||||
struct check_walk_data *d = ctx->arg;
|
||||
|
||||
if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
|
||||
if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
|
||||
return -EINVAL;
|
||||
|
||||
return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
|
||||
return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
|
||||
}
|
||||
|
||||
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
@ -459,8 +610,8 @@ static int __host_check_page_state_range(u64 addr, u64 size,
|
||||
.get_page_state = host_get_page_state,
|
||||
};
|
||||
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
return check_page_state_range(&host_kvm.pgt, addr, size, &d);
|
||||
hyp_assert_lock_held(&host_mmu.lock);
|
||||
return check_page_state_range(&host_mmu.pgt, addr, size, &d);
|
||||
}
|
||||
|
||||
static int __host_set_page_state_range(u64 addr, u64 size,
|
||||
@ -511,6 +662,46 @@ static int host_initiate_unshare(u64 *completer_addr,
|
||||
return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
|
||||
}
|
||||
|
||||
/*
 * Host-side start of a donation: report the completer's address through
 * @completer_addr and set the owner of the donated range in the host
 * stage-2 to the completer's id.
 */
static int host_initiate_donation(u64 *completer_addr,
				  const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u8 new_owner = tx->completer.id;

	*completer_addr = tx->initiator.host.completer_addr;

	return host_stage2_set_owner_locked(tx->initiator.addr, size, new_owner);
}
|
||||
|
||||
static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
|
||||
{
|
||||
return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
|
||||
tx->initiator.id != PKVM_ID_HYP);
|
||||
}
|
||||
|
||||
/*
 * Check that the host range at @addr covered by @tx is in page state @state,
 * unless __host_ack_skip_pgtable_check() says the check can be skipped, in
 * which case 0 is returned.
 */
static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
				 enum pkvm_page_state state)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (!__host_ack_skip_pgtable_check(tx))
		return __host_check_page_state_range(addr, size, state);

	return 0;
}
|
||||
|
||||
/* Host acknowledges a donation: the target range must be PKVM_NOPAGE. */
static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	const enum pkvm_page_state expected = PKVM_NOPAGE;

	return __host_ack_transition(addr, tx, expected);
}
|
||||
|
||||
/*
 * Host-side completion of a donation: set the owner of the range at @addr
 * in the host stage-2 to the completer's id.
 */
static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u8 owner = tx->completer.id;
	u64 nr_bytes = tx->nr_pages * PAGE_SIZE;

	return host_stage2_set_owner_locked(addr, nr_bytes, owner);
}
|
||||
|
||||
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
|
||||
{
|
||||
if (!kvm_pte_valid(pte))
|
||||
@ -531,6 +722,27 @@ static int __hyp_check_page_state_range(u64 addr, u64 size,
|
||||
return check_page_state_range(&pkvm_pgtable, addr, size, &d);
|
||||
}
|
||||
|
||||
/*
 * Hyp-side donation request: report the completer's address through
 * @completer_addr and check that the initiator's range is PKVM_PAGE_OWNED
 * in the hypervisor page-table.
 */
static int hyp_request_donation(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 nr_bytes = tx->nr_pages * PAGE_SIZE;

	*completer_addr = tx->initiator.hyp.completer_addr;

	return __hyp_check_page_state_range(tx->initiator.addr, nr_bytes,
					    PKVM_PAGE_OWNED);
}
|
||||
|
||||
/*
 * Hyp-side start of a donation: report the completer's address through
 * @completer_addr and unmap the donated range from the hypervisor
 * page-table.
 *
 * Returns 0 when the unmap covered exactly the requested size, -EFAULT
 * otherwise.
 */
static int hyp_initiate_donation(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 unmapped;

	*completer_addr = tx->initiator.hyp.completer_addr;

	/*
	 * Keep the result at full width: it is compared against the u64
	 * 'size', and narrowing it to int first would corrupt the comparison
	 * for ranges of 2GiB and above.
	 */
	unmapped = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);

	return (unmapped != size) ? -EFAULT : 0;
}
|
||||
|
||||
static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
|
||||
{
|
||||
return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
|
||||
@ -555,6 +767,9 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
|
||||
{
|
||||
u64 size = tx->nr_pages * PAGE_SIZE;
|
||||
|
||||
if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
|
||||
return -EBUSY;
|
||||
|
||||
if (__hyp_ack_skip_pgtable_check(tx))
|
||||
return 0;
|
||||
|
||||
@ -562,6 +777,16 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
|
||||
PKVM_PAGE_SHARED_BORROWED);
|
||||
}
|
||||
|
||||
/*
 * Hyp acknowledges a donation: unless the page-table check can be skipped,
 * the range at @addr must be PKVM_NOPAGE in the hypervisor page-table.
 */
static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 nr_bytes = tx->nr_pages * PAGE_SIZE;

	if (!__hyp_ack_skip_pgtable_check(tx))
		return __hyp_check_page_state_range(addr, nr_bytes, PKVM_NOPAGE);

	return 0;
}
|
||||
|
||||
static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
|
||||
enum kvm_pgtable_prot perms)
|
||||
{
|
||||
@ -580,6 +805,15 @@ static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
|
||||
return (ret != size) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
/*
 * Hyp-side completion of a donation: map the range starting at @addr into
 * the hypervisor with PAGE_HYP permissions, marked PKVM_PAGE_OWNED.
 */
static int hyp_complete_donation(u64 addr,
				 const struct pkvm_mem_transition *tx)
{
	enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
	void *start = (void *)addr;
	void *end = start + (tx->nr_pages * PAGE_SIZE);

	return pkvm_create_mappings_locked(start, end, prot);
}
|
||||
|
||||
static int check_share(struct pkvm_mem_share *share)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &share->tx;
|
||||
@ -732,6 +966,94 @@ static int do_unshare(struct pkvm_mem_share *share)
|
||||
return WARN_ON(__do_unshare(share));
|
||||
}
|
||||
|
||||
static int check_donation(struct pkvm_mem_donation *donation)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &donation->tx;
|
||||
u64 completer_addr;
|
||||
int ret;
|
||||
|
||||
switch (tx->initiator.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_request_owned_transition(&completer_addr, tx);
|
||||
break;
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_request_donation(&completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (tx->completer.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_ack_donation(completer_addr, tx);
|
||||
break;
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_ack_donation(completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __do_donate(struct pkvm_mem_donation *donation)
|
||||
{
|
||||
const struct pkvm_mem_transition *tx = &donation->tx;
|
||||
u64 completer_addr;
|
||||
int ret;
|
||||
|
||||
switch (tx->initiator.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_initiate_donation(&completer_addr, tx);
|
||||
break;
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_initiate_donation(&completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (tx->completer.id) {
|
||||
case PKVM_ID_HOST:
|
||||
ret = host_complete_donation(completer_addr, tx);
|
||||
break;
|
||||
case PKVM_ID_HYP:
|
||||
ret = hyp_complete_donation(completer_addr, tx);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * do_donate():
 *
 * The page owner transfers ownership to another component, losing access
 * as a consequence.
 *
 * Initiator: OWNED => NOPAGE
 * Completer: NOPAGE => OWNED
 *
 * The donation is checked first; the actual transition is only attempted
 * when the check passes, and is wrapped in WARN_ON().
 */
static int do_donate(struct pkvm_mem_donation *donation)
{
	int ret = check_donation(donation);

	if (!ret)
		ret = WARN_ON(__do_donate(donation));

	return ret;
}
|
||||
|
||||
int __pkvm_host_share_hyp(u64 pfn)
|
||||
{
|
||||
int ret;
|
||||
@ -797,3 +1119,112 @@ int __pkvm_host_unshare_hyp(u64 pfn)
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Donate @nr_pages pages starting at @pfn from the host to the hypervisor.
 * Takes the host and hyp component locks around the donation and returns
 * the result of do_donate().
 */
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx.nr_pages				= nr_pages,
		.tx.initiator.id			= PKVM_ID_HOST,
		.tx.initiator.addr			= host_addr,
		.tx.initiator.host.completer_addr	= hyp_addr,
		.tx.completer.id			= PKVM_ID_HYP,
	};
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}
|
||||
|
||||
/*
 * Donate @nr_pages pages starting at @pfn from the hypervisor back to the
 * host. Takes the host and hyp component locks around the donation and
 * returns the result of do_donate().
 */
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx.nr_pages				= nr_pages,
		.tx.initiator.id			= PKVM_ID_HYP,
		.tx.initiator.addr			= hyp_addr,
		.tx.initiator.hyp.completer_addr	= host_addr,
		.tx.completer.id			= PKVM_ID_HOST,
	};
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}
|
||||
|
||||
int hyp_pin_shared_mem(void *from, void *to)
|
||||
{
|
||||
u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
|
||||
u64 end = PAGE_ALIGN((u64)to);
|
||||
u64 size = end - start;
|
||||
int ret;
|
||||
|
||||
host_lock_component();
|
||||
hyp_lock_component();
|
||||
|
||||
ret = __host_check_page_state_range(__hyp_pa(start), size,
|
||||
PKVM_PAGE_SHARED_OWNED);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __hyp_check_page_state_range(start, size,
|
||||
PKVM_PAGE_SHARED_BORROWED);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
for (cur = start; cur < end; cur += PAGE_SIZE)
|
||||
hyp_page_ref_inc(hyp_virt_to_page(cur));
|
||||
|
||||
unlock:
|
||||
hyp_unlock_component();
|
||||
host_unlock_component();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void hyp_unpin_shared_mem(void *from, void *to)
|
||||
{
|
||||
u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
|
||||
u64 end = PAGE_ALIGN((u64)to);
|
||||
|
||||
host_lock_component();
|
||||
hyp_lock_component();
|
||||
|
||||
for (cur = start; cur < end; cur += PAGE_SIZE)
|
||||
hyp_page_ref_dec(hyp_virt_to_page(cur));
|
||||
|
||||
hyp_unlock_component();
|
||||
host_unlock_component();
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <nvhe/early_alloc.h>
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
@ -25,6 +26,12 @@ unsigned int hyp_memblock_nr;
|
||||
|
||||
static u64 __io_map_base;
|
||||
|
||||
/*
 * A fixmap slot: the virtual address of the slot and a pointer to the PTE
 * that maps it. One instance exists per CPU.
 */
struct hyp_fixmap_slot {
|
||||
u64 addr;
|
||||
kvm_pte_t *ptep;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots);
|
||||
|
||||
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
@ -129,13 +136,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
|
||||
int hyp_back_vmemmap(phys_addr_t back)
|
||||
{
|
||||
unsigned long start, end;
|
||||
unsigned long i, start, size, end = 0;
|
||||
int ret;
|
||||
|
||||
hyp_vmemmap_range(phys, size, &start, &end);
|
||||
for (i = 0; i < hyp_memblock_nr; i++) {
|
||||
start = hyp_memory[i].base;
|
||||
start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
|
||||
/*
|
||||
* The beginning of the hyp_vmemmap region for the current
|
||||
* memblock may already be backed by the page backing the end
|
||||
* of the previous region, so avoid mapping it twice.
|
||||
*/
|
||||
start = max(start, end);
|
||||
|
||||
return __pkvm_create_mappings(start, end - start, back, PAGE_HYP);
|
||||
end = hyp_memory[i].base + hyp_memory[i].size;
|
||||
end = PAGE_ALIGN((u64)hyp_phys_to_page(end));
|
||||
if (start >= end)
|
||||
continue;
|
||||
|
||||
size = end - start;
|
||||
ret = __pkvm_create_mappings(start, size, back, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
memset(hyp_phys_to_virt(back), 0, size);
|
||||
back += size;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *__hyp_bp_vect_base;
|
||||
@ -189,6 +219,102 @@ int hyp_map_vectors(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Point this CPU's fixmap slot at physical address @phys and return the
 * slot's virtual address. The slot's PTE keeps its other bits; only the
 * output address is replaced and the valid bit set.
 */
void *hyp_fixmap_map(phys_addr_t phys)
{
	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
	kvm_pte_t *ptep = slot->ptep;
	kvm_pte_t new_pte;

	new_pte = *ptep & ~kvm_phys_to_pte(KVM_PHYS_INVALID);
	new_pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID;
	WRITE_ONCE(*ptep, new_pte);
	dsb(ishst);

	return (void *)slot->addr;
}
|
||||
|
||||
static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
|
||||
{
|
||||
kvm_pte_t *ptep = slot->ptep;
|
||||
u64 addr = slot->addr;
|
||||
|
||||
WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
|
||||
|
||||
/*
|
||||
* Irritatingly, the architecture requires that we use inner-shareable
|
||||
* broadcast TLB invalidation here in case another CPU speculates
|
||||
* through our fixmap and decides to create an "amalagamation of the
|
||||
* values held in the TLB" due to the apparent lack of a
|
||||
* break-before-make sequence.
|
||||
*
|
||||
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
|
||||
*/
|
||||
dsb(ishst);
|
||||
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
|
||||
dsb(ish);
|
||||
isb();
|
||||
}
|
||||
|
||||
void hyp_fixmap_unmap(void)
|
||||
{
|
||||
fixmap_clear_slot(this_cpu_ptr(&fixmap_slots));
|
||||
}
|
||||
|
||||
static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
{
|
||||
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
|
||||
|
||||
if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
return -EINVAL;
|
||||
|
||||
slot->addr = ctx->addr;
|
||||
slot->ptep = ctx->ptep;
|
||||
|
||||
/*
|
||||
* Clear the PTE, but keep the page-table page refcount elevated to
|
||||
* prevent it from ever being freed. This lets us manipulate the PTEs
|
||||
* by hand safely without ever needing to allocate memory.
|
||||
*/
|
||||
fixmap_clear_slot(slot);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk the single page at @addr in the hypervisor page-table with
 * __create_fixmap_slot_cb() as the leaf callback; @cpu is passed to the
 * callback through the walker argument.
 */
static int create_fixmap_slot(u64 addr, u64 cpu)
{
	struct kvm_pgtable_walker slot_walker = {
		.cb	= __create_fixmap_slot_cb,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= (void *)cpu,
	};
	int ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &slot_walker);

	return ret;
}
|
||||
|
||||
int hyp_create_pcpu_fixmap(void)
|
||||
{
|
||||
unsigned long addr, i;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < hyp_nr_cpus; i++) {
|
||||
ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
|
||||
__hyp_pa(__hyp_bss_start), PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = create_fixmap_slot(addr, i);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hyp_create_idmap(u32 hyp_va_bits)
|
||||
{
|
||||
unsigned long start, end;
|
||||
@ -213,3 +339,36 @@ int hyp_create_idmap(u32 hyp_va_bits)
|
||||
|
||||
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
|
||||
}
|
||||
|
||||
static void *admit_host_page(void *arg)
|
||||
{
|
||||
struct kvm_hyp_memcache *host_mc = arg;
|
||||
|
||||
if (!host_mc->nr_pages)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* The host still owns the pages in its memcache, so we need to go
|
||||
* through a full host-to-hyp donation cycle to change it. Fortunately,
|
||||
* __pkvm_host_donate_hyp() takes care of races for us, so if it
|
||||
* succeeds we're good to go.
|
||||
*/
|
||||
if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1))
|
||||
return NULL;
|
||||
|
||||
return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
|
||||
}
|
||||
|
||||
/* Refill our local memcache by poping pages from the one provided by the host. */
|
||||
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
|
||||
struct kvm_hyp_memcache *host_mc)
|
||||
{
|
||||
struct kvm_hyp_memcache tmp = *host_mc;
|
||||
int ret;
|
||||
|
||||
ret = __topup_hyp_memcache(mc, min_pages, admit_host_page,
|
||||
hyp_virt_to_phys, &tmp);
|
||||
*host_mc = tmp;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -93,11 +93,16 @@ static inline struct hyp_page *node_to_page(struct list_head *node)
|
||||
static void __hyp_attach_page(struct hyp_pool *pool,
|
||||
struct hyp_page *p)
|
||||
{
|
||||
phys_addr_t phys = hyp_page_to_phys(p);
|
||||
unsigned short order = p->order;
|
||||
struct hyp_page *buddy;
|
||||
|
||||
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
|
||||
|
||||
/* Skip coalescing for 'external' pages being freed into the pool. */
|
||||
if (phys < pool->range_start || phys >= pool->range_end)
|
||||
goto insert;
|
||||
|
||||
/*
|
||||
* Only the first struct hyp_page of a high-order page (otherwise known
|
||||
* as the 'head') should have p->order set. The non-head pages should
|
||||
@ -116,6 +121,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
|
||||
p = min(p, buddy);
|
||||
}
|
||||
|
||||
insert:
|
||||
/* Mark the new head, and insert it */
|
||||
p->order = order;
|
||||
page_add_to_list(p, &pool->free_area[order]);
|
||||
@ -144,25 +150,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline void hyp_page_ref_inc(struct hyp_page *p)
|
||||
{
|
||||
BUG_ON(p->refcount == USHRT_MAX);
|
||||
p->refcount++;
|
||||
}
|
||||
|
||||
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
|
||||
{
|
||||
BUG_ON(!p->refcount);
|
||||
p->refcount--;
|
||||
return (p->refcount == 0);
|
||||
}
|
||||
|
||||
static inline void hyp_set_page_refcounted(struct hyp_page *p)
|
||||
{
|
||||
BUG_ON(p->refcount);
|
||||
p->refcount = 1;
|
||||
}
|
||||
|
||||
static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
|
||||
{
|
||||
if (hyp_page_ref_dec_and_test(p))
|
||||
@ -249,10 +236,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
|
||||
|
||||
/* Init the vmemmap portion */
|
||||
p = hyp_phys_to_page(phys);
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
p[i].order = 0;
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
hyp_set_page_refcounted(&p[i]);
|
||||
}
|
||||
|
||||
/* Attach the unused pages to the buddy tree */
|
||||
for (i = reserved_pages; i < nr_pages; i++)
|
||||
|
@ -7,8 +7,17 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mm.h>
|
||||
#include <nvhe/fixed_config.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
/* Used by icache_is_vpipt(). */
|
||||
unsigned long __icache_flags;
|
||||
|
||||
/* Used by kvm_get_vttbr(). */
|
||||
unsigned int kvm_arm_vmid_bits;
|
||||
|
||||
/*
|
||||
* Set trap register values based on features in ID_AA64PFR0.
|
||||
*/
|
||||
@ -183,3 +192,430 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
|
||||
pvm_init_traps_aa64mmfr0(vcpu);
|
||||
pvm_init_traps_aa64mmfr1(vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start the VM table handle at the offset defined instead of at 0.
|
||||
* Mainly for sanity checking and debugging.
|
||||
*/
|
||||
#define HANDLE_OFFSET 0x1000
|
||||
|
||||
/* Convert a VM handle into its index in the VM table. */
static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
	unsigned int idx = handle - HANDLE_OFFSET;

	return idx;
}
|
||||
|
||||
static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
|
||||
{
|
||||
return idx + HANDLE_OFFSET;
|
||||
}
|
||||
|
||||
/*
|
||||
* Spinlock for protecting state related to the VM table. Protects writes
|
||||
* to 'vm_table' and 'nr_table_entries' as well as reads and writes to
|
||||
* 'last_hyp_vcpu_lookup'.
|
||||
*/
|
||||
static DEFINE_HYP_SPINLOCK(vm_table_lock);
|
||||
|
||||
/*
|
||||
* The table of VM entries for protected VMs in hyp.
|
||||
* Allocated at hyp initialization and setup.
|
||||
*/
|
||||
static struct pkvm_hyp_vm **vm_table;
|
||||
|
||||
void pkvm_hyp_vm_table_init(void *tbl)
|
||||
{
|
||||
WARN_ON(vm_table);
|
||||
vm_table = tbl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the hyp vm structure corresponding to the handle.
|
||||
*/
|
||||
static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
|
||||
{
|
||||
unsigned int idx = vm_handle_to_idx(handle);
|
||||
|
||||
if (unlikely(idx >= KVM_MAX_PVMS))
|
||||
return NULL;
|
||||
|
||||
return vm_table[idx];
|
||||
}
|
||||
|
||||
/*
 * Look up vcpu @vcpu_idx of the VM identified by @handle.
 *
 * On success, a reference is taken on the page holding the hyp VM and the
 * hyp vcpu is returned. Returns NULL when the handle does not resolve to a
 * VM or @vcpu_idx is not below the VM's nr_vcpus.
 */
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
					 unsigned int vcpu_idx)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
	struct pkvm_hyp_vm *hyp_vm;

	hyp_spin_lock(&vm_table_lock);

	hyp_vm = get_vm_by_handle(handle);
	if (hyp_vm && vcpu_idx < hyp_vm->nr_vcpus) {
		hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
		hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
	}

	hyp_spin_unlock(&vm_table_lock);

	return hyp_vcpu;
}
|
||||
|
||||
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
{
|
||||
struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
}
|
||||
|
||||
static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
|
||||
{
|
||||
if (host_vcpu)
|
||||
hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
|
||||
}
|
||||
|
||||
static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
|
||||
unsigned int nr_vcpus)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++)
|
||||
unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
|
||||
}
|
||||
|
||||
static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
|
||||
unsigned int nr_vcpus)
|
||||
{
|
||||
hyp_vm->host_kvm = host_kvm;
|
||||
hyp_vm->kvm.created_vcpus = nr_vcpus;
|
||||
hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
|
||||
}
|
||||
|
||||
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
|
||||
struct pkvm_hyp_vm *hyp_vm,
|
||||
struct kvm_vcpu *host_vcpu,
|
||||
unsigned int vcpu_idx)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
|
||||
return -EBUSY;
|
||||
|
||||
if (host_vcpu->vcpu_idx != vcpu_idx) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
hyp_vcpu->host_vcpu = host_vcpu;
|
||||
|
||||
hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
|
||||
hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
|
||||
hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;
|
||||
|
||||
hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
|
||||
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
|
||||
done:
|
||||
if (ret)
|
||||
unpin_host_vcpu(host_vcpu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int find_free_vm_table_entry(struct kvm *host_kvm)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_MAX_PVMS; ++i) {
|
||||
if (!vm_table[i])
|
||||
return i;
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a VM table entry and insert a pointer to the new vm.
|
||||
*
|
||||
* Return a unique handle to the protected VM on success,
|
||||
* negative error code on failure.
|
||||
*/
|
||||
static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
|
||||
struct pkvm_hyp_vm *hyp_vm)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
|
||||
int idx;
|
||||
|
||||
hyp_assert_lock_held(&vm_table_lock);
|
||||
|
||||
/*
|
||||
* Initializing protected state might have failed, yet a malicious
|
||||
* host could trigger this function. Thus, ensure that 'vm_table'
|
||||
* exists.
|
||||
*/
|
||||
if (unlikely(!vm_table))
|
||||
return -EINVAL;
|
||||
|
||||
idx = find_free_vm_table_entry(host_kvm);
|
||||
if (idx < 0)
|
||||
return idx;
|
||||
|
||||
hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);
|
||||
|
||||
/* VMID 0 is reserved for the host */
|
||||
atomic64_set(&mmu->vmid.id, idx + 1);
|
||||
|
||||
mmu->arch = &hyp_vm->kvm.arch;
|
||||
mmu->pgt = &hyp_vm->pgt;
|
||||
|
||||
vm_table[idx] = hyp_vm;
|
||||
return hyp_vm->kvm.arch.pkvm.handle;
|
||||
}
|
||||
|
||||
/*
|
||||
* Deallocate and remove the VM table entry corresponding to the handle.
|
||||
*/
|
||||
static void remove_vm_table_entry(pkvm_handle_t handle)
|
||||
{
|
||||
hyp_assert_lock_held(&vm_table_lock);
|
||||
vm_table[vm_handle_to_idx(handle)] = NULL;
|
||||
}
|
||||
|
||||
static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
|
||||
{
|
||||
return size_add(sizeof(struct pkvm_hyp_vm),
|
||||
size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
|
||||
}
|
||||
|
||||
/*
 * Have the host donate the @size bytes (rounded up to whole pages) at host
 * VA @host_va to the hypervisor, without clearing them.
 *
 * Returns the hyp VA of the memory, or NULL when the address is not page
 * aligned or the donation fails.
 */
static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
{
	void *va = (void *)kern_hyp_va(host_va);

	if (!PAGE_ALIGNED(va) ||
	    __pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
				   PAGE_ALIGN(size) >> PAGE_SHIFT))
		return NULL;

	return va;
}
|
||||
|
||||
/*
 * Like map_donated_memory_noclear(), but zero the first @size bytes of the
 * donated memory on success. Returns NULL on failure.
 */
static void *map_donated_memory(unsigned long host_va, size_t size)
{
	void *va = map_donated_memory_noclear(host_va, size);

	if (!va)
		return NULL;

	memset(va, 0, size);

	return va;
}
|
||||
|
||||
/*
 * Donate the @size bytes (rounded up to whole pages) at hyp VA @va back to
 * the host, warning if the donation fails.
 */
static void __unmap_donated_memory(void *va, size_t size)
{
	u64 pfn = hyp_virt_to_pfn(va);
	u64 nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;

	WARN_ON(__pkvm_hyp_donate_host(pfn, nr_pages));
}
|
||||
|
||||
/*
 * Zero 'size' bytes at 'va' and then return the memory to the host.
 * A NULL 'va' is accepted and ignored.
 */
static void unmap_donated_memory(void *va, size_t size)
{
	if (va) {
		memset(va, 0, size);
		__unmap_donated_memory(va, size);
	}
}
|
||||
|
||||
/*
 * Return the memory at 'va' to the host without wiping it first.
 * A NULL 'va' is accepted and ignored.
 */
static void unmap_donated_memory_noclear(void *va, size_t size)
{
	if (va)
		__unmap_donated_memory(va, size);
}
|
||||
|
||||
/*
|
||||
* Initialize the hypervisor copy of the protected VM state using the
|
||||
* memory donated by the host.
|
||||
*
|
||||
* Unmaps the donated memory from the host at stage 2.
|
||||
*
|
||||
* host_kvm: A pointer to the host's struct kvm.
|
||||
* vm_hva: The host va of the area being donated for the VM state.
|
||||
* Must be page aligned.
|
||||
* pgd_hva: The host va of the area being donated for the stage-2 PGD for
|
||||
* the VM. Must be page aligned. Its size is implied by the VM's
|
||||
* VTCR.
|
||||
*
|
||||
* Return a unique handle to the protected VM on success,
|
||||
* negative error code on failure.
|
||||
*/
|
||||
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
|
||||
unsigned long pgd_hva)
|
||||
{
|
||||
struct pkvm_hyp_vm *hyp_vm = NULL;
|
||||
size_t vm_size, pgd_size;
|
||||
unsigned int nr_vcpus;
|
||||
void *pgd = NULL;
|
||||
int ret;
|
||||
|
||||
ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
|
||||
if (nr_vcpus < 1) {
|
||||
ret = -EINVAL;
|
||||
goto err_unpin_kvm;
|
||||
}
|
||||
|
||||
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
|
||||
pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
hyp_vm = map_donated_memory(vm_hva, vm_size);
|
||||
if (!hyp_vm)
|
||||
goto err_remove_mappings;
|
||||
|
||||
pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
|
||||
if (!pgd)
|
||||
goto err_remove_mappings;
|
||||
|
||||
init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
ret = insert_vm_table_entry(host_kvm, hyp_vm);
|
||||
if (ret < 0)
|
||||
goto err_unlock;
|
||||
|
||||
ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
|
||||
if (ret)
|
||||
goto err_remove_vm_table_entry;
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
|
||||
return hyp_vm->kvm.arch.pkvm.handle;
|
||||
|
||||
err_remove_vm_table_entry:
|
||||
remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
|
||||
err_unlock:
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
err_remove_mappings:
|
||||
unmap_donated_memory(hyp_vm, vm_size);
|
||||
unmap_donated_memory(pgd, pgd_size);
|
||||
err_unpin_kvm:
|
||||
hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the hypervisor copy of the protected vCPU state using the
|
||||
* memory donated by the host.
|
||||
*
|
||||
* handle: The handle for the protected vm.
|
||||
* host_vcpu: A pointer to the corresponding host vcpu.
|
||||
* vcpu_hva: The host va of the area being donated for the vcpu state.
|
||||
* Must be page aligned. The size of the area must be equal to
|
||||
* the page-aligned size of 'struct pkvm_hyp_vcpu'.
|
||||
* Return 0 on success, negative error code on failure.
|
||||
*/
|
||||
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
|
||||
unsigned long vcpu_hva)
|
||||
{
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
unsigned int idx;
|
||||
int ret;
|
||||
|
||||
hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
|
||||
if (!hyp_vcpu)
|
||||
return -ENOMEM;
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
|
||||
hyp_vm = get_vm_by_handle(handle);
|
||||
if (!hyp_vm) {
|
||||
ret = -ENOENT;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
idx = hyp_vm->nr_vcpus;
|
||||
if (idx >= hyp_vm->kvm.created_vcpus) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
hyp_vm->vcpus[idx] = hyp_vcpu;
|
||||
hyp_vm->nr_vcpus++;
|
||||
unlock:
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
|
||||
if (ret)
|
||||
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
|
||||
{
|
||||
size = PAGE_ALIGN(size);
|
||||
memset(addr, 0, size);
|
||||
|
||||
for (void *start = addr; start < addr + size; start += PAGE_SIZE)
|
||||
push_hyp_memcache(mc, start, hyp_virt_to_phys);
|
||||
|
||||
unmap_donated_memory_noclear(addr, size);
|
||||
}
|
||||
|
||||
/*
 * Tear down the protected VM identified by 'handle' and return its memory
 * to the host through the host VM's teardown memcache.
 *
 * Returns 0 on success, -ENOENT if no VM matches 'handle', or -EBUSY
 * (with a warning) if hyp_page_count() reports the VM structure as still
 * referenced.
 */
int __pkvm_teardown_vm(pkvm_handle_t handle)
{
	struct kvm_hyp_memcache *mc;
	struct pkvm_hyp_vm *hyp_vm;
	struct kvm *host_kvm;
	unsigned int idx;
	size_t vm_size;
	int err = 0;

	hyp_spin_lock(&vm_table_lock);

	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm)
		err = -ENOENT;
	else if (WARN_ON(hyp_page_count(hyp_vm)))
		err = -EBUSY;

	if (err) {
		hyp_spin_unlock(&vm_table_lock);
		return err;
	}

	/* Grab this now: hyp_vm itself is wiped further down. */
	host_kvm = hyp_vm->host_kvm;

	/* Ensure the VMID is clean before it can be reallocated */
	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
	remove_vm_table_entry(handle);
	hyp_spin_unlock(&vm_table_lock);

	/* Reclaim guest pages (including page-table pages) */
	mc = &host_kvm->arch.pkvm.teardown_mc;
	reclaim_guest_pages(hyp_vm, mc);
	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);

	/* Push the metadata pages to the teardown memcache */
	for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx)
		teardown_donated_memory(mc, hyp_vm->vcpus[idx],
					sizeof(struct pkvm_hyp_vcpu));

	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
	teardown_donated_memory(mc, hyp_vm, vm_size);
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);

	return 0;
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
unsigned long hyp_nr_cpus;
|
||||
@ -24,6 +25,7 @@ unsigned long hyp_nr_cpus;
|
||||
(unsigned long)__per_cpu_start)
|
||||
|
||||
static void *vmemmap_base;
|
||||
static void *vm_table_base;
|
||||
static void *hyp_pgt_base;
|
||||
static void *host_s2_pgt_base;
|
||||
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
|
||||
@ -31,16 +33,20 @@ static struct hyp_pool hpool;
|
||||
|
||||
static int divide_memory_pool(void *virt, unsigned long size)
|
||||
{
|
||||
unsigned long vstart, vend, nr_pages;
|
||||
unsigned long nr_pages;
|
||||
|
||||
hyp_early_alloc_init(virt, size);
|
||||
|
||||
hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend);
|
||||
nr_pages = (vend - vstart) >> PAGE_SHIFT;
|
||||
nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page));
|
||||
vmemmap_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!vmemmap_base)
|
||||
return -ENOMEM;
|
||||
|
||||
nr_pages = hyp_vm_table_pages();
|
||||
vm_table_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!vm_table_base)
|
||||
return -ENOMEM;
|
||||
|
||||
nr_pages = hyp_s1_pgtable_pages();
|
||||
hyp_pgt_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!hyp_pgt_base)
|
||||
@ -78,7 +84,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base));
|
||||
ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -138,20 +144,17 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the host's .bss and .rodata sections RO in the hypervisor, but
|
||||
* transfer the ownership from the host to the hypervisor itself to
|
||||
* make sure it can't be donated or shared with another entity.
|
||||
* Map the host sections RO in the hypervisor, but transfer the
|
||||
* ownership from the host to the hypervisor itself to make sure they
|
||||
* can't be donated or shared with another entity.
|
||||
*
|
||||
* The ownership transition requires matching changes in the host
|
||||
* stage-2. This will be done later (see finalize_host_mappings()) once
|
||||
* the hyp_vmemmap is addressable.
|
||||
*/
|
||||
prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
|
||||
ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
|
||||
ret = pkvm_create_mappings(&kvm_vgic_global_state,
|
||||
&kvm_vgic_global_state + 1, prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -186,33 +189,20 @@ static void hpool_put_page(void *addr)
|
||||
hyp_put_page(&hpool, addr);
|
||||
}
|
||||
|
||||
static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = arg;
|
||||
enum kvm_pgtable_prot prot;
|
||||
enum pkvm_page_state state;
|
||||
kvm_pte_t pte = *ptep;
|
||||
phys_addr_t phys;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
if (!kvm_pte_valid(ctx->old))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Fix-up the refcount for the page-table pages as the early allocator
|
||||
* was unable to access the hyp_vmemmap and so the buddy allocator has
|
||||
* initialised the refcount to '1'.
|
||||
*/
|
||||
mm_ops->get_page(ptep);
|
||||
if (flag != KVM_PGTABLE_WALK_LEAF)
|
||||
return 0;
|
||||
|
||||
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
return -EINVAL;
|
||||
|
||||
phys = kvm_pte_to_phys(pte);
|
||||
phys = kvm_pte_to_phys(ctx->old);
|
||||
if (!addr_is_memory(phys))
|
||||
return -EINVAL;
|
||||
|
||||
@ -220,10 +210,10 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
|
||||
* Adjust the host stage-2 mappings to match the ownership attributes
|
||||
* configured in the hypervisor stage-1.
|
||||
*/
|
||||
state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
|
||||
state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
|
||||
switch (state) {
|
||||
case PKVM_PAGE_OWNED:
|
||||
return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
|
||||
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
|
||||
case PKVM_PAGE_SHARED_OWNED:
|
||||
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
|
||||
break;
|
||||
@ -237,12 +227,25 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
|
||||
return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
|
||||
}
|
||||
|
||||
static int finalize_host_mappings(void)
|
||||
static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
{
|
||||
/*
|
||||
* Fix-up the refcount for the page-table pages as the early allocator
|
||||
* was unable to access the hyp_vmemmap and so the buddy allocator has
|
||||
* initialised the refcount to '1'.
|
||||
*/
|
||||
if (kvm_pte_valid(ctx->old))
|
||||
ctx->mm_ops->get_page(ctx->ptep);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fix_host_ownership(void)
|
||||
{
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = finalize_host_mappings_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = pkvm_pgtable.mm_ops,
|
||||
.cb = fix_host_ownership_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
};
|
||||
int i, ret;
|
||||
|
||||
@ -258,6 +261,18 @@ static int finalize_host_mappings(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fix_hyp_pgtable_refcnt(void)
|
||||
{
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = fix_hyp_pgtable_refcnt_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = pkvm_pgtable.mm_ops,
|
||||
};
|
||||
|
||||
return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits),
|
||||
&walker);
|
||||
}
|
||||
|
||||
void __noreturn __pkvm_init_finalise(void)
|
||||
{
|
||||
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
|
||||
@ -287,10 +302,19 @@ void __noreturn __pkvm_init_finalise(void)
|
||||
};
|
||||
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
|
||||
|
||||
ret = finalize_host_mappings();
|
||||
ret = fix_host_ownership();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = fix_hyp_pgtable_refcnt();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = hyp_create_pcpu_fixmap();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
pkvm_hyp_vm_table_init(vm_table_base);
|
||||
out:
|
||||
/*
|
||||
* We tail-called to here from handle___pkvm_init() and will not return,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
|
||||
# Makefile for Kernel-based Virtual Machine module, HYP/VHE part
|
||||
#
|
||||
|
||||
asflags-y := -D__KVM_VHE_HYPERVISOR__
|
||||
|
@ -128,6 +128,25 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
|
||||
free_pages_exact(virt, size);
|
||||
}
|
||||
|
||||
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
|
||||
|
||||
static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
|
||||
{
|
||||
struct page *page = container_of(head, struct page, rcu_head);
|
||||
void *pgtable = page_to_virt(page);
|
||||
u32 level = page_private(page);
|
||||
|
||||
kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
|
||||
}
|
||||
|
||||
/*
 * Defer the freeing of a removed stage-2 table to an RCU callback.
 *
 * 'level' is stashed in the private field of the table's page so that
 * stage2_free_removed_table_rcu_cb() can pick it up again.
 */
static void stage2_free_removed_table(void *addr, u32 level)
{
	struct page *pg = virt_to_page(addr);

	set_page_private(pg, (unsigned long)level);
	call_rcu(&pg->rcu_head, stage2_free_removed_table_rcu_cb);
}
|
||||
|
||||
static void kvm_host_get_page(void *addr)
|
||||
{
|
||||
get_page(virt_to_page(addr));
|
||||
@ -640,8 +659,8 @@ static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
|
||||
static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
{
|
||||
struct kvm_pgtable pgt = {
|
||||
.pgd = (kvm_pte_t *)kvm->mm->pgd,
|
||||
.ia_bits = VA_BITS,
|
||||
.pgd = (kvm_pteref_t)kvm->mm->pgd,
|
||||
.ia_bits = vabits_actual,
|
||||
.start_level = (KVM_PGTABLE_MAX_LEVELS -
|
||||
CONFIG_PGTABLE_LEVELS),
|
||||
.mm_ops = &kvm_user_mm_ops,
|
||||
@ -662,6 +681,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
|
||||
.zalloc_page = stage2_memcache_zalloc_page,
|
||||
.zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
|
||||
.free_pages_exact = kvm_s2_free_pages_exact,
|
||||
.free_removed_table = stage2_free_removed_table,
|
||||
.get_page = kvm_host_get_page,
|
||||
.put_page = kvm_s2_put_page,
|
||||
.page_count = kvm_host_page_count,
|
||||
@ -675,15 +695,42 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
|
||||
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
|
||||
* @kvm: The pointer to the KVM structure
|
||||
* @mmu: The pointer to the s2 MMU structure
|
||||
* @type: The machine type of the virtual machine
|
||||
*
|
||||
* Allocates only the stage-2 HW PGD level table(s).
|
||||
* Note we don't need locking here as this is only called when the VM is
|
||||
* created, which can only be done once.
|
||||
*/
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
|
||||
{
|
||||
u32 kvm_ipa_limit = get_kvm_ipa_limit();
|
||||
int cpu, err;
|
||||
struct kvm_pgtable *pgt;
|
||||
u64 mmfr0, mmfr1;
|
||||
u32 phys_shift;
|
||||
|
||||
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
|
||||
if (is_protected_kvm_enabled()) {
|
||||
phys_shift = kvm_ipa_limit;
|
||||
} else if (phys_shift) {
|
||||
if (phys_shift > kvm_ipa_limit ||
|
||||
phys_shift < ARM64_MIN_PARANGE_BITS)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
phys_shift = KVM_PHYS_SHIFT;
|
||||
if (phys_shift > kvm_ipa_limit) {
|
||||
pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
|
||||
current->comm);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
|
||||
kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
|
||||
|
||||
if (mmu->pgt != NULL) {
|
||||
kvm_err("kvm_arch already initialized?\n");
|
||||
@ -807,6 +854,32 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
|
||||
}
|
||||
}
|
||||
|
||||
/* Memcache free callback: release one page with free_page(). 'unused' is ignored. */
static void hyp_mc_free_fn(void *addr, void *unused)
{
	unsigned long page = (unsigned long)addr;

	free_page(page);
}
|
||||
|
||||
static void *hyp_mc_alloc_fn(void *unused)
|
||||
{
|
||||
return (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
|
||||
}
|
||||
|
||||
void free_hyp_memcache(struct kvm_hyp_memcache *mc)
|
||||
{
|
||||
if (is_protected_kvm_enabled())
|
||||
__free_hyp_memcache(mc, hyp_mc_free_fn,
|
||||
kvm_host_va, NULL);
|
||||
}
|
||||
|
||||
int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
|
||||
{
|
||||
if (!is_protected_kvm_enabled())
|
||||
return 0;
|
||||
|
||||
return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
|
||||
kvm_host_pa, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_phys_addr_ioremap - map a device range to guest IPA
|
||||
*
|
||||
@ -841,7 +914,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
|
||||
&cache);
|
||||
&cache, 0);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
if (ret)
|
||||
break;
|
||||
@ -1091,32 +1164,26 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
|
||||
* - mmap_lock protects between a VM faulting a page in and the VMM performing
|
||||
* an mprotect() to add VM_MTE
|
||||
*/
|
||||
static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
|
||||
unsigned long size)
|
||||
static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
|
||||
unsigned long size)
|
||||
{
|
||||
unsigned long i, nr_pages = size >> PAGE_SHIFT;
|
||||
struct page *page;
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (!kvm_has_mte(kvm))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* pfn_to_online_page() is used to reject ZONE_DEVICE pages
|
||||
* that may not support tags.
|
||||
*/
|
||||
page = pfn_to_online_page(pfn);
|
||||
|
||||
if (!page)
|
||||
return -EFAULT;
|
||||
return;
|
||||
|
||||
for (i = 0; i < nr_pages; i++, page++) {
|
||||
if (!test_bit(PG_mte_tagged, &page->flags)) {
|
||||
if (try_page_mte_tagging(page)) {
|
||||
mte_clear_page_tags(page_address(page));
|
||||
set_bit(PG_mte_tagged, &page->flags);
|
||||
set_page_mte_tagged(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
|
||||
{
|
||||
return vma->vm_flags & VM_MTE_ALLOWED;
|
||||
}
|
||||
|
||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
@ -1127,7 +1194,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
bool write_fault, writable, force_pte = false;
|
||||
bool exec_fault;
|
||||
bool device = false;
|
||||
bool shared;
|
||||
unsigned long mmu_seq;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
|
||||
@ -1136,7 +1202,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
gfn_t gfn;
|
||||
kvm_pfn_t pfn;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
bool use_read_lock = false;
|
||||
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
|
||||
unsigned long vma_pagesize, fault_granule;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
@ -1171,14 +1236,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
if (logging_active) {
|
||||
force_pte = true;
|
||||
vma_shift = PAGE_SHIFT;
|
||||
use_read_lock = (fault_status == FSC_PERM && write_fault &&
|
||||
fault_granule == PAGE_SIZE);
|
||||
} else {
|
||||
vma_shift = get_vma_page_shift(vma, hva);
|
||||
}
|
||||
|
||||
shared = (vma->vm_flags & VM_SHARED);
|
||||
|
||||
switch (vma_shift) {
|
||||
#ifndef __PAGETABLE_PMD_FOLDED
|
||||
case PUD_SHIFT:
|
||||
@ -1271,15 +1332,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
if (exec_fault && device)
|
||||
return -ENOEXEC;
|
||||
|
||||
/*
|
||||
* To reduce MMU contentions and enhance concurrency during dirty
|
||||
* logging, only acquire read lock for permission
|
||||
* relaxation.
|
||||
*/
|
||||
if (use_read_lock)
|
||||
read_lock(&kvm->mmu_lock);
|
||||
else
|
||||
write_lock(&kvm->mmu_lock);
|
||||
read_lock(&kvm->mmu_lock);
|
||||
pgt = vcpu->arch.hw_mmu->pgt;
|
||||
if (mmu_invalidate_retry(kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
@ -1298,13 +1351,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
}
|
||||
|
||||
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
|
||||
if (!shared)
|
||||
ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
else
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (kvm_vma_mte_allowed(vma)) {
|
||||
sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
} else {
|
||||
ret = -EFAULT;
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
if (writable)
|
||||
@ -1323,15 +1376,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* permissions only if vma_pagesize equals fault_granule. Otherwise,
|
||||
* kvm_pgtable_stage2_map() should be called to change block size.
|
||||
*/
|
||||
if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
|
||||
if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
|
||||
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
|
||||
} else {
|
||||
WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
|
||||
|
||||
else
|
||||
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
|
||||
__pfn_to_phys(pfn), prot,
|
||||
memcache);
|
||||
}
|
||||
memcache, KVM_PGTABLE_WALK_SHARED);
|
||||
|
||||
/* Mark the page dirty only if the fault is handled successfully */
|
||||
if (writable && !ret) {
|
||||
@ -1340,10 +1390,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
if (use_read_lock)
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
else
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
@ -1526,15 +1573,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
kvm_pfn_t pfn = pte_pfn(range->pte);
|
||||
int ret;
|
||||
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return false;
|
||||
|
||||
WARN_ON(range->end - range->start != 1);
|
||||
|
||||
ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
|
||||
if (ret)
|
||||
/*
|
||||
* If the page isn't tagged, defer to user_mem_abort() for sanitising
|
||||
* the MTE tags. The S2 pte should have been unmapped by
|
||||
* mmu_notifier_invalidate_range_end().
|
||||
*/
|
||||
if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -1549,7 +1599,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
*/
|
||||
kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
|
||||
PAGE_SIZE, __pfn_to_phys(pfn),
|
||||
KVM_PGTABLE_PROT_R, NULL);
|
||||
KVM_PGTABLE_PROT_R, NULL, 0);
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -1618,6 +1668,8 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
|
||||
int kvm_mmu_init(u32 *hyp_va_bits)
|
||||
{
|
||||
int err;
|
||||
u32 idmap_bits;
|
||||
u32 kernel_bits;
|
||||
|
||||
hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
|
||||
hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
|
||||
@ -1631,7 +1683,31 @@ int kvm_mmu_init(u32 *hyp_va_bits)
|
||||
*/
|
||||
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
|
||||
|
||||
*hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
|
||||
/*
|
||||
* The ID map may be configured to use an extended virtual address
|
||||
* range. This is only the case if system RAM is out of range for the
|
||||
* currently configured page size and VA_BITS_MIN, in which case we will
|
||||
* also need the extended virtual range for the HYP ID map, or we won't
|
||||
* be able to enable the EL2 MMU.
|
||||
*
|
||||
* However, in some cases the ID map may be configured for fewer than
|
||||
* the number of VA bits used by the regular kernel stage 1. This
|
||||
* happens when VA_BITS=52 and the kernel image is placed in PA space
|
||||
* below 48 bits.
|
||||
*
|
||||
* At EL2, there is only one TTBR register, and we can't switch between
|
||||
* translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
|
||||
* line: we need to use the extended range with *both* our translation
|
||||
* tables.
|
||||
*
|
||||
* So use the maximum of the idmap VA bits and the regular kernel stage
|
||||
* 1 VA bits to assure that the hypervisor can both ID map its code page
|
||||
* and map any kernel memory.
|
||||
*/
|
||||
idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
|
||||
kernel_bits = vabits_actual;
|
||||
*hyp_va_bits = max(idmap_bits, kernel_bits);
|
||||
|
||||
kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
|
||||
kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
|
||||
kvm_debug("HYP VA range: %lx:%lx\n",
|
||||
@ -1740,12 +1816,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
if (!vma)
|
||||
break;
|
||||
|
||||
/*
|
||||
* VM_SHARED mappings are not allowed with MTE to avoid races
|
||||
* when updating the PG_mte_tagged page flag, see
|
||||
* sanitise_mte_tags for more details.
|
||||
*/
|
||||
if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
|
||||
if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include <asm/kvm_pkvm.h>
|
||||
@ -53,7 +54,7 @@ static int __init register_memblock_regions(void)
|
||||
|
||||
void __init kvm_hyp_reserve(void)
|
||||
{
|
||||
u64 nr_pages, prev, hyp_mem_pages = 0;
|
||||
u64 hyp_mem_pages = 0;
|
||||
int ret;
|
||||
|
||||
if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
|
||||
@ -71,21 +72,8 @@ void __init kvm_hyp_reserve(void)
|
||||
|
||||
hyp_mem_pages += hyp_s1_pgtable_pages();
|
||||
hyp_mem_pages += host_s2_pgtable_pages();
|
||||
|
||||
/*
|
||||
* The hyp_vmemmap needs to be backed by pages, but these pages
|
||||
* themselves need to be present in the vmemmap, so compute the number
|
||||
* of pages needed by looking for a fixed point.
|
||||
*/
|
||||
nr_pages = 0;
|
||||
do {
|
||||
prev = nr_pages;
|
||||
nr_pages = hyp_mem_pages + prev;
|
||||
nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
|
||||
PAGE_SIZE);
|
||||
nr_pages += __hyp_pgtable_max_pages(nr_pages);
|
||||
} while (nr_pages != prev);
|
||||
hyp_mem_pages += nr_pages;
|
||||
hyp_mem_pages += hyp_vm_table_pages();
|
||||
hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Try to allocate a PMD-aligned region to reduce TLB pressure once
|
||||
@ -107,3 +95,121 @@ void __init kvm_hyp_reserve(void)
|
||||
kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
|
||||
hyp_mem_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocates and donates memory for hypervisor VM structs at EL2.
|
||||
*
|
||||
* Allocates space for the VM state, which includes the hyp vm as well as
|
||||
* the hyp vcpus.
|
||||
*
|
||||
* Stores an opaque handler in the kvm struct for future reference.
|
||||
*
|
||||
* Return 0 on success, negative error code on failure.
|
||||
*/
|
||||
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
|
||||
{
|
||||
size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
|
||||
struct kvm_vcpu *host_vcpu;
|
||||
pkvm_handle_t handle;
|
||||
void *pgd, *hyp_vm;
|
||||
unsigned long idx;
|
||||
int ret;
|
||||
|
||||
if (host_kvm->created_vcpus < 1)
|
||||
return -EINVAL;
|
||||
|
||||
pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
|
||||
|
||||
/*
|
||||
* The PGD pages will be reclaimed using a hyp_memcache which implies
|
||||
* page granularity. So, use alloc_pages_exact() to get individual
|
||||
* refcounts.
|
||||
*/
|
||||
pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
|
||||
if (!pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Allocate memory to donate to hyp for vm and vcpu pointers. */
|
||||
hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
|
||||
size_mul(sizeof(void *),
|
||||
host_kvm->created_vcpus)));
|
||||
hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
|
||||
if (!hyp_vm) {
|
||||
ret = -ENOMEM;
|
||||
goto free_pgd;
|
||||
}
|
||||
|
||||
/* Donate the VM memory to hyp and let hyp initialize it. */
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
|
||||
if (ret < 0)
|
||||
goto free_vm;
|
||||
|
||||
handle = ret;
|
||||
|
||||
host_kvm->arch.pkvm.handle = handle;
|
||||
|
||||
/* Donate memory for the vcpus at hyp and initialize it. */
|
||||
hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
|
||||
kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
|
||||
void *hyp_vcpu;
|
||||
|
||||
/* Indexing of the vcpus to be sequential starting at 0. */
|
||||
if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
|
||||
ret = -EINVAL;
|
||||
goto destroy_vm;
|
||||
}
|
||||
|
||||
hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
|
||||
if (!hyp_vcpu) {
|
||||
ret = -ENOMEM;
|
||||
goto destroy_vm;
|
||||
}
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
|
||||
hyp_vcpu);
|
||||
if (ret) {
|
||||
free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
|
||||
goto destroy_vm;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
destroy_vm:
|
||||
pkvm_destroy_hyp_vm(host_kvm);
|
||||
return ret;
|
||||
free_vm:
|
||||
free_pages_exact(hyp_vm, hyp_vm_sz);
|
||||
free_pgd:
|
||||
free_pages_exact(pgd, pgd_sz);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int pkvm_create_hyp_vm(struct kvm *host_kvm)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&host_kvm->lock);
|
||||
if (!host_kvm->arch.pkvm.handle)
|
||||
ret = __pkvm_create_hyp_vm(host_kvm);
|
||||
mutex_unlock(&host_kvm->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
|
||||
{
|
||||
if (host_kvm->arch.pkvm.handle) {
|
||||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
|
||||
host_kvm->arch.pkvm.handle));
|
||||
}
|
||||
|
||||
host_kvm->arch.pkvm.handle = 0;
|
||||
free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
|
||||
}
|
||||
|
||||
int pkvm_init_host_vm(struct kvm *host_kvm)
|
||||
{
|
||||
mutex_init(&host_kvm->lock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -15,16 +15,25 @@
|
||||
#include <kvm/arm_pmu.h>
|
||||
#include <kvm/arm_vgic.h>
|
||||
|
||||
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
|
||||
|
||||
static LIST_HEAD(arm_pmus);
|
||||
static DEFINE_MUTEX(arm_pmus_lock);
|
||||
|
||||
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
|
||||
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
|
||||
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
|
||||
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
|
||||
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
|
||||
|
||||
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
|
||||
static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
|
||||
{
|
||||
return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
|
||||
}
|
||||
|
||||
static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
|
||||
{
|
||||
return &vcpu->arch.pmu.pmc[cnt_idx];
|
||||
}
|
||||
|
||||
static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
{
|
||||
@ -47,113 +56,46 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
|
||||
* @vcpu: The vcpu pointer
|
||||
* @select_idx: The counter index
|
||||
* kvm_pmc_is_64bit - determine if counter is 64bit
|
||||
* @pmc: counter context
|
||||
*/
|
||||
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
|
||||
{
|
||||
return (select_idx == ARMV8_PMU_CYCLE_IDX &&
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
|
||||
return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
|
||||
kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
|
||||
}
|
||||
|
||||
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
|
||||
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_pmu *pmu;
|
||||
struct kvm_vcpu_arch *vcpu_arch;
|
||||
u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);
|
||||
|
||||
pmc -= pmc->idx;
|
||||
pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
|
||||
vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
|
||||
return container_of(vcpu_arch, struct kvm_vcpu, arch);
|
||||
return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
|
||||
(pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_pmc_is_chained - determine if the pmc is chained
|
||||
* @pmc: The PMU counter pointer
|
||||
*/
|
||||
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
|
||||
static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
|
||||
{
|
||||
return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
|
||||
!kvm_pmc_has_64bit_overflow(pmc));
|
||||
}
|
||||
|
||||
/*
 * Map a counter index to the vcpu sysreg index holding its value:
 * PMCCNTR_EL0 for the cycle counter, PMEVCNTR0_EL0 + idx for any other.
 */
static u32 counter_index_to_reg(u64 idx)
{
	if (idx == ARMV8_PMU_CYCLE_IDX)
		return PMCCNTR_EL0;

	return PMEVCNTR0_EL0 + idx;
}
|
||||
|
||||
/*
 * Map a counter index to the vcpu sysreg index holding its event type:
 * PMCCFILTR_EL0 for the cycle counter, PMEVTYPER0_EL0 + idx for any other.
 */
static u32 counter_index_to_evtreg(u64 idx)
{
	if (idx == ARMV8_PMU_CYCLE_IDX)
		return PMCCFILTR_EL0;

	return PMEVTYPER0_EL0 + idx;
}
|
||||
|
||||
static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
||||
u64 counter, reg, enabled, running;
|
||||
|
||||
return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
|
||||
* @select_idx: The counter index
|
||||
*/
|
||||
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
|
||||
{
|
||||
return select_idx & 0x1;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_get_canonical_pmc - obtain the canonical pmc
|
||||
* @pmc: The PMU counter pointer
|
||||
*
|
||||
* When a pair of PMCs are chained together we use the low counter (canonical)
|
||||
* to hold the underlying perf event.
|
||||
*/
|
||||
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
|
||||
{
|
||||
if (kvm_pmu_pmc_is_chained(pmc) &&
|
||||
kvm_pmu_idx_is_high_counter(pmc->idx))
|
||||
return pmc - 1;
|
||||
|
||||
return pmc;
|
||||
}
|
||||
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
|
||||
{
|
||||
if (kvm_pmu_idx_is_high_counter(pmc->idx))
|
||||
return pmc - 1;
|
||||
else
|
||||
return pmc + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
|
||||
* @vcpu: The vcpu pointer
|
||||
* @select_idx: The counter index
|
||||
*/
|
||||
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
{
|
||||
u64 eventsel, reg;
|
||||
|
||||
select_idx |= 0x1;
|
||||
|
||||
if (select_idx == ARMV8_PMU_CYCLE_IDX)
|
||||
return false;
|
||||
|
||||
reg = PMEVTYPER0_EL0 + select_idx;
|
||||
eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
|
||||
|
||||
return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_get_pair_counter_value - get PMU counter value
|
||||
* @vcpu: The vcpu pointer
|
||||
* @pmc: The PMU counter pointer
|
||||
*/
|
||||
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
|
||||
struct kvm_pmc *pmc)
|
||||
{
|
||||
u64 counter, counter_high, reg, enabled, running;
|
||||
|
||||
if (kvm_pmu_pmc_is_chained(pmc)) {
|
||||
pmc = kvm_pmu_get_canonical_pmc(pmc);
|
||||
reg = PMEVCNTR0_EL0 + pmc->idx;
|
||||
|
||||
counter = __vcpu_sys_reg(vcpu, reg);
|
||||
counter_high = __vcpu_sys_reg(vcpu, reg + 1);
|
||||
|
||||
counter = lower_32_bits(counter) | (counter_high << 32);
|
||||
} else {
|
||||
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
|
||||
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
|
||||
counter = __vcpu_sys_reg(vcpu, reg);
|
||||
}
|
||||
reg = counter_index_to_reg(pmc->idx);
|
||||
counter = __vcpu_sys_reg(vcpu, reg);
|
||||
|
||||
/*
|
||||
* The real counter value is equal to the value of counter register plus
|
||||
@ -163,6 +105,9 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
|
||||
counter += perf_event_read_value(pmc->perf_event, &enabled,
|
||||
&running);
|
||||
|
||||
if (!kvm_pmc_is_64bit(pmc))
|
||||
counter = lower_32_bits(counter);
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
@ -173,22 +118,37 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
|
||||
*/
|
||||
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
{
|
||||
u64 counter;
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return 0;
|
||||
|
||||
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
|
||||
return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
|
||||
}
|
||||
|
||||
if (kvm_pmu_pmc_is_chained(pmc) &&
|
||||
kvm_pmu_idx_is_high_counter(select_idx))
|
||||
counter = upper_32_bits(counter);
|
||||
else if (select_idx != ARMV8_PMU_CYCLE_IDX)
|
||||
counter = lower_32_bits(counter);
|
||||
/*
 * Store @val in the counter register backing @pmc and rebuild its perf event.
 *
 * @pmc:   counter context
 * @val:   value to store
 * @force: when true, skip the AArch32 restriction below and store @val in
 *         full (used for the PMCR.P reset path)
 *
 * The existing perf event is released before the register is updated, and a
 * new one is created afterwards so that it picks up the new counter value.
 */
static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 *
		 * Keep the current top half of the register and take only
		 * the bottom half from the written value. Both halves must
		 * be combined in a single expression: assigning the masked
		 * register to 'val' first would discard the written value.
		 */
		val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) |
		      lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}
|
||||
|
||||
/**
|
||||
@ -199,17 +159,10 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
*/
|
||||
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
|
||||
{
|
||||
u64 reg;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
|
||||
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
|
||||
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
|
||||
|
||||
/* Recreate the perf event to reflect the updated sample_period */
|
||||
kvm_pmu_create_perf_event(vcpu, select_idx);
|
||||
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -218,7 +171,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
|
||||
*/
|
||||
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
|
||||
{
|
||||
pmc = kvm_pmu_get_canonical_pmc(pmc);
|
||||
if (pmc->perf_event) {
|
||||
perf_event_disable(pmc->perf_event);
|
||||
perf_event_release_kernel(pmc->perf_event);
|
||||
@ -232,29 +184,20 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
|
||||
*
|
||||
* If this counter has been configured to monitor some event, release it here.
|
||||
*/
|
||||
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
|
||||
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
|
||||
{
|
||||
u64 counter, reg, val;
|
||||
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
||||
u64 reg, val;
|
||||
|
||||
pmc = kvm_pmu_get_canonical_pmc(pmc);
|
||||
if (!pmc->perf_event)
|
||||
return;
|
||||
|
||||
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
|
||||
val = kvm_pmu_get_pmc_value(pmc);
|
||||
|
||||
if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
|
||||
reg = PMCCNTR_EL0;
|
||||
val = counter;
|
||||
} else {
|
||||
reg = PMEVCNTR0_EL0 + pmc->idx;
|
||||
val = lower_32_bits(counter);
|
||||
}
|
||||
reg = counter_index_to_reg(pmc->idx);
|
||||
|
||||
__vcpu_sys_reg(vcpu, reg) = val;
|
||||
|
||||
if (kvm_pmu_pmc_is_chained(pmc))
|
||||
__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
|
||||
|
||||
kvm_pmu_release_perf_event(pmc);
|
||||
}
|
||||
|
||||
@ -280,13 +223,10 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
int i;
|
||||
|
||||
for_each_set_bit(i, &mask, 32)
|
||||
kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
|
||||
|
||||
bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
|
||||
kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -297,10 +237,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int i;
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
|
||||
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
|
||||
kvm_pmu_release_perf_event(&pmu->pmc[i]);
|
||||
kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
|
||||
irq_work_sync(&vcpu->arch.pmu.overflow_work);
|
||||
}
|
||||
|
||||
@ -325,9 +264,6 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
|
||||
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
int i;
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
@ -335,17 +271,16 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
if (!(val & BIT(i)))
|
||||
continue;
|
||||
|
||||
pmc = &pmu->pmc[i];
|
||||
pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
|
||||
|
||||
/* A change in the enable state may affect the chain state */
|
||||
kvm_pmu_update_pmc_chained(vcpu, i);
|
||||
kvm_pmu_create_perf_event(vcpu, i);
|
||||
|
||||
/* At this point, pmc must be the canonical */
|
||||
if (pmc->perf_event) {
|
||||
if (!pmc->perf_event) {
|
||||
kvm_pmu_create_perf_event(pmc);
|
||||
} else {
|
||||
perf_event_enable(pmc->perf_event);
|
||||
if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
|
||||
kvm_debug("fail to enable perf event\n");
|
||||
@ -363,23 +298,18 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
||||
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
int i;
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu) || !val)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
|
||||
struct kvm_pmc *pmc;
|
||||
|
||||
if (!(val & BIT(i)))
|
||||
continue;
|
||||
|
||||
pmc = &pmu->pmc[i];
|
||||
pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
|
||||
|
||||
/* A change in the enable state may affect the chain state */
|
||||
kvm_pmu_update_pmc_chained(vcpu, i);
|
||||
kvm_pmu_create_perf_event(vcpu, i);
|
||||
|
||||
/* At this point, pmc must be the canonical */
|
||||
if (pmc->perf_event)
|
||||
perf_event_disable(pmc->perf_event);
|
||||
}
|
||||
@ -476,14 +406,69 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_pmu *pmu;
|
||||
|
||||
pmu = container_of(work, struct kvm_pmu, overflow_work);
|
||||
vcpu = kvm_pmc_to_vcpu(pmu->pmc);
|
||||
|
||||
vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform an increment on any of the counters described in @mask,
|
||||
* generating the overflow if required, and propagate it as a chained
|
||||
* event if possible.
|
||||
*/
|
||||
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
|
||||
unsigned long mask, u32 event)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
|
||||
return;
|
||||
|
||||
/* Weed out disabled counters */
|
||||
mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
|
||||
|
||||
for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
|
||||
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
|
||||
u64 type, reg;
|
||||
|
||||
/* Filter on event type */
|
||||
type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
|
||||
type &= kvm_pmu_event_mask(vcpu->kvm);
|
||||
if (type != event)
|
||||
continue;
|
||||
|
||||
/* Increment this counter */
|
||||
reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
|
||||
if (!kvm_pmc_is_64bit(pmc))
|
||||
reg = lower_32_bits(reg);
|
||||
__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
|
||||
|
||||
/* No overflow? move on */
|
||||
if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
|
||||
continue;
|
||||
|
||||
/* Mark overflow */
|
||||
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
|
||||
|
||||
if (kvm_pmu_counter_can_chain(pmc))
|
||||
kvm_pmu_counter_increment(vcpu, BIT(i + 1),
|
||||
ARMV8_PMUV3_PERFCTR_CHAIN);
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the sample period for a given counter value */
|
||||
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
|
||||
val = (-counter) & GENMASK(63, 0);
|
||||
else
|
||||
val = (-counter) & GENMASK(31, 0);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/**
|
||||
* When the perf event overflows, set the overflow status and inform the vcpu.
|
||||
*/
|
||||
@ -503,10 +488,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
|
||||
* Reset the sample period to the architectural limit,
|
||||
* i.e. the point where the counter overflows.
|
||||
*/
|
||||
period = -(local64_read(&perf_event->count));
|
||||
|
||||
if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
|
||||
period &= GENMASK(31, 0);
|
||||
period = compute_period(pmc, local64_read(&perf_event->count));
|
||||
|
||||
local64_set(&perf_event->hw.period_left, 0);
|
||||
perf_event->attr.sample_period = period;
|
||||
@ -514,6 +496,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
|
||||
|
||||
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
|
||||
|
||||
if (kvm_pmu_counter_can_chain(pmc))
|
||||
kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
|
||||
ARMV8_PMUV3_PERFCTR_CHAIN);
|
||||
|
||||
if (kvm_pmu_overflow_status(vcpu)) {
|
||||
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
|
||||
|
||||
@ -533,50 +519,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
|
||||
*/
|
||||
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
int i;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
|
||||
return;
|
||||
|
||||
/* Weed out disabled counters */
|
||||
val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
|
||||
|
||||
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
|
||||
u64 type, reg;
|
||||
|
||||
if (!(val & BIT(i)))
|
||||
continue;
|
||||
|
||||
/* PMSWINC only applies to ... SW_INC! */
|
||||
type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
|
||||
type &= kvm_pmu_event_mask(vcpu->kvm);
|
||||
if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
|
||||
continue;
|
||||
|
||||
/* increment this even SW_INC counter */
|
||||
reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
|
||||
reg = lower_32_bits(reg);
|
||||
__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
|
||||
|
||||
if (reg) /* no overflow on the low part */
|
||||
continue;
|
||||
|
||||
if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
|
||||
/* increment the high counter */
|
||||
reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
|
||||
reg = lower_32_bits(reg);
|
||||
__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
|
||||
if (!reg) /* mark overflow on the high counter */
|
||||
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
|
||||
} else {
|
||||
/* mark overflow on low counter */
|
||||
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
|
||||
}
|
||||
}
|
||||
kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -591,6 +534,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
return;
|
||||
|
||||
/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
|
||||
if (!kvm_pmu_is_3p5(vcpu))
|
||||
val &= ~ARMV8_PMU_PMCR_LP;
|
||||
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_E) {
|
||||
kvm_pmu_enable_counter_mask(vcpu,
|
||||
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
|
||||
@ -606,49 +555,44 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
||||
unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
|
||||
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
|
||||
for_each_set_bit(i, &mask, 32)
|
||||
kvm_pmu_set_counter_value(vcpu, i, 0);
|
||||
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
|
||||
}
|
||||
}
|
||||
|
||||
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
||||
return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
|
||||
(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
|
||||
(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_create_perf_event - create a perf event for a counter
|
||||
* @vcpu: The vcpu pointer
|
||||
* @select_idx: The number of selected counter
|
||||
* @pmc: Counter context
|
||||
*/
|
||||
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
||||
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc;
|
||||
struct perf_event *event;
|
||||
struct perf_event_attr attr;
|
||||
u64 eventsel, counter, reg, data;
|
||||
u64 eventsel, reg, data;
|
||||
|
||||
/*
|
||||
* For chained counters the event type and filtering attributes are
|
||||
* obtained from the low/even counter. We also use this counter to
|
||||
* determine if the event is enabled/disabled.
|
||||
*/
|
||||
pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
|
||||
|
||||
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
|
||||
? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
|
||||
reg = counter_index_to_evtreg(pmc->idx);
|
||||
data = __vcpu_sys_reg(vcpu, reg);
|
||||
|
||||
kvm_pmu_stop_counter(vcpu, pmc);
|
||||
kvm_pmu_stop_counter(pmc);
|
||||
if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
|
||||
eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
|
||||
else
|
||||
eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
|
||||
|
||||
/* Software increment event doesn't need to be backed by a perf event */
|
||||
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
|
||||
/*
|
||||
* Neither SW increment nor chained events need to be backed
|
||||
* by a perf event.
|
||||
*/
|
||||
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
|
||||
eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -663,37 +607,25 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
attr.type = arm_pmu->pmu.type;
|
||||
attr.size = sizeof(attr);
|
||||
attr.pinned = 1;
|
||||
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
|
||||
attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
|
||||
attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
|
||||
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
|
||||
attr.exclude_hv = 1; /* Don't count EL2 events */
|
||||
attr.exclude_host = 1; /* Don't count host events */
|
||||
attr.config = eventsel;
|
||||
|
||||
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
|
||||
/*
|
||||
* If counting with a 64bit counter, advertise it to the perf
|
||||
* code, carefully dealing with the initial sample period
|
||||
* which also depends on the overflow.
|
||||
*/
|
||||
if (kvm_pmc_is_64bit(pmc))
|
||||
attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
|
||||
|
||||
if (kvm_pmu_pmc_is_chained(pmc)) {
|
||||
/**
|
||||
* The initial sample period (overflow count) of an event. For
|
||||
* chained counters we only support overflow interrupts on the
|
||||
* high counter.
|
||||
*/
|
||||
attr.sample_period = (-counter) & GENMASK(63, 0);
|
||||
attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
|
||||
attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
kvm_pmu_perf_overflow,
|
||||
pmc + 1);
|
||||
} else {
|
||||
/* The initial sample period (overflow count) of an event. */
|
||||
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
|
||||
attr.sample_period = (-counter) & GENMASK(63, 0);
|
||||
else
|
||||
attr.sample_period = (-counter) & GENMASK(31, 0);
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
kvm_pmu_perf_overflow, pmc);
|
||||
}
|
||||
|
||||
if (IS_ERR(event)) {
|
||||
pr_err_once("kvm: pmu event creation failed %ld\n",
|
||||
@ -704,41 +636,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
pmc->perf_event = event;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_update_pmc_chained - update chained bitmap
|
||||
* @vcpu: The vcpu pointer
|
||||
* @select_idx: The number of selected counter
|
||||
*
|
||||
* Update the chained bitmap based on the event type written in the
|
||||
* typer register and the enable state of the odd register.
|
||||
*/
|
||||
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
{
|
||||
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
||||
struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
|
||||
bool new_state, old_state;
|
||||
|
||||
old_state = kvm_pmu_pmc_is_chained(pmc);
|
||||
new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
|
||||
kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
|
||||
|
||||
if (old_state == new_state)
|
||||
return;
|
||||
|
||||
canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
|
||||
kvm_pmu_stop_counter(vcpu, canonical_pmc);
|
||||
if (new_state) {
|
||||
/*
|
||||
* During promotion from !chained to chained we must ensure
|
||||
* the adjacent counter is stopped and its event destroyed
|
||||
*/
|
||||
kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
|
||||
set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
|
||||
return;
|
||||
}
|
||||
clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_pmu_set_counter_event_type - set selected counter to monitor some event
|
||||
* @vcpu: The vcpu pointer
|
||||
@ -752,6 +649,7 @@ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
|
||||
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
||||
u64 select_idx)
|
||||
{
|
||||
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
|
||||
u64 reg, mask;
|
||||
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
@ -761,20 +659,19 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
||||
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
|
||||
mask |= kvm_pmu_event_mask(vcpu->kvm);
|
||||
|
||||
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
|
||||
? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
|
||||
reg = counter_index_to_evtreg(pmc->idx);
|
||||
|
||||
__vcpu_sys_reg(vcpu, reg) = data & mask;
|
||||
|
||||
kvm_pmu_update_pmc_chained(vcpu, select_idx);
|
||||
kvm_pmu_create_perf_event(vcpu, select_idx);
|
||||
kvm_pmu_create_perf_event(pmc);
|
||||
}
|
||||
|
||||
void kvm_host_pmu_init(struct arm_pmu *pmu)
|
||||
{
|
||||
struct arm_pmu_entry *entry;
|
||||
|
||||
if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
||||
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
|
||||
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
||||
return;
|
||||
|
||||
mutex_lock(&arm_pmus_lock);
|
||||
@ -827,7 +724,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
|
||||
|
||||
if (event->pmu) {
|
||||
pmu = to_arm_pmu(event->pmu);
|
||||
if (pmu->pmuver == 0 ||
|
||||
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
|
||||
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
||||
pmu = NULL;
|
||||
}
|
||||
@ -849,6 +746,8 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
||||
|
||||
if (!pmceid1) {
|
||||
val = read_sysreg(pmceid0_el0);
|
||||
/* always support CHAIN */
|
||||
val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
|
||||
base = 0;
|
||||
} else {
|
||||
val = read_sysreg(pmceid1_el0);
|
||||
@ -1150,3 +1049,14 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
u8 kvm_arm_pmu_get_pmuver_limit(void)
|
||||
{
|
||||
u64 tmp;
|
||||
|
||||
tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
|
||||
tmp = cpuid_feature_cap_perfmon_field(tmp,
|
||||
ID_AA64DFR0_EL1_PMUVer_SHIFT,
|
||||
ID_AA64DFR0_EL1_PMUVer_V3P5);
|
||||
return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
|
||||
}
|
||||
|
@ -395,32 +395,3 @@ int kvm_set_ipa_limit(void)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
u64 mmfr0, mmfr1;
|
||||
u32 phys_shift;
|
||||
|
||||
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
|
||||
if (phys_shift) {
|
||||
if (phys_shift > kvm_ipa_limit ||
|
||||
phys_shift < ARM64_MIN_PARANGE_BITS)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
phys_shift = KVM_PHYS_SHIFT;
|
||||
if (phys_shift > kvm_ipa_limit) {
|
||||
pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
|
||||
current->comm);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
|
||||
kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -639,22 +639,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
|
||||
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 pmcr, val;
|
||||
u64 pmcr;
|
||||
|
||||
/* No PMU available, PMCR_EL0 may UNDEF... */
|
||||
if (!kvm_arm_support_pmu_v3())
|
||||
return;
|
||||
|
||||
pmcr = read_sysreg(pmcr_el0);
|
||||
/*
|
||||
* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN
|
||||
* except PMCR.E resetting to zero.
|
||||
*/
|
||||
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
|
||||
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
|
||||
/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
|
||||
pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
|
||||
if (!kvm_supports_32bit_el0())
|
||||
val |= ARMV8_PMU_PMCR_LC;
|
||||
__vcpu_sys_reg(vcpu, r->reg) = val;
|
||||
pmcr |= ARMV8_PMU_PMCR_LC;
|
||||
|
||||
__vcpu_sys_reg(vcpu, r->reg) = pmcr;
|
||||
}
|
||||
|
||||
static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
|
||||
@ -697,13 +693,15 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
return false;
|
||||
|
||||
if (p->is_write) {
|
||||
/* Only update writeable bits of PMCR */
|
||||
/*
|
||||
* Only update writeable bits of PMCR (continuing into
|
||||
* kvm_pmu_handle_pmcr() as well)
|
||||
*/
|
||||
val = __vcpu_sys_reg(vcpu, PMCR_EL0);
|
||||
val &= ~ARMV8_PMU_PMCR_MASK;
|
||||
val |= p->regval & ARMV8_PMU_PMCR_MASK;
|
||||
if (!kvm_supports_32bit_el0())
|
||||
val |= ARMV8_PMU_PMCR_LC;
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
|
||||
kvm_pmu_handle_pmcr(vcpu, val);
|
||||
kvm_vcpu_pmu_restore_guest(vcpu);
|
||||
} else {
|
||||
@ -1062,6 +1060,40 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
}
|
||||
|
||||
static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vcpu_has_pmu(vcpu))
|
||||
return vcpu->kvm->arch.dfr0_pmuver.imp;
|
||||
|
||||
return vcpu->kvm->arch.dfr0_pmuver.unimp;
|
||||
}
|
||||
|
||||
/*
 * Translate an ID_DFR0_EL1.PerfMon value into the matching
 * ID_AA64DFR0_EL1.PMUVer value. Only PMUv3 and IMPDEF are remapped; every
 * other value is returned unchanged.
 */
static u8 perfmon_to_pmuver(u8 perfmon)
{
	if (perfmon == ID_DFR0_EL1_PerfMon_PMUv3)
		return ID_AA64DFR0_EL1_PMUVer_IMP;

	if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF)
		return ID_AA64DFR0_EL1_PMUVer_IMP_DEF;

	/* Anything ARMv8.1+ and NI have the same value. For now. */
	return perfmon;
}
|
||||
|
||||
/*
 * Translate an ID_AA64DFR0_EL1.PMUVer value into the matching
 * ID_DFR0_EL1.PerfMon value. Only IMP and IMP_DEF are remapped; every
 * other value is returned unchanged.
 */
static u8 pmuver_to_perfmon(u8 pmuver)
{
	if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP)
		return ID_DFR0_EL1_PerfMon_PMUv3;

	if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
		return ID_DFR0_EL1_PerfMon_IMPDEF;

	/* Anything ARMv8.1+ and NI have the same value. For now. */
	return pmuver;
}
|
||||
|
||||
/* Read a sanitised cpufeature ID register by sys_reg_desc */
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r)
|
||||
{
|
||||
@ -1111,18 +1143,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
|
||||
/* Limit debug to ARMv8.0 */
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6);
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
val = cpuid_feature_cap_perfmon_field(val,
|
||||
ID_AA64DFR0_EL1_PMUVer_SHIFT,
|
||||
kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0);
|
||||
/* Set PMUver to the required version */
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer),
|
||||
vcpu_pmuver(vcpu));
|
||||
/* Hide SPE from guests */
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer);
|
||||
break;
|
||||
case SYS_ID_DFR0_EL1:
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
val = cpuid_feature_cap_perfmon_field(val,
|
||||
ID_DFR0_PERFMON_SHIFT,
|
||||
kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
|
||||
pmuver_to_perfmon(vcpu_pmuver(vcpu)));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1222,6 +1253,85 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd,
|
||||
u64 val)
|
||||
{
|
||||
u8 pmuver, host_pmuver;
|
||||
bool valid_pmu;
|
||||
|
||||
host_pmuver = kvm_arm_pmu_get_pmuver_limit();
|
||||
|
||||
/*
|
||||
* Allow AA64DFR0_EL1.PMUver to be set from userspace as long
|
||||
* as it doesn't promise more than what the HW gives us. We
|
||||
* allow an IMPDEF PMU though, only if no PMU is supported
|
||||
* (KVM backward compatibility handling).
|
||||
*/
|
||||
pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val);
|
||||
if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver))
|
||||
return -EINVAL;
|
||||
|
||||
valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF);
|
||||
|
||||
/* Make sure view register and PMU support do match */
|
||||
if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
|
||||
return -EINVAL;
|
||||
|
||||
/* We can only differ with PMUver, and anything else is an error */
|
||||
val ^= read_id_reg(vcpu, rd);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
|
||||
if (val)
|
||||
return -EINVAL;
|
||||
|
||||
if (valid_pmu)
|
||||
vcpu->kvm->arch.dfr0_pmuver.imp = pmuver;
|
||||
else
|
||||
vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd,
|
||||
u64 val)
|
||||
{
|
||||
u8 perfmon, host_perfmon;
|
||||
bool valid_pmu;
|
||||
|
||||
host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
|
||||
|
||||
/*
|
||||
* Allow DFR0_EL1.PerfMon to be set from userspace as long as
|
||||
* it doesn't promise more than what the HW gives us on the
|
||||
* AArch64 side (as everything is emulated with that), and
|
||||
* that this is a PMUv3.
|
||||
*/
|
||||
perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), val);
|
||||
if ((perfmon != ID_DFR0_EL1_PerfMon_IMPDEF && perfmon > host_perfmon) ||
|
||||
(perfmon != 0 && perfmon < ID_DFR0_EL1_PerfMon_PMUv3))
|
||||
return -EINVAL;
|
||||
|
||||
valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_EL1_PerfMon_IMPDEF);
|
||||
|
||||
/* Make sure view register and PMU support do match */
|
||||
if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
|
||||
return -EINVAL;
|
||||
|
||||
/* We can only differ with PerfMon, and anything else is an error */
|
||||
val ^= read_id_reg(vcpu, rd);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
|
||||
if (val)
|
||||
return -EINVAL;
|
||||
|
||||
if (valid_pmu)
|
||||
vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon);
|
||||
else
|
||||
vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpufeature ID register user accessors
|
||||
*
|
||||
@ -1443,7 +1553,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
/* CRm=1 */
|
||||
AA32_ID_SANITISED(ID_PFR0_EL1),
|
||||
AA32_ID_SANITISED(ID_PFR1_EL1),
|
||||
AA32_ID_SANITISED(ID_DFR0_EL1),
|
||||
{ SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg,
|
||||
.get_user = get_id_reg, .set_user = set_id_dfr0_el1,
|
||||
.visibility = aa32_id_visibility, },
|
||||
ID_HIDDEN(ID_AFR0_EL1),
|
||||
AA32_ID_SANITISED(ID_MMFR0_EL1),
|
||||
AA32_ID_SANITISED(ID_MMFR1_EL1),
|
||||
@ -1483,7 +1595,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
ID_UNALLOCATED(4,7),
|
||||
|
||||
/* CRm=5 */
|
||||
ID_SANITISED(ID_AA64DFR0_EL1),
|
||||
{ SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg,
|
||||
.get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, },
|
||||
ID_SANITISED(ID_AA64DFR1_EL1),
|
||||
ID_UNALLOCATED(5,2),
|
||||
ID_UNALLOCATED(5,3),
|
||||
|
@ -2743,6 +2743,7 @@ static int vgic_its_has_attr(struct kvm_device *dev,
|
||||
static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
|
||||
{
|
||||
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
int ret = 0;
|
||||
|
||||
if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
|
||||
@ -2762,7 +2763,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
|
||||
vgic_its_reset(kvm, its);
|
||||
break;
|
||||
case KVM_DEV_ARM_ITS_SAVE_TABLES:
|
||||
dist->save_its_tables_in_progress = true;
|
||||
ret = abi->save_tables(its);
|
||||
dist->save_its_tables_in_progress = false;
|
||||
break;
|
||||
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
|
||||
ret = abi->restore_tables(its);
|
||||
@ -2775,6 +2778,23 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_arch_allow_write_without_running_vcpu - allow writing guest memory
|
||||
* without the running VCPU when dirty ring is enabled.
|
||||
*
|
||||
* The running VCPU is required to track dirty guest pages when dirty ring
|
||||
* is enabled. Otherwise, the backup bitmap should be used to track the
|
||||
* dirty guest pages. When vgic/its tables are being saved, the backup
|
||||
* bitmap is used to track the dirty guest pages due to the missed running
|
||||
* VCPU in the period.
|
||||
*/
|
||||
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
|
||||
return dist->save_its_tables_in_progress;
|
||||
}
|
||||
|
||||
static int vgic_its_set_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
|
@ -18,7 +18,7 @@
|
||||
*/
|
||||
.macro multitag_transfer_size, reg, tmp
|
||||
mrs_s \reg, SYS_GMID_EL1
|
||||
ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE
|
||||
ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_WIDTH
|
||||
mov \tmp, #4
|
||||
lsl \reg, \tmp, \reg
|
||||
.endm
|
||||
|
@ -21,9 +21,12 @@ void copy_highpage(struct page *to, struct page *from)
|
||||
|
||||
copy_page(kto, kfrom);
|
||||
|
||||
if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
|
||||
set_bit(PG_mte_tagged, &to->flags);
|
||||
if (system_supports_mte() && page_mte_tagged(from)) {
|
||||
page_kasan_tag_reset(to);
|
||||
/* It's a new page, shouldn't have been tagged yet */
|
||||
WARN_ON_ONCE(!try_page_mte_tagging(to));
|
||||
mte_copy_page_tags(kto, kfrom);
|
||||
set_page_mte_tagged(to);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(copy_highpage);
|
||||
|
@ -937,6 +937,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
|
||||
|
||||
void tag_clear_highpage(struct page *page)
|
||||
{
|
||||
/* Newly allocated page, shouldn't have been tagged yet */
|
||||
WARN_ON_ONCE(!try_page_mte_tagging(page));
|
||||
mte_zero_clear_page_tags(page_address(page));
|
||||
set_bit(PG_mte_tagged, &page->flags);
|
||||
set_page_mte_tagged(page);
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ int mte_save_tags(struct page *page)
|
||||
{
|
||||
void *tag_storage, *ret;
|
||||
|
||||
if (!test_bit(PG_mte_tagged, &page->flags))
|
||||
if (!page_mte_tagged(page))
|
||||
return 0;
|
||||
|
||||
tag_storage = mte_allocate_tag_storage();
|
||||
@ -46,21 +46,17 @@ int mte_save_tags(struct page *page)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool mte_restore_tags(swp_entry_t entry, struct page *page)
|
||||
void mte_restore_tags(swp_entry_t entry, struct page *page)
|
||||
{
|
||||
void *tags = xa_load(&mte_pages, entry.val);
|
||||
|
||||
if (!tags)
|
||||
return false;
|
||||
return;
|
||||
|
||||
/*
|
||||
* Test PG_mte_tagged again in case it was racing with another
|
||||
* set_pte_at().
|
||||
*/
|
||||
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
|
||||
if (try_page_mte_tagging(page)) {
|
||||
mte_restore_page_tags(page_address(page), tags);
|
||||
|
||||
return true;
|
||||
set_page_mte_tagged(page);
|
||||
}
|
||||
}
|
||||
|
||||
void mte_invalidate_tags(int type, pgoff_t offset)
|
||||
|
@ -33,7 +33,7 @@ function expect_fields(nf) {
|
||||
# Print a CPP macro definition, padded with spaces so that the macro bodies
|
||||
# line up in a column
|
||||
function define(name, val) {
|
||||
printf "%-48s%s\n", "#define " name, val
|
||||
printf "%-56s%s\n", "#define " name, val
|
||||
}
|
||||
|
||||
# Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field
|
||||
|
@ -46,6 +46,760 @@
|
||||
# feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
|
||||
# item ACCDATA) though it may be more tasteful to do something else.
|
||||
|
||||
Sysreg ID_PFR0_EL1 3 0 0 1 0
|
||||
Res0 63:32
|
||||
Enum 31:28 RAS
|
||||
0b0000 NI
|
||||
0b0001 RAS
|
||||
0b0010 RASv1p1
|
||||
EndEnum
|
||||
Enum 27:24 DIT
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 AMU
|
||||
0b0000 NI
|
||||
0b0001 AMUv1
|
||||
0b0010 AMUv1p1
|
||||
EndEnum
|
||||
Enum 19:16 CSV2
|
||||
0b0000 UNDISCLOSED
|
||||
0b0001 IMP
|
||||
0b0010 CSV2p1
|
||||
EndEnum
|
||||
Enum 15:12 State3
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 State2
|
||||
0b0000 NI
|
||||
0b0001 NO_CV
|
||||
0b0010 CV
|
||||
EndEnum
|
||||
Enum 7:4 State1
|
||||
0b0000 NI
|
||||
0b0001 THUMB
|
||||
0b0010 THUMB2
|
||||
EndEnum
|
||||
Enum 3:0 State0
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_PFR1_EL1 3 0 0 1 1
|
||||
Res0 63:32
|
||||
Enum 31:28 GIC
|
||||
0b0000 NI
|
||||
0b0001 GICv3
|
||||
0b0010 GICv4p1
|
||||
EndEnum
|
||||
Enum 27:24 Virt_frac
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 Sec_frac
|
||||
0b0000 NI
|
||||
0b0001 WALK_DISABLE
|
||||
0b0010 SECURE_MEMORY
|
||||
EndEnum
|
||||
Enum 19:16 GenTimer
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
0b0010 ECV
|
||||
EndEnum
|
||||
Enum 15:12 Virtualization
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 MProgMod
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 Security
|
||||
0b0000 NI
|
||||
0b0001 EL3
|
||||
0b0010 NSACR_RFR
|
||||
EndEnum
|
||||
Enum 3:0 ProgMod
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_DFR0_EL1 3 0 0 1 2
|
||||
Res0 63:32
|
||||
Enum 31:28 TraceFilt
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 PerfMon
|
||||
0b0000 NI
|
||||
0b0001 PMUv1
|
||||
0b0010 PMUv2
|
||||
0b0011 PMUv3
|
||||
0b0100 PMUv3p1
|
||||
0b0101 PMUv3p4
|
||||
0b0110 PMUv3p5
|
||||
0b0111 PMUv3p7
|
||||
0b1000 PMUv3p8
|
||||
0b1111 IMPDEF
|
||||
EndEnum
|
||||
Enum 23:20 MProfDbg
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 MMapTrc
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 CopTrc
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 MMapDbg
|
||||
0b0000 NI
|
||||
0b0100 Armv7
|
||||
0b0101 Armv7p1
|
||||
EndEnum
|
||||
Field 7:4 CopSDbg
|
||||
Enum 3:0 CopDbg
|
||||
0b0000 NI
|
||||
0b0010 Armv6
|
||||
0b0011 Armv6p1
|
||||
0b0100 Armv7
|
||||
0b0101 Armv7p1
|
||||
0b0110 Armv8
|
||||
0b0111 VHE
|
||||
0b1000 Debugv8p2
|
||||
0b1001 Debugv8p4
|
||||
0b1010 Debugv8p8
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AFR0_EL1 3 0 0 1 3
|
||||
Res0 63:16
|
||||
Field 15:12 IMPDEF3
|
||||
Field 11:8 IMPDEF2
|
||||
Field 7:4 IMPDEF1
|
||||
Field 3:0 IMPDEF0
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_MMFR0_EL1 3 0 0 1 4
|
||||
Res0 63:32
|
||||
Enum 31:28 InnerShr
|
||||
0b0000 NC
|
||||
0b0001 HW
|
||||
0b1111 IGNORED
|
||||
EndEnum
|
||||
Enum 27:24 FCSE
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 AuxReg
|
||||
0b0000 NI
|
||||
0b0001 ACTLR
|
||||
0b0010 AIFSR
|
||||
EndEnum
|
||||
Enum 19:16 TCM
|
||||
0b0000 NI
|
||||
0b0001 IMPDEF
|
||||
0b0010 TCM
|
||||
0b0011 TCM_DMA
|
||||
EndEnum
|
||||
Enum 15:12 ShareLvl
|
||||
0b0000 ONE
|
||||
0b0001 TWO
|
||||
EndEnum
|
||||
Enum 11:8 OuterShr
|
||||
0b0000 NC
|
||||
0b0001 HW
|
||||
0b1111 IGNORED
|
||||
EndEnum
|
||||
Enum 7:4 PMSA
|
||||
0b0000 NI
|
||||
0b0001 IMPDEF
|
||||
0b0010 PMSAv6
|
||||
0b0011 PMSAv7
|
||||
EndEnum
|
||||
Enum 3:0 VMSA
|
||||
0b0000 NI
|
||||
0b0001 IMPDEF
|
||||
0b0010 VMSAv6
|
||||
0b0011 VMSAv7
|
||||
0b0100 VMSAv7_PXN
|
||||
0b0101 VMSAv7_LONG
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_MMFR1_EL1 3 0 0 1 5
|
||||
Res0 63:32
|
||||
Enum 31:28 BPred
|
||||
0b0000 NI
|
||||
0b0001 BP_SW_MANGED
|
||||
0b0010 BP_ASID_AWARE
|
||||
0b0011 BP_NOSNOOP
|
||||
0b0100 BP_INVISIBLE
|
||||
EndEnum
|
||||
Enum 27:24 L1TstCln
|
||||
0b0000 NI
|
||||
0b0001 NOINVALIDATE
|
||||
0b0010 INVALIDATE
|
||||
EndEnum
|
||||
Enum 23:20 L1Uni
|
||||
0b0000 NI
|
||||
0b0001 INVALIDATE
|
||||
0b0010 CLEAN_AND_INVALIDATE
|
||||
EndEnum
|
||||
Enum 19:16 L1Hvd
|
||||
0b0000 NI
|
||||
0b0001 INVALIDATE_ISIDE_ONLY
|
||||
0b0010 INVALIDATE
|
||||
0b0011 CLEAN_AND_INVALIDATE
|
||||
EndEnum
|
||||
Enum 15:12 L1UniSW
|
||||
0b0000 NI
|
||||
0b0001 CLEAN
|
||||
0b0010 CLEAN_AND_INVALIDATE
|
||||
0b0011 INVALIDATE
|
||||
EndEnum
|
||||
Enum 11:8 L1HvdSW
|
||||
0b0000 NI
|
||||
0b0001 CLEAN_AND_INVALIDATE
|
||||
0b0010 INVALIDATE_DSIDE_ONLY
|
||||
0b0011 INVALIDATE
|
||||
EndEnum
|
||||
Enum 7:4 L1UniVA
|
||||
0b0000 NI
|
||||
0b0001 CLEAN_AND_INVALIDATE
|
||||
0b0010 INVALIDATE_BP
|
||||
EndEnum
|
||||
Enum 3:0 L1HvdVA
|
||||
0b0000 NI
|
||||
0b0001 CLEAN_AND_INVALIDATE
|
||||
0b0010 INVALIDATE_BP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_MMFR2_EL1 3 0 0 1 6
|
||||
Res0 63:32
|
||||
Enum 31:28 HWAccFlg
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 WFIStall
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 MemBarr
|
||||
0b0000 NI
|
||||
0b0001 DSB_ONLY
|
||||
0b0010 IMP
|
||||
EndEnum
|
||||
Enum 19:16 UniTLB
|
||||
0b0000 NI
|
||||
0b0001 BY_VA
|
||||
0b0010 BY_MATCH_ASID
|
||||
0b0011 BY_ALL_ASID
|
||||
0b0100 OTHER_TLBS
|
||||
0b0101 BROADCAST
|
||||
0b0110 BY_IPA
|
||||
EndEnum
|
||||
Enum 15:12 HvdTLB
|
||||
0b0000 NI
|
||||
EndEnum
|
||||
Enum 11:8 L1HvdRng
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 L1HvdBG
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 L1HvdFG
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_MMFR3_EL1 3 0 0 1 7
|
||||
Res0 63:32
|
||||
Enum 31:28 Supersec
|
||||
0b0000 IMP
|
||||
0b1111 NI
|
||||
EndEnum
|
||||
Enum 27:24 CMemSz
|
||||
0b0000 4GB
|
||||
0b0001 64GB
|
||||
0b0010 1TB
|
||||
EndEnum
|
||||
Enum 23:20 CohWalk
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 PAN
|
||||
0b0000 NI
|
||||
0b0001 PAN
|
||||
0b0010 PAN2
|
||||
EndEnum
|
||||
Enum 15:12 MaintBcst
|
||||
0b0000 NI
|
||||
0b0001 NO_TLB
|
||||
0b0010 ALL
|
||||
EndEnum
|
||||
Enum 11:8 BPMaint
|
||||
0b0000 NI
|
||||
0b0001 ALL
|
||||
0b0010 BY_VA
|
||||
EndEnum
|
||||
Enum 7:4 CMaintSW
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 CMaintVA
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR0_EL1 3 0 0 2 0
|
||||
Res0 63:28
|
||||
Enum 27:24 Divide
|
||||
0b0000 NI
|
||||
0b0001 xDIV_T32
|
||||
0b0010 xDIV_A32
|
||||
EndEnum
|
||||
Enum 23:20 Debug
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 Coproc
|
||||
0b0000 NI
|
||||
0b0001 MRC
|
||||
0b0010 MRC2
|
||||
0b0011 MRRC
|
||||
0b0100 MRRC2
|
||||
EndEnum
|
||||
Enum 15:12 CmpBranch
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 BitField
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 BitCount
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 Swap
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR1_EL1 3 0 0 2 1
|
||||
Res0 63:32
|
||||
Enum 31:28 Jazelle
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 Interwork
|
||||
0b0000 NI
|
||||
0b0001 BX
|
||||
0b0010 BLX
|
||||
0b0011 A32_BX
|
||||
EndEnum
|
||||
Enum 23:20 Immediate
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 IfThen
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 Extend
|
||||
0b0000 NI
|
||||
0b0001 SXTB
|
||||
0b0010 SXTB16
|
||||
EndEnum
|
||||
Enum 11:8 Except_AR
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 Except
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 Endian
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR2_EL1 3 0 0 2 2
|
||||
Res0 63:32
|
||||
Enum 31:28 Reversal
|
||||
0b0000 NI
|
||||
0b0001 REV
|
||||
0b0010 RBIT
|
||||
EndEnum
|
||||
Enum 27:24 PSR_AR
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 MultU
|
||||
0b0000 NI
|
||||
0b0001 UMULL
|
||||
0b0010 UMAAL
|
||||
EndEnum
|
||||
Enum 19:16 MultS
|
||||
0b0000 NI
|
||||
0b0001 SMULL
|
||||
0b0010 SMLABB
|
||||
0b0011 SMLAD
|
||||
EndEnum
|
||||
Enum 15:12 Mult
|
||||
0b0000 NI
|
||||
0b0001 MLA
|
||||
0b0010 MLS
|
||||
EndEnum
|
||||
Enum 11:8 MultiAccessInt
|
||||
0b0000 NI
|
||||
0b0001 RESTARTABLE
|
||||
0b0010 CONTINUABLE
|
||||
EndEnum
|
||||
Enum 7:4 MemHint
|
||||
0b0000 NI
|
||||
0b0001 PLD
|
||||
0b0010 PLD2
|
||||
0b0011 PLI
|
||||
0b0100 PLDW
|
||||
EndEnum
|
||||
Enum 3:0 LoadStore
|
||||
0b0000 NI
|
||||
0b0001 DOUBLE
|
||||
0b0010 ACQUIRE
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR3_EL1 3 0 0 2 3
|
||||
Res0 63:32
|
||||
Enum 31:28 T32EE
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 TrueNOP
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 T32Copy
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 TabBranch
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 SynchPrim
|
||||
0b0000 NI
|
||||
0b0001 EXCLUSIVE
|
||||
0b0010 DOUBLE
|
||||
EndEnum
|
||||
Enum 11:8 SVC
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 SIMD
|
||||
0b0000 NI
|
||||
0b0001 SSAT
|
||||
0b0011 PKHBT
|
||||
EndEnum
|
||||
Enum 3:0 Saturate
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR4_EL1 3 0 0 2 4
|
||||
Res0 63:32
|
||||
Enum 31:28 SWP_frac
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 PSR_M
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 SynchPrim_frac
|
||||
0b0000 NI
|
||||
0b0011 IMP
|
||||
EndEnum
|
||||
Enum 19:16 Barrier
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 SMC
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 Writeback
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 WithShifts
|
||||
0b0000 NI
|
||||
0b0001 LSL3
|
||||
0b0011 LS
|
||||
0b0100 REG
|
||||
EndEnum
|
||||
Enum 3:0 Unpriv
|
||||
0b0000 NI
|
||||
0b0001 REG_BYTE
|
||||
0b0010 SIGNED_HALFWORD
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR5_EL1 3 0 0 2 5
|
||||
Res0 63:32
|
||||
Enum 31:28 VCMA
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 RDM
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Res0 23:20
|
||||
Enum 19:16 CRC32
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 SHA2
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 SHA1
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 AES
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
0b0010 VMULL
|
||||
EndEnum
|
||||
Enum 3:0 SEVL
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_ISAR6_EL1 3 0 0 2 7
|
||||
Res0 63:28
|
||||
Enum 27:24 I8MM
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 BF16
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 SPECRES
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 SB
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 FHM
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 DP
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 JSCVT
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_MMFR4_EL1 3 0 0 2 6
|
||||
Res0 63:32
|
||||
Enum 31:28 EVT
|
||||
0b0000 NI
|
||||
0b0001 NO_TLBIS
|
||||
0b0010 TLBIS
|
||||
EndEnum
|
||||
Enum 27:24 CCIDX
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 LSM
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 HPDS
|
||||
0b0000 NI
|
||||
0b0001 AA32HPD
|
||||
0b0010 HPDS2
|
||||
EndEnum
|
||||
Enum 15:12 CnP
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 XNX
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 AC2
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 SpecSEI
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg MVFR0_EL1 3 0 0 3 0
|
||||
Res0 63:32
|
||||
Enum 31:28 FPRound
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 FPShVec
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 23:20 FPSqrt
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 19:16 FPDivide
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 FPTrap
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 FPDP
|
||||
0b0000 NI
|
||||
0b0001 VFPv2
|
||||
0b0010 VFPv3
|
||||
EndEnum
|
||||
Enum 7:4 FPSP
|
||||
0b0000 NI
|
||||
0b0001 VFPv2
|
||||
0b0010 VFPv3
|
||||
EndEnum
|
||||
Enum 3:0 SIMDReg
|
||||
0b0000 NI
|
||||
0b0001 IMP_16x64
|
||||
0b0010 IMP_32x64
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg MVFR1_EL1 3 0 0 3 1
|
||||
Res0 63:32
|
||||
Enum 31:28 SIMDFMAC
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 27:24 FPHP
|
||||
0b0000 NI
|
||||
0b0001 FPHP
|
||||
0b0010 FPHP_CONV
|
||||
0b0011 FP16
|
||||
EndEnum
|
||||
Enum 23:20 SIMDHP
|
||||
0b0000 NI
|
||||
0b0001 SIMDHP
|
||||
0b0010 SIMDHP_FLOAT
|
||||
EndEnum
|
||||
Enum 19:16 SIMDSP
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 15:12 SIMDInt
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 11:8 SIMDLS
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 7:4 FPDNaN
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 FPFtZ
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg MVFR2_EL1 3 0 0 3 2
|
||||
Res0 63:8
|
||||
Enum 7:4 FPMisc
|
||||
0b0000 NI
|
||||
0b0001 FP
|
||||
0b0010 FP_DIRECTED_ROUNDING
|
||||
0b0011 FP_ROUNDING
|
||||
0b0100 FP_MAX_MIN
|
||||
EndEnum
|
||||
Enum 3:0 SIMDMisc
|
||||
0b0000 NI
|
||||
0b0001 SIMD_DIRECTED_ROUNDING
|
||||
0b0010 SIMD_ROUNDING
|
||||
0b0011 SIMD_MAX_MIN
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_PFR2_EL1 3 0 0 3 4
|
||||
Res0 63:12
|
||||
Enum 11:8 RAS_frac
|
||||
0b0000 NI
|
||||
0b0001 RASv1p1
|
||||
EndEnum
|
||||
Enum 7:4 SSBS
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 CSV3
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_DFR1_EL1 3 0 0 3 5
|
||||
Res0 63:8
|
||||
Enum 7:4 HPMN0
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 MTPMU
|
||||
0b0000 IMPDEF
|
||||
0b0001 IMP
|
||||
0b1111 NI
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_MMFR5_EL1 3 0 0 3 6
|
||||
Res0 63:8
|
||||
Enum 7:4 nTLBPA
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
Enum 3:0 ETS
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ID_AA64PFR0_EL1 3 0 0 4 0
|
||||
Enum 63:60 CSV3
|
||||
0b0000 NI
|
||||
|
@ -2154,8 +2154,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
|
||||
#endif
|
||||
}
|
||||
|
||||
int kvm_cpu_dirty_log_size(void);
|
||||
|
||||
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
|
||||
|
||||
#define KVM_CLOCK_VALID_FLAGS \
|
||||
|
@ -10208,20 +10208,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
|
||||
bool req_immediate_exit = false;
|
||||
|
||||
/* Forbid vmenter if vcpu dirty ring is soft-full */
|
||||
if (unlikely(vcpu->kvm->dirty_ring_size &&
|
||||
kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
|
||||
trace_kvm_dirty_ring_exit(vcpu);
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kvm_request_pending(vcpu)) {
|
||||
if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
|
||||
r = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kvm_dirty_ring_check_request(vcpu)) {
|
||||
r = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
|
||||
if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
|
||||
r = 0;
|
||||
|
@ -219,8 +219,9 @@ u64 stable_page_flags(struct page *page)
|
||||
u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
|
||||
u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
|
||||
u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
|
||||
#ifdef CONFIG_64BIT
|
||||
#ifdef CONFIG_ARCH_USES_PG_ARCH_X
|
||||
u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
|
||||
u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3);
|
||||
#endif
|
||||
|
||||
return u;
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
|
||||
#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
|
||||
|
||||
#ifdef CONFIG_HW_PERF_EVENTS
|
||||
|
||||
@ -29,7 +28,6 @@ struct kvm_pmu {
|
||||
struct irq_work overflow_work;
|
||||
struct kvm_pmu_events events;
|
||||
struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
|
||||
DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
|
||||
int irq_num;
|
||||
bool created;
|
||||
bool irq_level;
|
||||
@ -91,6 +89,14 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
|
||||
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Evaluates as true when emulating PMUv3p5, and false otherwise.
|
||||
*/
|
||||
#define kvm_pmu_is_3p5(vcpu) \
|
||||
(vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5)
|
||||
|
||||
u8 kvm_arm_pmu_get_pmuver_limit(void);
|
||||
|
||||
#else
|
||||
struct kvm_pmu {
|
||||
};
|
||||
@ -153,9 +159,14 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
||||
}
|
||||
|
||||
#define kvm_vcpu_has_pmu(vcpu) ({ false; })
|
||||
#define kvm_pmu_is_3p5(vcpu) ({ false; })
|
||||
static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
|
||||
static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -263,6 +263,7 @@ struct vgic_dist {
|
||||
struct vgic_io_device dist_iodev;
|
||||
|
||||
bool has_its;
|
||||
bool save_its_tables_in_progress;
|
||||
|
||||
/*
|
||||
* Contains the attributes and gpa of the LPI configuration table.
|
||||
|
@ -18,5 +18,6 @@
|
||||
#define KPF_UNCACHED 39
|
||||
#define KPF_SOFTDIRTY 40
|
||||
#define KPF_ARCH_2 41
|
||||
#define KPF_ARCH_3 42
|
||||
|
||||
#endif /* LINUX_KERNEL_PAGE_FLAGS_H */
|
||||
|
@ -37,6 +37,11 @@ static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool kvm_use_dirty_bitmap(struct kvm *kvm)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
|
||||
int index, u32 size)
|
||||
{
|
||||
@ -49,7 +54,7 @@ static inline int kvm_dirty_ring_reset(struct kvm *kvm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
|
||||
static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
|
||||
u32 slot, u64 offset)
|
||||
{
|
||||
}
|
||||
@ -64,13 +69,11 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#else /* CONFIG_HAVE_KVM_DIRTY_RING */
|
||||
|
||||
int kvm_cpu_dirty_log_size(void);
|
||||
bool kvm_use_dirty_bitmap(struct kvm *kvm);
|
||||
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
|
||||
u32 kvm_dirty_ring_get_rsvd_entries(void);
|
||||
int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
|
||||
|
||||
@ -84,13 +87,14 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
|
||||
* returns =0: successfully pushed
|
||||
* <0: unable to push, need to wait
|
||||
*/
|
||||
void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
|
||||
void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset);
|
||||
|
||||
bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* for use in vm_operations_struct */
|
||||
struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
|
||||
|
||||
void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
|
||||
bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
|
||||
|
||||
#endif /* CONFIG_HAVE_KVM_DIRTY_RING */
|
||||
|
||||
|
@ -163,10 +163,11 @@ static inline bool is_error_page(struct page *page)
|
||||
* Architecture-independent vcpu->requests bit members
|
||||
* Bits 3-7 are reserved for more arch-independent bits.
|
||||
*/
|
||||
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_UNBLOCK 2
|
||||
#define KVM_REQUEST_ARCH_BASE 8
|
||||
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
#define KVM_REQ_UNBLOCK 2
|
||||
#define KVM_REQ_DIRTY_RING_SOFT_FULL 3
|
||||
#define KVM_REQUEST_ARCH_BASE 8
|
||||
|
||||
/*
|
||||
* KVM_REQ_OUTSIDE_GUEST_MODE exists purely as a way to force the vCPU to
|
||||
@ -795,6 +796,7 @@ struct kvm {
|
||||
pid_t userspace_pid;
|
||||
unsigned int max_halt_poll_ns;
|
||||
u32 dirty_ring_size;
|
||||
bool dirty_ring_with_bitmap;
|
||||
bool vm_bugged;
|
||||
bool vm_dead;
|
||||
|
||||
|
@ -132,8 +132,9 @@ enum pageflags {
|
||||
PG_young,
|
||||
PG_idle,
|
||||
#endif
|
||||
#ifdef CONFIG_64BIT
|
||||
#ifdef CONFIG_ARCH_USES_PG_ARCH_X
|
||||
PG_arch_2,
|
||||
PG_arch_3,
|
||||
#endif
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
PG_skip_kasan_poison,
|
||||
|
@ -91,10 +91,10 @@
|
||||
#define IF_HAVE_PG_IDLE(flag,string)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string}
|
||||
#ifdef CONFIG_ARCH_USES_PG_ARCH_X
|
||||
#define IF_HAVE_PG_ARCH_X(flag,string) ,{1UL << flag, string}
|
||||
#else
|
||||
#define IF_HAVE_PG_ARCH_2(flag,string)
|
||||
#define IF_HAVE_PG_ARCH_X(flag,string)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
@ -130,7 +130,8 @@ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \
|
||||
IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \
|
||||
IF_HAVE_PG_IDLE(PG_young, "young" ) \
|
||||
IF_HAVE_PG_IDLE(PG_idle, "idle" ) \
|
||||
IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \
|
||||
IF_HAVE_PG_ARCH_X(PG_arch_2, "arch_2" ) \
|
||||
IF_HAVE_PG_ARCH_X(PG_arch_3, "arch_3" ) \
|
||||
IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
|
||||
|
||||
#define show_page_flags(flags) \
|
||||
|
@ -1182,6 +1182,7 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_S390_CPU_TOPOLOGY 222
|
||||
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
|
||||
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
|
||||
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -1005,6 +1005,14 @@ config ARCH_USES_HIGH_VMA_FLAGS
|
||||
config ARCH_HAS_PKEYS
|
||||
bool
|
||||
|
||||
config ARCH_USES_PG_ARCH_X
|
||||
bool
|
||||
help
|
||||
Enable the definition of PG_arch_x page flags with x > 1. Only
|
||||
suitable for 64-bit architectures with CONFIG_FLATMEM or
|
||||
CONFIG_SPARSEMEM_VMEMMAP enabled, otherwise there may not be
|
||||
enough room for additional bits in page->flags.
|
||||
|
||||
config VM_EVENT_COUNTERS
|
||||
default y
|
||||
bool "Enable VM event counters for /proc/vmstat" if EXPERT
|
||||
|
@ -2444,8 +2444,9 @@ static void __split_huge_page_tail(struct page *head, int tail,
|
||||
(1L << PG_workingset) |
|
||||
(1L << PG_locked) |
|
||||
(1L << PG_unevictable) |
|
||||
#ifdef CONFIG_64BIT
|
||||
#ifdef CONFIG_ARCH_USES_PG_ARCH_X
|
||||
(1L << PG_arch_2) |
|
||||
(1L << PG_arch_3) |
|
||||
#endif
|
||||
(1L << PG_dirty) |
|
||||
LRU_GEN_MASK | LRU_REFS_MASK));
|
||||
|
176
tools/include/linux/bitfield.h
Normal file
176
tools/include/linux/bitfield.h
Normal file
@ -0,0 +1,176 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2014 Felix Fietkau <nbd@nbd.name>
|
||||
* Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com>
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_BITFIELD_H
|
||||
#define _LINUX_BITFIELD_H
|
||||
|
||||
#include <linux/build_bug.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
/*
|
||||
* Bitfield access macros
|
||||
*
|
||||
* FIELD_{GET,PREP} macros take as first parameter shifted mask
|
||||
* from which they extract the base mask and shift amount.
|
||||
* Mask must be a compilation time constant.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* #define REG_FIELD_A GENMASK(6, 0)
|
||||
* #define REG_FIELD_B BIT(7)
|
||||
* #define REG_FIELD_C GENMASK(15, 8)
|
||||
* #define REG_FIELD_D GENMASK(31, 16)
|
||||
*
|
||||
* Get:
|
||||
* a = FIELD_GET(REG_FIELD_A, reg);
|
||||
* b = FIELD_GET(REG_FIELD_B, reg);
|
||||
*
|
||||
* Set:
|
||||
* reg = FIELD_PREP(REG_FIELD_A, 1) |
|
||||
* FIELD_PREP(REG_FIELD_B, 0) |
|
||||
* FIELD_PREP(REG_FIELD_C, c) |
|
||||
* FIELD_PREP(REG_FIELD_D, 0x40);
|
||||
*
|
||||
* Modify:
|
||||
* reg &= ~REG_FIELD_C;
|
||||
* reg |= FIELD_PREP(REG_FIELD_C, c);
|
||||
*/
|
||||
|
||||
#define __bf_shf(x) (__builtin_ffsll(x) - 1)
|
||||
|
||||
#define __scalar_type_to_unsigned_cases(type) \
|
||||
unsigned type: (unsigned type)0, \
|
||||
signed type: (unsigned type)0
|
||||
|
||||
#define __unsigned_scalar_typeof(x) typeof( \
|
||||
_Generic((x), \
|
||||
char: (unsigned char)0, \
|
||||
__scalar_type_to_unsigned_cases(char), \
|
||||
__scalar_type_to_unsigned_cases(short), \
|
||||
__scalar_type_to_unsigned_cases(int), \
|
||||
__scalar_type_to_unsigned_cases(long), \
|
||||
__scalar_type_to_unsigned_cases(long long), \
|
||||
default: (x)))
|
||||
|
||||
#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x))
|
||||
|
||||
#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \
|
||||
({ \
|
||||
BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \
|
||||
_pfx "mask is not constant"); \
|
||||
BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \
|
||||
~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
|
||||
_pfx "value too large for the field"); \
|
||||
BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \
|
||||
__bf_cast_unsigned(_reg, ~0ull), \
|
||||
_pfx "type of reg too small for mask"); \
|
||||
__BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \
|
||||
(1ULL << __bf_shf(_mask))); \
|
||||
})
|
||||
|
||||
/**
|
||||
* FIELD_MAX() - produce the maximum value representable by a field
|
||||
* @_mask: shifted mask defining the field's length and position
|
||||
*
|
||||
* FIELD_MAX() returns the maximum value that can be held in the field
|
||||
* specified by @_mask.
|
||||
*/
|
||||
#define FIELD_MAX(_mask) \
|
||||
({ \
|
||||
__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \
|
||||
(typeof(_mask))((_mask) >> __bf_shf(_mask)); \
|
||||
})
|
||||
|
||||
/**
 * FIELD_FIT() - check if value fits in the field
 * @_mask: shifted mask defining the field's length and position
 * @_val:  value to test against the field
 *
 * @_val is converted to the type of @_mask, shifted up to the field's
 * position and tested for bits that fall outside @_mask.
 *
 * The build-time checker is deliberately handed a constant 0 instead of
 * @_val, so that a constant value which does not fit makes FIELD_FIT()
 * evaluate to false rather than fail the build.
 *
 * Return: true if @_val can fit inside @_mask, false if @_val is too big.
 */
#define FIELD_FIT(_mask, _val)						\
	({								\
		__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: ");	\
		/* (_val) is parenthesized so the cast covers a whole	\
		 * expression argument, exactly as FIELD_PREP() does. */ \
		!((((typeof(_mask))(_val)) << __bf_shf(_mask)) & ~(_mask)); \
	})
|
||||
|
||||
/**
|
||||
* FIELD_PREP() - prepare a bitfield element
|
||||
* @_mask: shifted mask defining the field's length and position
|
||||
* @_val: value to put in the field
|
||||
*
|
||||
* FIELD_PREP() masks and shifts up the value. The result should
|
||||
* be combined with other fields of the bitfield using logical OR.
|
||||
*/
|
||||
#define FIELD_PREP(_mask, _val) \
|
||||
({ \
|
||||
__BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \
|
||||
((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
|
||||
})
|
||||
|
||||
/**
|
||||
* FIELD_GET() - extract a bitfield element
|
||||
* @_mask: shifted mask defining the field's length and position
|
||||
* @_reg: value of entire bitfield
|
||||
*
|
||||
* FIELD_GET() extracts the field specified by @_mask from the
|
||||
* bitfield passed in as @_reg by masking and shifting it down.
|
||||
*/
|
||||
#define FIELD_GET(_mask, _reg) \
|
||||
({ \
|
||||
__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
|
||||
(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
|
||||
})
|
||||
|
||||
extern void __compiletime_error("value doesn't fit into mask")
|
||||
__field_overflow(void);
|
||||
extern void __compiletime_error("bad bitfield mask")
|
||||
__bad_mask(void);
|
||||
/*
 * Return the lowest set bit of @field, i.e. the factor that moves a value
 * into the field's position when multiplied by it.
 *
 * __bad_mask() is called when @field is not one contiguous run of set bits.
 */
static __always_inline u64 field_multiplier(u64 field)
{
	/* Set every bit below the field's lowest set bit as well. */
	const u64 filled = field | (field - 1);

	/*
	 * For a contiguous field, 'filled' is all-ones from bit 0 upwards,
	 * so adding 1 carries out of the run and shares no bit with it.
	 */
	if (filled & (filled + 1))
		__bad_mask();

	/* x & -x isolates the lowest set bit. */
	return field & -field;
}
|
||||
/*
 * Return @field shifted down so that its lowest set bit lands on bit 0.
 * The shift is done by dividing by the field's lowest set bit.
 */
static __always_inline u64 field_mask(u64 field)
{
	const u64 lowest_bit = field_multiplier(field);

	return field / lowest_bit;
}
|
||||
#define field_max(field) ((typeof(field))field_mask(field))
|
||||
#define ____MAKE_OP(type,base,to,from) \
|
||||
static __always_inline __##type type##_encode_bits(base v, base field) \
|
||||
{ \
|
||||
if (__builtin_constant_p(v) && (v & ~field_mask(field))) \
|
||||
__field_overflow(); \
|
||||
return to((v & field_mask(field)) * field_multiplier(field)); \
|
||||
} \
|
||||
static __always_inline __##type type##_replace_bits(__##type old, \
|
||||
base val, base field) \
|
||||
{ \
|
||||
return (old & ~to(field)) | type##_encode_bits(val, field); \
|
||||
} \
|
||||
static __always_inline void type##p_replace_bits(__##type *p, \
|
||||
base val, base field) \
|
||||
{ \
|
||||
*p = (*p & ~to(field)) | type##_encode_bits(val, field); \
|
||||
} \
|
||||
static __always_inline base type##_get_bits(__##type v, base field) \
|
||||
{ \
|
||||
return (from(v) & field)/field_multiplier(field); \
|
||||
}
|
||||
#define __MAKE_OP(size) \
|
||||
____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \
|
||||
____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \
|
||||
____MAKE_OP(u##size,u##size,,)
|
||||
____MAKE_OP(u8,u8,,)
|
||||
__MAKE_OP(16)
|
||||
__MAKE_OP(32)
|
||||
__MAKE_OP(64)
|
||||
#undef __MAKE_OP
|
||||
#undef ____MAKE_OP
|
||||
|
||||
#endif
|
1
tools/testing/selftests/kvm/.gitignore
vendored
1
tools/testing/selftests/kvm/.gitignore
vendored
@ -4,6 +4,7 @@
|
||||
/aarch64/debug-exceptions
|
||||
/aarch64/get-reg-list
|
||||
/aarch64/hypercalls
|
||||
/aarch64/page_fault_test
|
||||
/aarch64/psci_test
|
||||
/aarch64/vcpu_width_config
|
||||
/aarch64/vgic_init
|
||||
|
@ -48,6 +48,7 @@ LIBKVM += lib/rbtree.c
|
||||
LIBKVM += lib/sparsebit.c
|
||||
LIBKVM += lib/test_util.c
|
||||
LIBKVM += lib/ucall_common.c
|
||||
LIBKVM += lib/userfaultfd_util.c
|
||||
|
||||
LIBKVM_STRING += lib/string_override.c
|
||||
|
||||
@ -158,10 +159,12 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/psci_test
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
|
||||
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
|
||||
TEST_GEN_PROGS_aarch64 += demand_paging_test
|
||||
TEST_GEN_PROGS_aarch64 += dirty_log_test
|
||||
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "test_util.h"
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#define BAD_ID_REG_VAL 0x1badc0deul
|
||||
|
||||
@ -145,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
|
||||
|
||||
el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT;
|
||||
el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val);
|
||||
return el0 == ID_AA64PFR0_ELx_64BIT_ONLY;
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <test_util.h>
|
||||
#include <kvm_util.h>
|
||||
#include <processor.h>
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#define MDSCR_KDE (1 << 13)
|
||||
#define MDSCR_MDE (1 << 15)
|
||||
@ -11,17 +12,24 @@
|
||||
#define DBGBCR_EXEC (0x0 << 3)
|
||||
#define DBGBCR_EL1 (0x1 << 1)
|
||||
#define DBGBCR_E (0x1 << 0)
|
||||
#define DBGBCR_LBN_SHIFT 16
|
||||
#define DBGBCR_BT_SHIFT 20
|
||||
#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT)
|
||||
#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT)
|
||||
|
||||
#define DBGWCR_LEN8 (0xff << 5)
|
||||
#define DBGWCR_RD (0x1 << 3)
|
||||
#define DBGWCR_WR (0x2 << 3)
|
||||
#define DBGWCR_EL1 (0x1 << 1)
|
||||
#define DBGWCR_E (0x1 << 0)
|
||||
#define DBGWCR_LBN_SHIFT 16
|
||||
#define DBGWCR_WT_SHIFT 20
|
||||
#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT)
|
||||
|
||||
#define SPSR_D (1 << 9)
|
||||
#define SPSR_SS (1 << 21)
|
||||
|
||||
extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start;
|
||||
extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
|
||||
extern unsigned char iter_ss_begin, iter_ss_end;
|
||||
static volatile uint64_t sw_bp_addr, hw_bp_addr;
|
||||
static volatile uint64_t wp_addr, wp_data_addr;
|
||||
@ -29,8 +37,74 @@ static volatile uint64_t svc_addr;
|
||||
static volatile uint64_t ss_addr[4], ss_idx;
|
||||
#define PC(v) ((uint64_t)&(v))
|
||||
|
||||
/*
 * One switch case of a generated writer: for register number 'n', write
 * 'val' to the system register named <reg_name><n>_el1.
 */
#define DEBUG_WRITE_REG_CASE(reg_name, n)			\
	case n:							\
		write_sysreg(val, reg_name##n##_el1);		\
		break

/*
 * Generate write_<reg_name>(num, val), which writes 'val' to the system
 * register <reg_name><num>_el1 for num in 0..15. The register name has to
 * be spelled out at build time, hence one case per register number. Any
 * other 'num' trips GUEST_ASSERT(0).
 */
#define GEN_DEBUG_WRITE_REG(reg_name)				\
static void write_##reg_name(int num, uint64_t val)		\
{								\
	switch (num) {						\
	DEBUG_WRITE_REG_CASE(reg_name, 0);			\
	DEBUG_WRITE_REG_CASE(reg_name, 1);			\
	DEBUG_WRITE_REG_CASE(reg_name, 2);			\
	DEBUG_WRITE_REG_CASE(reg_name, 3);			\
	DEBUG_WRITE_REG_CASE(reg_name, 4);			\
	DEBUG_WRITE_REG_CASE(reg_name, 5);			\
	DEBUG_WRITE_REG_CASE(reg_name, 6);			\
	DEBUG_WRITE_REG_CASE(reg_name, 7);			\
	DEBUG_WRITE_REG_CASE(reg_name, 8);			\
	DEBUG_WRITE_REG_CASE(reg_name, 9);			\
	DEBUG_WRITE_REG_CASE(reg_name, 10);			\
	DEBUG_WRITE_REG_CASE(reg_name, 11);			\
	DEBUG_WRITE_REG_CASE(reg_name, 12);			\
	DEBUG_WRITE_REG_CASE(reg_name, 13);			\
	DEBUG_WRITE_REG_CASE(reg_name, 14);			\
	DEBUG_WRITE_REG_CASE(reg_name, 15);			\
	default:						\
		GUEST_ASSERT(0);				\
	}							\
}
|
||||
|
||||
/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
|
||||
GEN_DEBUG_WRITE_REG(dbgbcr)
|
||||
GEN_DEBUG_WRITE_REG(dbgbvr)
|
||||
GEN_DEBUG_WRITE_REG(dbgwcr)
|
||||
GEN_DEBUG_WRITE_REG(dbgwvr)
|
||||
|
||||
static void reset_debug_state(void)
|
||||
{
|
||||
uint8_t brps, wrps, i;
|
||||
uint64_t dfr0;
|
||||
|
||||
asm volatile("msr daifset, #8");
|
||||
|
||||
write_sysreg(0, osdlr_el1);
|
||||
@ -38,11 +112,21 @@ static void reset_debug_state(void)
|
||||
isb();
|
||||
|
||||
write_sysreg(0, mdscr_el1);
|
||||
/* This test only uses the first bp and wp slot. */
|
||||
write_sysreg(0, dbgbvr0_el1);
|
||||
write_sysreg(0, dbgbcr0_el1);
|
||||
write_sysreg(0, dbgwcr0_el1);
|
||||
write_sysreg(0, dbgwvr0_el1);
|
||||
write_sysreg(0, contextidr_el1);
|
||||
|
||||
/* Reset all bcr/bvr/wcr/wvr registers */
|
||||
dfr0 = read_sysreg(id_aa64dfr0_el1);
|
||||
brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0);
|
||||
for (i = 0; i <= brps; i++) {
|
||||
write_dbgbcr(i, 0);
|
||||
write_dbgbvr(i, 0);
|
||||
}
|
||||
wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0);
|
||||
for (i = 0; i <= wrps; i++) {
|
||||
write_dbgwcr(i, 0);
|
||||
write_dbgwvr(i, 0);
|
||||
}
|
||||
|
||||
isb();
|
||||
}
|
||||
|
||||
@ -54,16 +138,10 @@ static void enable_os_lock(void)
|
||||
GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
|
||||
}
|
||||
|
||||
static void install_wp(uint64_t addr)
|
||||
static void enable_monitor_debug_exceptions(void)
|
||||
{
|
||||
uint32_t wcr;
|
||||
uint32_t mdscr;
|
||||
|
||||
wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
|
||||
write_sysreg(wcr, dbgwcr0_el1);
|
||||
write_sysreg(addr, dbgwvr0_el1);
|
||||
isb();
|
||||
|
||||
asm volatile("msr daifclr, #8");
|
||||
|
||||
mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
|
||||
@ -71,21 +149,76 @@ static void install_wp(uint64_t addr)
|
||||
isb();
|
||||
}
|
||||
|
||||
static void install_hw_bp(uint64_t addr)
|
||||
static void install_wp(uint8_t wpn, uint64_t addr)
|
||||
{
|
||||
uint32_t wcr;
|
||||
|
||||
wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
|
||||
write_dbgwcr(wpn, wcr);
|
||||
write_dbgwvr(wpn, addr);
|
||||
|
||||
isb();
|
||||
|
||||
enable_monitor_debug_exceptions();
|
||||
}
|
||||
|
||||
static void install_hw_bp(uint8_t bpn, uint64_t addr)
|
||||
{
|
||||
uint32_t bcr;
|
||||
uint32_t mdscr;
|
||||
|
||||
bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
|
||||
write_sysreg(bcr, dbgbcr0_el1);
|
||||
write_sysreg(addr, dbgbvr0_el1);
|
||||
write_dbgbcr(bpn, bcr);
|
||||
write_dbgbvr(bpn, addr);
|
||||
isb();
|
||||
|
||||
asm volatile("msr daifclr, #8");
|
||||
enable_monitor_debug_exceptions();
|
||||
}
|
||||
|
||||
mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
|
||||
write_sysreg(mdscr, mdscr_el1);
|
||||
static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
|
||||
uint64_t ctx)
|
||||
{
|
||||
uint32_t wcr;
|
||||
uint64_t ctx_bcr;
|
||||
|
||||
/* Setup a context-aware breakpoint for Linked Context ID Match */
|
||||
ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
|
||||
DBGBCR_BT_CTX_LINK;
|
||||
write_dbgbcr(ctx_bp, ctx_bcr);
|
||||
write_dbgbvr(ctx_bp, ctx);
|
||||
|
||||
/* Setup a linked watchpoint (linked to the context-aware breakpoint) */
|
||||
wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
|
||||
DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
|
||||
write_dbgwcr(addr_wp, wcr);
|
||||
write_dbgwvr(addr_wp, addr);
|
||||
isb();
|
||||
|
||||
enable_monitor_debug_exceptions();
|
||||
}
|
||||
|
||||
void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
|
||||
uint64_t ctx)
|
||||
{
|
||||
uint32_t addr_bcr, ctx_bcr;
|
||||
|
||||
/* Setup a context-aware breakpoint for Linked Context ID Match */
|
||||
ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
|
||||
DBGBCR_BT_CTX_LINK;
|
||||
write_dbgbcr(ctx_bp, ctx_bcr);
|
||||
write_dbgbvr(ctx_bp, ctx);
|
||||
|
||||
/*
|
||||
* Setup a normal breakpoint for Linked Address Match, and link it
|
||||
* to the context-aware breakpoint.
|
||||
*/
|
||||
addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
|
||||
DBGBCR_BT_ADDR_LINK_CTX |
|
||||
((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
|
||||
write_dbgbcr(addr_bp, addr_bcr);
|
||||
write_dbgbvr(addr_bp, addr);
|
||||
isb();
|
||||
|
||||
enable_monitor_debug_exceptions();
|
||||
}
|
||||
|
||||
static void install_ss(void)
|
||||
@ -101,52 +234,42 @@ static void install_ss(void)
|
||||
|
||||
static volatile char write_data;
|
||||
|
||||
static void guest_code(void)
|
||||
static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
|
||||
{
|
||||
GUEST_SYNC(0);
|
||||
uint64_t ctx = 0xabcdef; /* a random context number */
|
||||
|
||||
/* Software-breakpoint */
|
||||
reset_debug_state();
|
||||
asm volatile("sw_bp: brk #0");
|
||||
GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
|
||||
|
||||
GUEST_SYNC(1);
|
||||
|
||||
/* Hardware-breakpoint */
|
||||
reset_debug_state();
|
||||
install_hw_bp(PC(hw_bp));
|
||||
install_hw_bp(bpn, PC(hw_bp));
|
||||
asm volatile("hw_bp: nop");
|
||||
GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
|
||||
|
||||
GUEST_SYNC(2);
|
||||
|
||||
/* Hardware-breakpoint + svc */
|
||||
reset_debug_state();
|
||||
install_hw_bp(PC(bp_svc));
|
||||
install_hw_bp(bpn, PC(bp_svc));
|
||||
asm volatile("bp_svc: svc #0");
|
||||
GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
|
||||
GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
|
||||
|
||||
GUEST_SYNC(3);
|
||||
|
||||
/* Hardware-breakpoint + software-breakpoint */
|
||||
reset_debug_state();
|
||||
install_hw_bp(PC(bp_brk));
|
||||
install_hw_bp(bpn, PC(bp_brk));
|
||||
asm volatile("bp_brk: brk #0");
|
||||
GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
|
||||
GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
|
||||
|
||||
GUEST_SYNC(4);
|
||||
|
||||
/* Watchpoint */
|
||||
reset_debug_state();
|
||||
install_wp(PC(write_data));
|
||||
install_wp(wpn, PC(write_data));
|
||||
write_data = 'x';
|
||||
GUEST_ASSERT_EQ(write_data, 'x');
|
||||
GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
|
||||
|
||||
GUEST_SYNC(5);
|
||||
|
||||
/* Single-step */
|
||||
reset_debug_state();
|
||||
install_ss();
|
||||
@ -160,8 +283,6 @@ static void guest_code(void)
|
||||
GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
|
||||
GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
|
||||
|
||||
GUEST_SYNC(6);
|
||||
|
||||
/* OS Lock does not block software-breakpoint */
|
||||
reset_debug_state();
|
||||
enable_os_lock();
|
||||
@ -169,30 +290,24 @@ static void guest_code(void)
|
||||
asm volatile("sw_bp2: brk #0");
|
||||
GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
|
||||
|
||||
GUEST_SYNC(7);
|
||||
|
||||
/* OS Lock blocking hardware-breakpoint */
|
||||
reset_debug_state();
|
||||
enable_os_lock();
|
||||
install_hw_bp(PC(hw_bp2));
|
||||
install_hw_bp(bpn, PC(hw_bp2));
|
||||
hw_bp_addr = 0;
|
||||
asm volatile("hw_bp2: nop");
|
||||
GUEST_ASSERT_EQ(hw_bp_addr, 0);
|
||||
|
||||
GUEST_SYNC(8);
|
||||
|
||||
/* OS Lock blocking watchpoint */
|
||||
reset_debug_state();
|
||||
enable_os_lock();
|
||||
write_data = '\0';
|
||||
wp_data_addr = 0;
|
||||
install_wp(PC(write_data));
|
||||
install_wp(wpn, PC(write_data));
|
||||
write_data = 'x';
|
||||
GUEST_ASSERT_EQ(write_data, 'x');
|
||||
GUEST_ASSERT_EQ(wp_data_addr, 0);
|
||||
|
||||
GUEST_SYNC(9);
|
||||
|
||||
/* OS Lock blocking single-step */
|
||||
reset_debug_state();
|
||||
enable_os_lock();
|
||||
@ -205,6 +320,27 @@ static void guest_code(void)
|
||||
: : : "x0");
|
||||
GUEST_ASSERT_EQ(ss_addr[0], 0);
|
||||
|
||||
/* Linked hardware-breakpoint */
|
||||
hw_bp_addr = 0;
|
||||
reset_debug_state();
|
||||
install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
|
||||
/* Set context id */
|
||||
write_sysreg(ctx, contextidr_el1);
|
||||
isb();
|
||||
asm volatile("hw_bp_ctx: nop");
|
||||
write_sysreg(0, contextidr_el1);
|
||||
GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
|
||||
|
||||
/* Linked watchpoint */
|
||||
reset_debug_state();
|
||||
install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
|
||||
/* Set context id */
|
||||
write_sysreg(ctx, contextidr_el1);
|
||||
isb();
|
||||
write_data = 'x';
|
||||
GUEST_ASSERT_EQ(write_data, 'x');
|
||||
GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
@ -276,20 +412,16 @@ static void guest_code_ss(int test_cnt)
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static int debug_version(struct kvm_vcpu *vcpu)
|
||||
static int debug_version(uint64_t id_aa64dfr0)
|
||||
{
|
||||
uint64_t id_aa64dfr0;
|
||||
|
||||
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
|
||||
return id_aa64dfr0 & 0xf;
|
||||
return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
|
||||
}
|
||||
|
||||
static void test_guest_debug_exceptions(void)
|
||||
static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
struct ucall uc;
|
||||
int stage;
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
|
||||
@ -307,23 +439,19 @@ static void test_guest_debug_exceptions(void)
|
||||
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
|
||||
ESR_EC_SVC64, guest_svc_handler);
|
||||
|
||||
for (stage = 0; stage < 11; stage++) {
|
||||
vcpu_run(vcpu);
|
||||
/* Specify bpn/wpn/ctx_bpn to be tested */
|
||||
vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
|
||||
pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
|
||||
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_SYNC:
|
||||
TEST_ASSERT(uc.args[1] == stage,
|
||||
"Stage %d: Unexpected sync ucall, got %lx",
|
||||
stage, (ulong)uc.args[1]);
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall %lu", uc.cmd);
|
||||
}
|
||||
vcpu_run(vcpu);
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
goto done;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall %lu", uc.cmd);
|
||||
}
|
||||
|
||||
done:
|
||||
@ -400,6 +528,43 @@ void test_single_step_from_userspace(int test_cnt)
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Run debug testing using the various breakpoint#, watchpoint# and
|
||||
* context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
|
||||
*/
|
||||
void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
|
||||
{
|
||||
uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
|
||||
int b, w, c;
|
||||
|
||||
/* Number of breakpoints */
|
||||
brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1;
|
||||
__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
|
||||
|
||||
/* Number of watchpoints */
|
||||
wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1;
|
||||
|
||||
/* Number of context aware breakpoints */
|
||||
ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1;
|
||||
|
||||
pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
|
||||
brp_num, wrp_num, ctx_brp_num);
|
||||
|
||||
/* Number of normal (non-context aware) breakpoints */
|
||||
normal_brp_num = brp_num - ctx_brp_num;
|
||||
|
||||
/* Lowest context aware breakpoint number */
|
||||
ctx_brp_base = normal_brp_num;
|
||||
|
||||
/* Run tests with all supported breakpoints/watchpoints */
|
||||
for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
|
||||
for (b = 0; b < normal_brp_num; b++) {
|
||||
for (w = 0; w < wrp_num; w++)
|
||||
test_guest_debug_exceptions(b, w, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void help(char *name)
|
||||
{
|
||||
puts("");
|
||||
@ -414,9 +579,11 @@ int main(int argc, char *argv[])
|
||||
struct kvm_vm *vm;
|
||||
int opt;
|
||||
int ss_iteration = 10000;
|
||||
uint64_t aa64dfr0;
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
|
||||
__TEST_REQUIRE(debug_version(vcpu) >= 6,
|
||||
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0);
|
||||
__TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
|
||||
"Armv8 debug architecture not supported.");
|
||||
kvm_vm_free(vm);
|
||||
|
||||
@ -432,7 +599,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
test_guest_debug_exceptions();
|
||||
test_guest_debug_exceptions_all(aa64dfr0);
|
||||
test_single_step_from_userspace(ss_iteration);
|
||||
|
||||
return 0;
|
||||
|
1117
tools/testing/selftests/kvm/aarch64/page_fault_test.c
Normal file
1117
tools/testing/selftests/kvm/aarch64/page_fault_test.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -58,9 +58,6 @@ static enum {
|
||||
ITERATION_MARK_IDLE,
|
||||
} iteration_work;
|
||||
|
||||
/* Set to true when vCPU threads should exit. */
|
||||
static bool done;
|
||||
|
||||
/* The iteration that was last completed by each vCPU. */
|
||||
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
|
||||
|
||||
@ -211,7 +208,7 @@ static bool spin_wait_for_next_iteration(int *current_iteration)
|
||||
int last_iteration = *current_iteration;
|
||||
|
||||
do {
|
||||
if (READ_ONCE(done))
|
||||
if (READ_ONCE(memstress_args.stop_vcpus))
|
||||
return false;
|
||||
|
||||
*current_iteration = READ_ONCE(iteration);
|
||||
@ -321,9 +318,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
mark_memory_idle(vm, nr_vcpus);
|
||||
access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory");
|
||||
|
||||
/* Set done to signal the vCPU threads to exit */
|
||||
done = true;
|
||||
|
||||
memstress_join_vcpu_threads(nr_vcpus);
|
||||
memstress_destroy_vm(vm);
|
||||
}
|
||||
|
@ -22,23 +22,13 @@
|
||||
#include "test_util.h"
|
||||
#include "memstress.h"
|
||||
#include "guest_modes.h"
|
||||
#include "userfaultfd_util.h"
|
||||
|
||||
#ifdef __NR_userfaultfd
|
||||
|
||||
#ifdef PRINT_PER_PAGE_UPDATES
|
||||
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_PER_VCPU_UPDATES
|
||||
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
static int nr_vcpus = 1;
|
||||
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
|
||||
|
||||
static size_t demand_paging_size;
|
||||
static char *guest_data_prototype;
|
||||
|
||||
@ -67,9 +57,11 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
|
||||
ts_diff.tv_sec, ts_diff.tv_nsec);
|
||||
}
|
||||
|
||||
static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
|
||||
static int handle_uffd_page_request(int uffd_mode, int uffd,
|
||||
struct uffd_msg *msg)
|
||||
{
|
||||
pid_t tid = syscall(__NR_gettid);
|
||||
uint64_t addr = msg->arg.pagefault.address;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
int r;
|
||||
@ -116,157 +108,6 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool quit_uffd_thread;
|
||||
|
||||
struct uffd_handler_args {
|
||||
int uffd_mode;
|
||||
int uffd;
|
||||
int pipefd;
|
||||
useconds_t delay;
|
||||
};
|
||||
|
||||
static void *uffd_handler_thread_fn(void *arg)
|
||||
{
|
||||
struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
|
||||
int uffd = uffd_args->uffd;
|
||||
int pipefd = uffd_args->pipefd;
|
||||
useconds_t delay = uffd_args->delay;
|
||||
int64_t pages = 0;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
while (!quit_uffd_thread) {
|
||||
struct uffd_msg msg;
|
||||
struct pollfd pollfd[2];
|
||||
char tmp_chr;
|
||||
int r;
|
||||
uint64_t addr;
|
||||
|
||||
pollfd[0].fd = uffd;
|
||||
pollfd[0].events = POLLIN;
|
||||
pollfd[1].fd = pipefd;
|
||||
pollfd[1].events = POLLIN;
|
||||
|
||||
r = poll(pollfd, 2, -1);
|
||||
switch (r) {
|
||||
case -1:
|
||||
pr_info("poll err");
|
||||
continue;
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
break;
|
||||
default:
|
||||
pr_info("Polling uffd returned %d", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[0].revents & POLLERR) {
|
||||
pr_info("uffd revents has POLLERR");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[1].revents & POLLIN) {
|
||||
r = read(pollfd[1].fd, &tmp_chr, 1);
|
||||
TEST_ASSERT(r == 1,
|
||||
"Error reading pipefd in UFFD thread\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(pollfd[0].revents & POLLIN))
|
||||
continue;
|
||||
|
||||
r = read(uffd, &msg, sizeof(msg));
|
||||
if (r == -1) {
|
||||
if (errno == EAGAIN)
|
||||
continue;
|
||||
pr_info("Read of uffd got errno %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (r != sizeof(msg)) {
|
||||
pr_info("Read on uffd returned unexpected size: %d bytes", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
|
||||
continue;
|
||||
|
||||
if (delay)
|
||||
usleep(delay);
|
||||
addr = msg.arg.pagefault.address;
|
||||
r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
pages++;
|
||||
}
|
||||
|
||||
ts_diff = timespec_elapsed(start);
|
||||
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
|
||||
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
|
||||
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Create a userfaultfd covering [hva, hva + len) and start a handler thread
 * for it.
 *
 * @vm:                  unused here
 * @uffd_handler_thread: receives the id of the created handler thread
 * @pipefd:              read end of the pipe used to tell the thread to quit
 * @uffd_mode:           UFFDIO_REGISTER mode for the range
 * @uffd_delay:          per-fault delay handed to the handler thread
 * @uffd_args:           caller-provided storage for the thread's arguments;
 *                       filled in here and passed to the thread
 * @hva:                 start of the range to register
 * @alias:               second mapping of the same memory; required in
 *                       MINOR mode, where it is used to prefault the pages
 * @len:                 length of the range in bytes
 *
 * Any failure trips a TEST_ASSERT.
 */
static void setup_demand_paging(struct kvm_vm *vm,
				pthread_t *uffd_handler_thread, int pipefd,
				int uffd_mode, useconds_t uffd_delay,
				struct uffd_handler_args *uffd_args,
				void *hva, void *alias, uint64_t len)
{
	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
	int uffd;
	struct uffdio_api uffdio_api;
	struct uffdio_register uffdio_register;
	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
	int ret;

	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
		       is_minor ? "MINOR" : "MISSING",
		       is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");

	/* In order to get minor faults, prefault via the alias. */
	if (is_minor) {
		size_t p;

		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;

		TEST_ASSERT(alias != NULL, "Alias required for minor faults");
		for (p = 0; p < (len / demand_paging_size); ++p) {
			memcpy(alias + (p * demand_paging_size),
			       guest_data_prototype, demand_paging_size);
		}
	}

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));

	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
	TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));

	uffdio_register.range.start = (uint64_t)hva;
	uffdio_register.range.len = len;
	uffdio_register.mode = uffd_mode;
	ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
	TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
	/* The range must support the ioctl used to resolve faults. */
	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
		    expected_ioctls, "missing userfaultfd ioctls");

	uffd_args->uffd_mode = uffd_mode;
	uffd_args->uffd = uffd;
	uffd_args->pipefd = pipefd;
	uffd_args->delay = uffd_delay;
	/* pthread_create() returns an error number, not -1/errno. */
	ret = pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
			     uffd_args);
	TEST_ASSERT(ret == 0, "pthread_create() failed, error %d", ret);

	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
		       hva, hva + len);
}
|
||||
|
||||
struct test_params {
|
||||
int uffd_mode;
|
||||
useconds_t uffd_delay;
|
||||
@ -274,16 +115,25 @@ struct test_params {
|
||||
bool partition_vcpu_memory_access;
|
||||
};
|
||||
|
||||
static void prefault_mem(void *alias, uint64_t len)
|
||||
{
|
||||
size_t p;
|
||||
|
||||
TEST_ASSERT(alias != NULL, "Alias required for minor faults");
|
||||
for (p = 0; p < (len / demand_paging_size); ++p) {
|
||||
memcpy(alias + (p * demand_paging_size),
|
||||
guest_data_prototype, demand_paging_size);
|
||||
}
|
||||
}
|
||||
|
||||
static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
{
|
||||
struct test_params *p = arg;
|
||||
pthread_t *uffd_handler_threads = NULL;
|
||||
struct uffd_handler_args *uffd_args = NULL;
|
||||
struct uffd_desc **uffd_descs = NULL;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
int *pipefds = NULL;
|
||||
struct kvm_vm *vm;
|
||||
int r, i;
|
||||
int i;
|
||||
|
||||
vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
|
||||
p->src_type, p->partition_vcpu_memory_access);
|
||||
@ -296,15 +146,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
memset(guest_data_prototype, 0xAB, demand_paging_size);
|
||||
|
||||
if (p->uffd_mode) {
|
||||
uffd_handler_threads =
|
||||
malloc(nr_vcpus * sizeof(*uffd_handler_threads));
|
||||
TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
|
||||
|
||||
uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
|
||||
TEST_ASSERT(uffd_args, "Memory allocation failed");
|
||||
|
||||
pipefds = malloc(sizeof(int) * nr_vcpus * 2);
|
||||
TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
|
||||
uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
|
||||
TEST_ASSERT(uffd_descs, "Memory allocation failed");
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++) {
|
||||
struct memstress_vcpu_args *vcpu_args;
|
||||
@ -317,19 +160,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
|
||||
vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
|
||||
|
||||
prefault_mem(vcpu_alias,
|
||||
vcpu_args->pages * memstress_args.guest_page_size);
|
||||
|
||||
/*
|
||||
* Set up user fault fd to handle demand paging
|
||||
* requests.
|
||||
*/
|
||||
r = pipe2(&pipefds[i * 2],
|
||||
O_CLOEXEC | O_NONBLOCK);
|
||||
TEST_ASSERT(!r, "Failed to set up pipefd");
|
||||
|
||||
setup_demand_paging(vm, &uffd_handler_threads[i],
|
||||
pipefds[i * 2], p->uffd_mode,
|
||||
p->uffd_delay, &uffd_args[i],
|
||||
vcpu_hva, vcpu_alias,
|
||||
vcpu_args->pages * memstress_args.guest_page_size);
|
||||
uffd_descs[i] = uffd_setup_demand_paging(
|
||||
p->uffd_mode, p->uffd_delay, vcpu_hva,
|
||||
vcpu_args->pages * memstress_args.guest_page_size,
|
||||
&handle_uffd_page_request);
|
||||
}
|
||||
}
|
||||
|
||||
@ -344,15 +185,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
pr_info("All vCPU threads joined\n");
|
||||
|
||||
if (p->uffd_mode) {
|
||||
char c;
|
||||
|
||||
/* Tell the user fault fd handler threads to quit */
|
||||
for (i = 0; i < nr_vcpus; i++) {
|
||||
r = write(pipefds[i * 2 + 1], &c, 1);
|
||||
TEST_ASSERT(r == 1, "Unable to write to pipefd");
|
||||
|
||||
pthread_join(uffd_handler_threads[i], NULL);
|
||||
}
|
||||
for (i = 0; i < nr_vcpus; i++)
|
||||
uffd_stop_demand_paging(uffd_descs[i]);
|
||||
}
|
||||
|
||||
pr_info("Total guest execution time: %ld.%.9lds\n",
|
||||
@ -364,11 +199,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
memstress_destroy_vm(vm);
|
||||
|
||||
free(guest_data_prototype);
|
||||
if (p->uffd_mode) {
|
||||
free(uffd_handler_threads);
|
||||
free(uffd_args);
|
||||
free(pipefds);
|
||||
}
|
||||
if (p->uffd_mode)
|
||||
free(uffd_descs);
|
||||
}
|
||||
|
||||
static void help(char *name)
|
||||
|
@ -24,6 +24,9 @@
|
||||
#include "guest_modes.h"
|
||||
#include "processor.h"
|
||||
|
||||
#define DIRTY_MEM_BITS 30 /* 1G */
|
||||
#define PAGE_SHIFT_4K 12
|
||||
|
||||
/* The memory slot index to track dirty pages */
|
||||
#define TEST_MEM_SLOT_INDEX 1
|
||||
|
||||
@ -226,13 +229,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm)
|
||||
}
|
||||
|
||||
static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
|
||||
void *bitmap, uint32_t num_pages)
|
||||
void *bitmap, uint32_t num_pages,
|
||||
uint32_t *unused)
|
||||
{
|
||||
kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
|
||||
}
|
||||
|
||||
static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
|
||||
void *bitmap, uint32_t num_pages)
|
||||
void *bitmap, uint32_t num_pages,
|
||||
uint32_t *unused)
|
||||
{
|
||||
kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
|
||||
kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
|
||||
@ -271,6 +276,24 @@ static bool dirty_ring_supported(void)
|
||||
|
||||
static void dirty_ring_create_vm_done(struct kvm_vm *vm)
|
||||
{
|
||||
uint64_t pages;
|
||||
uint32_t limit;
|
||||
|
||||
/*
|
||||
* We rely on vcpu exit due to full dirty ring state. Adjust
|
||||
* the ring buffer size to ensure we're able to reach the
|
||||
* full dirty ring state.
|
||||
*/
|
||||
pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
|
||||
pages = vm_adjust_num_guest_pages(vm->mode, pages);
|
||||
if (vm->page_size < getpagesize())
|
||||
pages = vm_num_host_pages(vm->mode, pages);
|
||||
|
||||
limit = 1 << (31 - __builtin_clz(pages));
|
||||
test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count));
|
||||
test_dirty_ring_count = min(limit, test_dirty_ring_count);
|
||||
pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count);
|
||||
|
||||
/*
|
||||
* Switch to dirty ring mode after VM creation but before any
|
||||
* of the vcpu creation.
|
||||
@ -329,10 +352,9 @@ static void dirty_ring_continue_vcpu(void)
|
||||
}
|
||||
|
||||
static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
|
||||
void *bitmap, uint32_t num_pages)
|
||||
void *bitmap, uint32_t num_pages,
|
||||
uint32_t *ring_buf_idx)
|
||||
{
|
||||
/* We only have one vcpu */
|
||||
static uint32_t fetch_index = 0;
|
||||
uint32_t count = 0, cleared;
|
||||
bool continued_vcpu = false;
|
||||
|
||||
@ -349,7 +371,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
|
||||
|
||||
/* Only have one vcpu */
|
||||
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
|
||||
slot, bitmap, num_pages, &fetch_index);
|
||||
slot, bitmap, num_pages,
|
||||
ring_buf_idx);
|
||||
|
||||
cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
|
||||
|
||||
@ -406,7 +429,8 @@ struct log_mode {
|
||||
void (*create_vm_done)(struct kvm_vm *vm);
|
||||
/* Hook to collect the dirty pages into the bitmap provided */
|
||||
void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
|
||||
void *bitmap, uint32_t num_pages);
|
||||
void *bitmap, uint32_t num_pages,
|
||||
uint32_t *ring_buf_idx);
|
||||
/* Hook to call after each vcpu run */
|
||||
void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
|
||||
void (*before_vcpu_join) (void);
|
||||
@ -471,13 +495,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm)
|
||||
}
|
||||
|
||||
static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
|
||||
void *bitmap, uint32_t num_pages)
|
||||
void *bitmap, uint32_t num_pages,
|
||||
uint32_t *ring_buf_idx)
|
||||
{
|
||||
struct log_mode *mode = &log_modes[host_log_mode];
|
||||
|
||||
TEST_ASSERT(mode->collect_dirty_pages != NULL,
|
||||
"collect_dirty_pages() is required for any log mode!");
|
||||
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages);
|
||||
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
|
||||
}
|
||||
|
||||
static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
|
||||
@ -681,9 +706,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
|
||||
return vm;
|
||||
}
|
||||
|
||||
#define DIRTY_MEM_BITS 30 /* 1G */
|
||||
#define PAGE_SHIFT_4K 12
|
||||
|
||||
struct test_params {
|
||||
unsigned long iterations;
|
||||
unsigned long interval;
|
||||
@ -696,6 +718,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
unsigned long *bmap;
|
||||
uint32_t ring_buf_idx = 0;
|
||||
|
||||
if (!log_mode_supported()) {
|
||||
print_skip("Log mode '%s' not supported",
|
||||
@ -769,6 +792,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
host_dirty_count = 0;
|
||||
host_clear_count = 0;
|
||||
host_track_next_count = 0;
|
||||
WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
|
||||
|
||||
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
|
||||
|
||||
@ -776,7 +800,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
/* Give the vcpu thread some time to dirty some pages */
|
||||
usleep(p->interval * 1000);
|
||||
log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
|
||||
bmap, host_num_pages);
|
||||
bmap, host_num_pages,
|
||||
&ring_buf_idx);
|
||||
|
||||
/*
|
||||
* See vcpu_sync_stop_requested definition for details on why
|
||||
@ -820,7 +845,7 @@ static void help(char *name)
|
||||
printf("usage: %s [-h] [-i iterations] [-I interval] "
|
||||
"[-p offset] [-m mode]\n", name);
|
||||
puts("");
|
||||
printf(" -c: specify dirty ring size, in number of entries\n");
|
||||
printf(" -c: hint to dirty ring size, in number of entries\n");
|
||||
printf(" (only useful for dirty-ring test; default: %"PRIu32")\n",
|
||||
TEST_DIRTY_RING_COUNT);
|
||||
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
|
||||
|
@ -38,12 +38,25 @@
|
||||
* NORMAL 4 1111:1111
|
||||
* NORMAL_WT 5 1011:1011
|
||||
*/
|
||||
#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
|
||||
(0x04ul << (1 * 8)) | \
|
||||
(0x0cul << (2 * 8)) | \
|
||||
(0x44ul << (3 * 8)) | \
|
||||
(0xfful << (4 * 8)) | \
|
||||
(0xbbul << (5 * 8)))
|
||||
|
||||
/* Linux doesn't use these memory types, so let's define them. */
|
||||
#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
|
||||
#define MAIR_ATTR_NORMAL_WT UL(0xbb)
|
||||
|
||||
#define MT_DEVICE_nGnRnE 0
|
||||
#define MT_DEVICE_nGnRE 1
|
||||
#define MT_DEVICE_GRE 2
|
||||
#define MT_NORMAL_NC 3
|
||||
#define MT_NORMAL 4
|
||||
#define MT_NORMAL_WT 5
|
||||
|
||||
#define DEFAULT_MAIR_EL1 \
|
||||
(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
|
||||
|
||||
#define MPIDR_HWID_BITMASK (0xff00fffffful)
|
||||
|
||||
@ -92,11 +105,19 @@ enum {
|
||||
#define ESR_EC_MASK (ESR_EC_NUM - 1)
|
||||
|
||||
#define ESR_EC_SVC64 0x15
|
||||
#define ESR_EC_IABT 0x21
|
||||
#define ESR_EC_DABT 0x25
|
||||
#define ESR_EC_HW_BP_CURRENT 0x31
|
||||
#define ESR_EC_SSTEP_CURRENT 0x33
|
||||
#define ESR_EC_WP_CURRENT 0x35
|
||||
#define ESR_EC_BRK_INS 0x3c
|
||||
|
||||
/* Access flag */
|
||||
#define PTE_AF (1ULL << 10)
|
||||
|
||||
/* Access flag update enable/disable */
|
||||
#define TCR_EL1_HA (1ULL << 39)
|
||||
|
||||
void aarch64_get_supported_page_sizes(uint32_t ipa,
|
||||
bool *ps4k, bool *ps16k, bool *ps64k);
|
||||
|
||||
@ -109,6 +130,8 @@ void vm_install_exception_handler(struct kvm_vm *vm,
|
||||
void vm_install_sync_handler(struct kvm_vm *vm,
|
||||
int vector, int ec, handler_fn handler);
|
||||
|
||||
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
|
||||
|
||||
static inline void cpu_relax(void)
|
||||
{
|
||||
asm volatile("yield" ::: "memory");
|
||||
|
@ -35,6 +35,7 @@ struct userspace_mem_region {
|
||||
struct sparsebit *unused_phy_pages;
|
||||
int fd;
|
||||
off_t offset;
|
||||
enum vm_mem_backing_src_type backing_src_type;
|
||||
void *host_mem;
|
||||
void *host_alias;
|
||||
void *mmap_start;
|
||||
@ -65,6 +66,14 @@ struct userspace_mem_regions {
|
||||
DECLARE_HASHTABLE(slot_hash, 9);
|
||||
};
|
||||
|
||||
enum kvm_mem_region_type {
|
||||
MEM_REGION_CODE,
|
||||
MEM_REGION_DATA,
|
||||
MEM_REGION_PT,
|
||||
MEM_REGION_TEST_DATA,
|
||||
NR_MEM_REGIONS,
|
||||
};
|
||||
|
||||
struct kvm_vm {
|
||||
int mode;
|
||||
unsigned long type;
|
||||
@ -94,6 +103,13 @@ struct kvm_vm {
|
||||
int stats_fd;
|
||||
struct kvm_stats_header stats_header;
|
||||
struct kvm_stats_desc *stats_desc;
|
||||
|
||||
/*
|
||||
* KVM region slots. These are the default memslots used by page
|
||||
* allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
|
||||
* memslot.
|
||||
*/
|
||||
uint32_t memslots[NR_MEM_REGIONS];
|
||||
};
|
||||
|
||||
|
||||
@ -106,6 +122,13 @@ struct kvm_vm {
|
||||
struct userspace_mem_region *
|
||||
memslot2region(struct kvm_vm *vm, uint32_t memslot);
|
||||
|
||||
static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
|
||||
enum kvm_mem_region_type type)
|
||||
{
|
||||
assert(type < NR_MEM_REGIONS);
|
||||
return memslot2region(vm, vm->memslots[type]);
|
||||
}
|
||||
|
||||
/* Minimum allocated guest virtual and physical addresses */
|
||||
#define KVM_UTIL_MIN_VADDR 0x2000
|
||||
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
|
||||
@ -387,7 +410,11 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
|
||||
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
|
||||
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
|
||||
vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
|
||||
enum kvm_mem_region_type type);
|
||||
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
|
||||
vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
|
||||
enum kvm_mem_region_type type);
|
||||
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
|
||||
|
||||
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
@ -649,13 +676,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
|
||||
* __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
|
||||
* calculate the amount of memory needed for per-vCPU data, e.g. stacks.
|
||||
*/
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages);
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode);
|
||||
struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
uint64_t nr_extra_pages);
|
||||
|
||||
static inline struct kvm_vm *vm_create_barebones(void)
|
||||
{
|
||||
return ____vm_create(VM_MODE_DEFAULT, 0);
|
||||
return ____vm_create(VM_MODE_DEFAULT);
|
||||
}
|
||||
|
||||
static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
|
||||
|
@ -47,6 +47,9 @@ struct memstress_args {
|
||||
/* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
|
||||
uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];
|
||||
|
||||
/* Test is done, stop running vCPUs. */
|
||||
bool stop_vcpus;
|
||||
|
||||
struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS];
|
||||
};
|
||||
|
||||
|
45
tools/testing/selftests/kvm/include/userfaultfd_util.h
Normal file
45
tools/testing/selftests/kvm/include/userfaultfd_util.h
Normal file
@ -0,0 +1,45 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KVM userfaultfd util
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
* Copyright (C) 2019-2022 Google LLC
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE /* for pipe2 */
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/userfaultfd.h>
|
||||
|
||||
#include "test_util.h"
|
||||
|
||||
typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
|
||||
|
||||
struct uffd_desc {
|
||||
int uffd_mode;
|
||||
int uffd;
|
||||
int pipefds[2];
|
||||
useconds_t delay;
|
||||
uffd_handler_t handler;
|
||||
pthread_t thread;
|
||||
};
|
||||
|
||||
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
|
||||
void *hva, uint64_t len,
|
||||
uffd_handler_t handler);
|
||||
|
||||
void uffd_stop_demand_paging(struct uffd_desc *uffd);
|
||||
|
||||
#ifdef PRINT_PER_PAGE_UPDATES
|
||||
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_PER_VCPU_UPDATES
|
||||
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
@ -11,6 +11,7 @@
|
||||
#include "guest_modes.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
|
||||
|
||||
@ -76,13 +77,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
|
||||
|
||||
void virt_arch_pgd_alloc(struct kvm_vm *vm)
|
||||
{
|
||||
if (!vm->pgd_created) {
|
||||
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
|
||||
page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
vm->pgd = paddr;
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
|
||||
|
||||
if (vm->pgd_created)
|
||||
return;
|
||||
|
||||
vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
|
||||
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
@ -133,12 +136,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
|
||||
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
{
|
||||
uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
|
||||
uint64_t attr_idx = MT_NORMAL;
|
||||
|
||||
_virt_pg_map(vm, vaddr, paddr, attr_idx);
|
||||
}
|
||||
|
||||
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
{
|
||||
uint64_t *ptep;
|
||||
|
||||
@ -169,11 +172,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
TEST_FAIL("Page table levels must be 2, 3, or 4");
|
||||
}
|
||||
|
||||
return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
|
||||
return ptep;
|
||||
|
||||
unmapped_gva:
|
||||
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
|
||||
exit(1);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
{
|
||||
uint64_t *ptep = virt_get_pte_hva(vm, gva);
|
||||
|
||||
return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
|
||||
}
|
||||
|
||||
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
|
||||
@ -318,13 +328,16 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
|
||||
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
struct kvm_vcpu_init *init, void *guest_code)
|
||||
{
|
||||
size_t stack_size = vm->page_size == 4096 ?
|
||||
DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
|
||||
size_t stack_size;
|
||||
uint64_t stack_vaddr;
|
||||
struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
|
||||
stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
aarch64_vcpu_setup(vcpu, init);
|
||||
|
||||
vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
|
||||
@ -428,8 +441,8 @@ void route_exception(struct ex_regs *regs, int vector)
|
||||
|
||||
void vm_init_descriptor_tables(struct kvm_vm *vm)
|
||||
{
|
||||
vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
|
||||
vm->page_size);
|
||||
vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
|
||||
vm->page_size, MEM_REGION_DATA);
|
||||
|
||||
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
|
||||
}
|
||||
@ -486,9 +499,9 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
|
||||
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
|
||||
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
|
||||
|
||||
*ps4k = ((val >> 28) & 0xf) != 0xf;
|
||||
*ps64k = ((val >> 24) & 0xf) == 0;
|
||||
*ps16k = ((val >> 20) & 0xf) != 0;
|
||||
*ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf;
|
||||
*ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0;
|
||||
*ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0;
|
||||
|
||||
close(vcpu_fd);
|
||||
close(vm_fd);
|
||||
|
@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
|
||||
seg_vend |= vm->page_size - 1;
|
||||
size_t seg_size = seg_vend - seg_vstart + 1;
|
||||
|
||||
vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
|
||||
vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
|
||||
MEM_REGION_CODE);
|
||||
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
|
||||
"virtual memory for segment at requested min addr,\n"
|
||||
" segment idx: %u\n"
|
||||
|
@ -186,13 +186,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
|
||||
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
|
||||
"Missing new mode params?");
|
||||
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
|
||||
pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
|
||||
vm_guest_mode_string(mode), nr_pages);
|
||||
|
||||
vm = calloc(1, sizeof(*vm));
|
||||
TEST_ASSERT(vm != NULL, "Insufficient Memory");
|
||||
|
||||
@ -288,9 +285,6 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
|
||||
|
||||
/* Allocate and setup memory for guest. */
|
||||
vm->vpages_mapped = sparsebit_alloc();
|
||||
if (nr_pages != 0)
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
0, 0, nr_pages, 0);
|
||||
|
||||
return vm;
|
||||
}
|
||||
@ -337,8 +331,16 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
nr_extra_pages);
|
||||
struct userspace_mem_region *slot0;
|
||||
struct kvm_vm *vm;
|
||||
int i;
|
||||
|
||||
vm = ____vm_create(mode, nr_pages);
|
||||
pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
|
||||
vm_guest_mode_string(mode), nr_pages);
|
||||
|
||||
vm = ____vm_create(mode);
|
||||
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
|
||||
for (i = 0; i < NR_MEM_REGIONS; i++)
|
||||
vm->memslots[i] = 0;
|
||||
|
||||
kvm_vm_elf_load(vm, program_invocation_name);
|
||||
|
||||
@ -649,6 +651,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
|
||||
sparsebit_free(&region->unused_phy_pages);
|
||||
ret = munmap(region->mmap_start, region->mmap_size);
|
||||
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
|
||||
if (region->fd >= 0) {
|
||||
/* There's an extra map when using shared memory. */
|
||||
ret = munmap(region->mmap_alias, region->mmap_size);
|
||||
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
|
||||
close(region->fd);
|
||||
}
|
||||
|
||||
free(region);
|
||||
}
|
||||
@ -986,6 +994,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
|
||||
vm_mem_backing_src_alias(src_type)->name);
|
||||
}
|
||||
|
||||
region->backing_src_type = src_type;
|
||||
region->unused_phy_pages = sparsebit_alloc();
|
||||
sparsebit_set_num(region->unused_phy_pages,
|
||||
guest_paddr >> vm->page_shift, npages);
|
||||
@ -1280,32 +1289,15 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
|
||||
return pgidx_start * vm->page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* sz - Size in bytes
|
||||
* vaddr_min - Minimum starting virtual address
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return:
|
||||
* Starting guest virtual address
|
||||
*
|
||||
* Allocates at least sz bytes within the virtual address space of the vm
|
||||
* given by vm. The allocated bytes are mapped to a virtual address >=
|
||||
* the address given by vaddr_min. Note that each allocation uses a
|
||||
* unique set of pages, with the minimum real allocation being at least
|
||||
* a page.
|
||||
*/
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
|
||||
vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
|
||||
enum kvm_mem_region_type type)
|
||||
{
|
||||
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
|
||||
|
||||
virt_pgd_alloc(vm);
|
||||
vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
|
||||
KVM_UTIL_MIN_PFN * vm->page_size, 0);
|
||||
KVM_UTIL_MIN_PFN * vm->page_size,
|
||||
vm->memslots[type]);
|
||||
|
||||
/*
|
||||
* Find an unused range of virtual page addresses of at least
|
||||
@ -1325,6 +1317,30 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
|
||||
return vaddr_start;
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* sz - Size in bytes
|
||||
* vaddr_min - Minimum starting virtual address
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return:
|
||||
* Starting guest virtual address
|
||||
*
|
||||
* Allocates at least sz bytes within the virtual address space of the vm
|
||||
* given by vm. The allocated bytes are mapped to a virtual address >=
|
||||
* the address given by vaddr_min. Note that each allocation uses a
|
||||
* unique set of pages, with the minimum real allocation being at least
|
||||
* a page. The allocated physical space comes from the TEST_DATA memory region.
|
||||
*/
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
|
||||
{
|
||||
return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate Pages
|
||||
*
|
||||
@ -1344,6 +1360,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
|
||||
return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
|
||||
}
|
||||
|
||||
vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
|
||||
{
|
||||
return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate Page
|
||||
*
|
||||
@ -1570,7 +1591,7 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
|
||||
|
||||
void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
uint32_t page_size = vcpu->vm->page_size;
|
||||
uint32_t page_size = getpagesize();
|
||||
uint32_t size = vcpu->vm->dirty_ring_size;
|
||||
|
||||
TEST_ASSERT(size > 0, "Should enable dirty ring first");
|
||||
@ -1911,7 +1932,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
|
||||
|
||||
vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
|
||||
{
|
||||
return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -292,6 +292,7 @@ void memstress_start_vcpu_threads(int nr_vcpus,
|
||||
|
||||
vcpu_thread_fn = vcpu_fn;
|
||||
WRITE_ONCE(all_vcpu_threads_running, false);
|
||||
WRITE_ONCE(memstress_args.stop_vcpus, false);
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++) {
|
||||
struct vcpu_thread *vcpu = &vcpu_threads[i];
|
||||
@ -314,6 +315,8 @@ void memstress_join_vcpu_threads(int nr_vcpus)
|
||||
{
|
||||
int i;
|
||||
|
||||
WRITE_ONCE(memstress_args.stop_vcpus, true);
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++)
|
||||
pthread_join(vcpu_threads[i].thread, NULL);
|
||||
}
|
||||
|
@ -55,13 +55,15 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
|
||||
|
||||
void virt_arch_pgd_alloc(struct kvm_vm *vm)
|
||||
{
|
||||
if (!vm->pgd_created) {
|
||||
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
|
||||
page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
vm->pgd = paddr;
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
|
||||
|
||||
if (vm->pgd_created)
|
||||
return;
|
||||
|
||||
vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
|
||||
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
@ -279,15 +281,18 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
void *guest_code)
|
||||
{
|
||||
int r;
|
||||
size_t stack_size = vm->page_size == 4096 ?
|
||||
DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
|
||||
size_t stack_size;
|
||||
unsigned long stack_vaddr;
|
||||
unsigned long current_gp = 0;
|
||||
struct kvm_mp_state mps;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
riscv_vcpu_mmu_setup(vcpu);
|
||||
|
||||
|
@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
|
||||
return;
|
||||
|
||||
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
|
||||
|
||||
vm->pgd = paddr;
|
||||
@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
|
||||
vm->page_size);
|
||||
|
||||
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN);
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
|
||||
|
186
tools/testing/selftests/kvm/lib/userfaultfd_util.c
Normal file
186
tools/testing/selftests/kvm/lib/userfaultfd_util.c
Normal file
@ -0,0 +1,186 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* KVM userfaultfd util
|
||||
* Adapted from demand_paging_test.c
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
* Copyright (C) 2019-2022 Google LLC
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE /* for pipe2 */
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <poll.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/userfaultfd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "test_util.h"
|
||||
#include "memstress.h"
|
||||
#include "userfaultfd_util.h"
|
||||
|
||||
#ifdef __NR_userfaultfd
|
||||
|
||||
static void *uffd_handler_thread_fn(void *arg)
|
||||
{
|
||||
struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
|
||||
int uffd = uffd_desc->uffd;
|
||||
int pipefd = uffd_desc->pipefds[0];
|
||||
useconds_t delay = uffd_desc->delay;
|
||||
int64_t pages = 0;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
while (1) {
|
||||
struct uffd_msg msg;
|
||||
struct pollfd pollfd[2];
|
||||
char tmp_chr;
|
||||
int r;
|
||||
|
||||
pollfd[0].fd = uffd;
|
||||
pollfd[0].events = POLLIN;
|
||||
pollfd[1].fd = pipefd;
|
||||
pollfd[1].events = POLLIN;
|
||||
|
||||
r = poll(pollfd, 2, -1);
|
||||
switch (r) {
|
||||
case -1:
|
||||
pr_info("poll err");
|
||||
continue;
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
break;
|
||||
default:
|
||||
pr_info("Polling uffd returned %d", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[0].revents & POLLERR) {
|
||||
pr_info("uffd revents has POLLERR");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[1].revents & POLLIN) {
|
||||
r = read(pollfd[1].fd, &tmp_chr, 1);
|
||||
TEST_ASSERT(r == 1,
|
||||
"Error reading pipefd in UFFD thread\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(pollfd[0].revents & POLLIN))
|
||||
continue;
|
||||
|
||||
r = read(uffd, &msg, sizeof(msg));
|
||||
if (r == -1) {
|
||||
if (errno == EAGAIN)
|
||||
continue;
|
||||
pr_info("Read of uffd got errno %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (r != sizeof(msg)) {
|
||||
pr_info("Read on uffd returned unexpected size: %d bytes", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
|
||||
continue;
|
||||
|
||||
if (delay)
|
||||
usleep(delay);
|
||||
r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
pages++;
|
||||
}
|
||||
|
||||
ts_diff = timespec_elapsed(start);
|
||||
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
|
||||
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
|
||||
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
|
||||
void *hva, uint64_t len,
|
||||
uffd_handler_t handler)
|
||||
{
|
||||
struct uffd_desc *uffd_desc;
|
||||
bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
|
||||
int uffd;
|
||||
struct uffdio_api uffdio_api;
|
||||
struct uffdio_register uffdio_register;
|
||||
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
|
||||
int ret;
|
||||
|
||||
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
|
||||
is_minor ? "MINOR" : "MISSING",
|
||||
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
|
||||
|
||||
uffd_desc = malloc(sizeof(struct uffd_desc));
|
||||
TEST_ASSERT(uffd_desc, "malloc failed");
|
||||
|
||||
/* In order to get minor faults, prefault via the alias. */
|
||||
if (is_minor)
|
||||
expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
|
||||
|
||||
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
|
||||
TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
|
||||
|
||||
uffdio_api.api = UFFD_API;
|
||||
uffdio_api.features = 0;
|
||||
TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
|
||||
"ioctl UFFDIO_API failed: %" PRIu64,
|
||||
(uint64_t)uffdio_api.api);
|
||||
|
||||
uffdio_register.range.start = (uint64_t)hva;
|
||||
uffdio_register.range.len = len;
|
||||
uffdio_register.mode = uffd_mode;
|
||||
TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
|
||||
"ioctl UFFDIO_REGISTER failed");
|
||||
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
|
||||
expected_ioctls, "missing userfaultfd ioctls");
|
||||
|
||||
ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
|
||||
TEST_ASSERT(!ret, "Failed to set up pipefd");
|
||||
|
||||
uffd_desc->uffd_mode = uffd_mode;
|
||||
uffd_desc->uffd = uffd;
|
||||
uffd_desc->delay = delay;
|
||||
uffd_desc->handler = handler;
|
||||
pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
|
||||
uffd_desc);
|
||||
|
||||
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
|
||||
hva, hva + len);
|
||||
|
||||
return uffd_desc;
|
||||
}
|
||||
|
||||
void uffd_stop_demand_paging(struct uffd_desc *uffd)
|
||||
{
|
||||
char c = 0;
|
||||
int ret;
|
||||
|
||||
ret = write(uffd->pipefds[1], &c, 1);
|
||||
TEST_ASSERT(ret == 1, "Unable to write to pipefd");
|
||||
|
||||
ret = pthread_join(uffd->thread, NULL);
|
||||
TEST_ASSERT(ret == 0, "Pthread_join failed.");
|
||||
|
||||
close(uffd->uffd);
|
||||
|
||||
close(uffd->pipefds[1]);
|
||||
close(uffd->pipefds[0]);
|
||||
|
||||
free(uffd);
|
||||
}
|
||||
|
||||
#endif /* __NR_userfaultfd */
|
@ -499,7 +499,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
|
||||
{
|
||||
if (!vm->gdt)
|
||||
vm->gdt = vm_vaddr_alloc_page(vm);
|
||||
vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
|
||||
dt->base = vm->gdt;
|
||||
dt->limit = getpagesize();
|
||||
@ -509,7 +509,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
|
||||
int selector)
|
||||
{
|
||||
if (!vm->tss)
|
||||
vm->tss = vm_vaddr_alloc_page(vm);
|
||||
vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
|
||||
memset(segp, 0, sizeof(*segp));
|
||||
segp->base = vm->tss;
|
||||
@ -599,8 +599,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
vm_vaddr_t stack_vaddr;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN);
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
|
||||
@ -1093,8 +1094,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
|
||||
extern void *idt_handlers;
|
||||
int i;
|
||||
|
||||
vm->idt = vm_vaddr_alloc_page(vm);
|
||||
vm->handlers = vm_vaddr_alloc_page(vm);
|
||||
vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
/* Handlers have the same address in both address spaces.*/
|
||||
for (i = 0; i < NUM_INTERRUPTS; i++)
|
||||
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
|
||||
|
@ -34,8 +34,6 @@
|
||||
static int nr_vcpus = 1;
|
||||
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
|
||||
|
||||
static bool run_vcpus = true;
|
||||
|
||||
static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = vcpu_args->vcpu;
|
||||
@ -45,7 +43,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
|
||||
run = vcpu->run;
|
||||
|
||||
/* Let the guest access its memory until a stop signal is received */
|
||||
while (READ_ONCE(run_vcpus)) {
|
||||
while (!READ_ONCE(memstress_args.stop_vcpus)) {
|
||||
ret = _vcpu_run(vcpu);
|
||||
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
|
||||
|
||||
@ -109,8 +107,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
|
||||
add_remove_memslot(vm, p->delay, p->nr_iterations);
|
||||
|
||||
run_vcpus = false;
|
||||
|
||||
memstress_join_vcpu_threads(nr_vcpus);
|
||||
pr_info("All vCPU threads joined\n");
|
||||
|
||||
|
@ -20,20 +20,20 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include <test_util.h>
|
||||
#include <kvm_util.h>
|
||||
#include <processor.h>
|
||||
|
||||
#define MEM_SIZE ((512U << 20) + 4096)
|
||||
#define MEM_SIZE_PAGES (MEM_SIZE / 4096)
|
||||
#define MEM_GPA 0x10000000UL
|
||||
#define MEM_EXTRA_SIZE SZ_64K
|
||||
|
||||
#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE)
|
||||
#define MEM_GPA SZ_256M
|
||||
#define MEM_AUX_GPA MEM_GPA
|
||||
#define MEM_SYNC_GPA MEM_AUX_GPA
|
||||
#define MEM_TEST_GPA (MEM_AUX_GPA + 4096)
|
||||
#define MEM_TEST_SIZE (MEM_SIZE - 4096)
|
||||
static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
|
||||
static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
|
||||
#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE)
|
||||
#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE)
|
||||
|
||||
/*
|
||||
* 32 MiB is max size that gets well over 100 iterations on 509 slots.
|
||||
@ -41,44 +41,38 @@ static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
|
||||
* 8194 slots in use can then be tested (although with slightly
|
||||
* limited resolution).
|
||||
*/
|
||||
#define MEM_SIZE_MAP ((32U << 20) + 4096)
|
||||
#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096)
|
||||
#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096)
|
||||
#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
|
||||
static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
|
||||
static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
|
||||
static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
|
||||
static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
|
||||
#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE)
|
||||
#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE)
|
||||
|
||||
/*
|
||||
* 128 MiB is min size that fills 32k slots with at least one page in each
|
||||
* while at the same time gets 100+ iterations in such test
|
||||
*
|
||||
* 2 MiB chunk size like a typical huge page
|
||||
*/
|
||||
#define MEM_TEST_UNMAP_SIZE (128U << 20)
|
||||
#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096)
|
||||
/* 2 MiB chunk size like a typical huge page */
|
||||
#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12))
|
||||
static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
|
||||
"invalid unmap test region size");
|
||||
static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
|
||||
"invalid unmap test region size");
|
||||
static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
|
||||
(2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
|
||||
"invalid unmap test region size");
|
||||
#define MEM_TEST_UNMAP_SIZE SZ_128M
|
||||
#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M
|
||||
|
||||
/*
|
||||
* For the move active test the middle of the test area is placed on
|
||||
* a memslot boundary: half lies in the memslot being moved, half in
|
||||
* other memslot(s).
|
||||
*
|
||||
* When running this test with 32k memslots (32764, really) each memslot
|
||||
* contains 4 pages.
|
||||
* The last one additionally contains the remaining 21 pages of memory,
|
||||
* for the total size of 25 pages.
|
||||
* Hence, the maximum size here is 50 pages.
|
||||
* We have different number of memory slots, excluding the reserved
|
||||
* memory slot 0, on various architectures and configurations. The
|
||||
* memory size in this test is calculated by picking the maximal
|
||||
* last memory slot's memory size, with alignment to the largest
|
||||
* supported page size (64KB). In this way, the selected memory
|
||||
* size for this test is compatible with test_memslot_move_prepare().
|
||||
*
|
||||
* architecture slots memory-per-slot memory-on-last-slot
|
||||
* --------------------------------------------------------------
|
||||
* x86-4KB 32763 16KB 160KB
|
||||
* arm64-4KB 32766 16KB 112KB
|
||||
* arm64-16KB 32766 16KB 112KB
|
||||
* arm64-64KB 8192 64KB 128KB
|
||||
*/
|
||||
#define MEM_TEST_MOVE_SIZE_PAGES (50)
|
||||
#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096)
|
||||
#define MEM_TEST_MOVE_SIZE (3 * SZ_64K)
|
||||
#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
|
||||
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
|
||||
"invalid move test region size");
|
||||
@ -100,6 +94,7 @@ struct vm_data {
|
||||
};
|
||||
|
||||
struct sync_area {
|
||||
uint32_t guest_page_size;
|
||||
atomic_bool start_flag;
|
||||
atomic_bool exit_flag;
|
||||
atomic_bool sync_flag;
|
||||
@ -192,14 +187,15 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
|
||||
uint64_t gpage, pgoffs;
|
||||
uint32_t slot, slotoffs;
|
||||
void *base;
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
|
||||
TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
|
||||
TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
|
||||
TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
|
||||
"Too high gpa to translate");
|
||||
gpa -= MEM_GPA;
|
||||
|
||||
gpage = gpa / 4096;
|
||||
pgoffs = gpa % 4096;
|
||||
gpage = gpa / guest_page_size;
|
||||
pgoffs = gpa % guest_page_size;
|
||||
slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
|
||||
slotoffs = gpage - (slot * data->pages_per_slot);
|
||||
|
||||
@ -217,14 +213,16 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
|
||||
}
|
||||
|
||||
base = data->hva_slots[slot];
|
||||
return (uint8_t *)base + slotoffs * 4096 + pgoffs;
|
||||
return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
|
||||
}
|
||||
|
||||
static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
|
||||
{
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
|
||||
TEST_ASSERT(slot < data->nslots, "Too high slot number");
|
||||
|
||||
return MEM_GPA + slot * data->pages_per_slot * 4096;
|
||||
return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
|
||||
}
|
||||
|
||||
static struct vm_data *alloc_vm(void)
|
||||
@ -241,81 +239,111 @@ static struct vm_data *alloc_vm(void)
|
||||
return data;
|
||||
}
|
||||
|
||||
/*
 * Decide whether a memslot layout is usable.
 *
 * A layout is accepted when each slot holds at least one guest page and both
 * the per-slot size and the size of the remainder, expressed in bytes, are
 * whole multiples of the host page size.
 *
 * Return: true if the layout is acceptable, false otherwise.
 */
static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
			     uint64_t pages_per_slot, uint64_t rempages)
{
	uint64_t slot_bytes = pages_per_slot * guest_page_size;
	uint64_t rem_bytes = rempages * guest_page_size;

	if (pages_per_slot == 0)
		return false;

	return slot_bytes % host_page_size == 0 &&
	       rem_bytes % host_page_size == 0;
}
|
||||
|
||||
|
||||
static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
|
||||
{
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
uint64_t mempages, pages_per_slot, rempages;
|
||||
uint64_t slots;
|
||||
|
||||
mempages = data->npages;
|
||||
slots = data->nslots;
|
||||
while (--slots > 1) {
|
||||
pages_per_slot = mempages / slots;
|
||||
rempages = mempages % pages_per_slot;
|
||||
if (check_slot_pages(host_page_size, guest_page_size,
|
||||
pages_per_slot, rempages))
|
||||
return slots + 1; /* slot 0 is reserved */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
|
||||
void *guest_code, uint64_t mempages,
|
||||
void *guest_code, uint64_t mem_size,
|
||||
struct timespec *slot_runtime)
|
||||
{
|
||||
uint32_t max_mem_slots;
|
||||
uint64_t rempages;
|
||||
uint64_t mempages, rempages;
|
||||
uint64_t guest_addr;
|
||||
uint32_t slot;
|
||||
uint32_t slot, host_page_size, guest_page_size;
|
||||
struct timespec tstart;
|
||||
struct sync_area *sync;
|
||||
|
||||
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
|
||||
TEST_ASSERT(max_mem_slots > 1,
|
||||
"KVM_CAP_NR_MEMSLOTS should be greater than 1");
|
||||
TEST_ASSERT(nslots > 1 || nslots == -1,
|
||||
"Slot count cap should be greater than 1");
|
||||
if (nslots != -1)
|
||||
max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
|
||||
pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
|
||||
host_page_size = getpagesize();
|
||||
guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
|
||||
mempages = mem_size / guest_page_size;
|
||||
|
||||
TEST_ASSERT(mempages > 1,
|
||||
"Can't test without any memory");
|
||||
data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
|
||||
TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");
|
||||
|
||||
data->npages = mempages;
|
||||
data->nslots = max_mem_slots - 1;
|
||||
data->pages_per_slot = mempages / data->nslots;
|
||||
if (!data->pages_per_slot) {
|
||||
*maxslots = mempages + 1;
|
||||
TEST_ASSERT(data->npages > 1, "Can't test without any memory");
|
||||
data->nslots = nslots;
|
||||
data->pages_per_slot = data->npages / data->nslots;
|
||||
rempages = data->npages % data->nslots;
|
||||
if (!check_slot_pages(host_page_size, guest_page_size,
|
||||
data->pages_per_slot, rempages)) {
|
||||
*maxslots = get_max_slots(data, host_page_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
rempages = mempages % data->nslots;
|
||||
data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
|
||||
TEST_ASSERT(data->hva_slots, "malloc() fail");
|
||||
|
||||
data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
|
||||
|
||||
pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
|
||||
max_mem_slots - 1, data->pages_per_slot, rempages);
|
||||
data->nslots, data->pages_per_slot, rempages);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &tstart);
|
||||
for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
|
||||
for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
|
||||
uint64_t npages;
|
||||
|
||||
npages = data->pages_per_slot;
|
||||
if (slot == max_mem_slots - 1)
|
||||
if (slot == data->nslots)
|
||||
npages += rempages;
|
||||
|
||||
vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
|
||||
guest_addr, slot, npages,
|
||||
0);
|
||||
guest_addr += npages * 4096;
|
||||
guest_addr += npages * guest_page_size;
|
||||
}
|
||||
*slot_runtime = timespec_elapsed(tstart);
|
||||
|
||||
for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
|
||||
for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
|
||||
uint64_t npages;
|
||||
uint64_t gpa;
|
||||
|
||||
npages = data->pages_per_slot;
|
||||
if (slot == max_mem_slots - 2)
|
||||
if (slot == data->nslots)
|
||||
npages += rempages;
|
||||
|
||||
gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
|
||||
slot + 1);
|
||||
gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
|
||||
TEST_ASSERT(gpa == guest_addr,
|
||||
"vm_phy_pages_alloc() failed\n");
|
||||
|
||||
data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
|
||||
memset(data->hva_slots[slot], 0, npages * 4096);
|
||||
data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
|
||||
memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
|
||||
|
||||
guest_addr += npages * 4096;
|
||||
guest_addr += npages * guest_page_size;
|
||||
}
|
||||
|
||||
virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
|
||||
virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);
|
||||
|
||||
sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
|
||||
atomic_init(&sync->start_flag, false);
|
||||
@ -414,6 +442,7 @@ static bool guest_perform_sync(void)
|
||||
static void guest_code_test_memslot_move(void)
|
||||
{
|
||||
struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
|
||||
uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
|
||||
uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
|
||||
|
||||
GUEST_SYNC(0);
|
||||
@ -424,7 +453,7 @@ static void guest_code_test_memslot_move(void)
|
||||
uintptr_t ptr;
|
||||
|
||||
for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
|
||||
ptr += 4096)
|
||||
ptr += page_size)
|
||||
*(uint64_t *)ptr = MEM_TEST_VAL_1;
|
||||
|
||||
/*
|
||||
@ -442,6 +471,7 @@ static void guest_code_test_memslot_move(void)
|
||||
static void guest_code_test_memslot_map(void)
|
||||
{
|
||||
struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
|
||||
uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
|
||||
|
||||
GUEST_SYNC(0);
|
||||
|
||||
@ -451,14 +481,16 @@ static void guest_code_test_memslot_map(void)
|
||||
uintptr_t ptr;
|
||||
|
||||
for (ptr = MEM_TEST_GPA;
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
|
||||
ptr += page_size)
|
||||
*(uint64_t *)ptr = MEM_TEST_VAL_1;
|
||||
|
||||
if (!guest_perform_sync())
|
||||
break;
|
||||
|
||||
for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
|
||||
ptr += page_size)
|
||||
*(uint64_t *)ptr = MEM_TEST_VAL_2;
|
||||
|
||||
if (!guest_perform_sync())
|
||||
@ -505,6 +537,9 @@ static void guest_code_test_memslot_unmap(void)
|
||||
|
||||
static void guest_code_test_memslot_rw(void)
|
||||
{
|
||||
struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
|
||||
uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
|
||||
|
||||
GUEST_SYNC(0);
|
||||
|
||||
guest_spin_until_start();
|
||||
@ -513,14 +548,14 @@ static void guest_code_test_memslot_rw(void)
|
||||
uintptr_t ptr;
|
||||
|
||||
for (ptr = MEM_TEST_GPA;
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
|
||||
*(uint64_t *)ptr = MEM_TEST_VAL_1;
|
||||
|
||||
if (!guest_perform_sync())
|
||||
break;
|
||||
|
||||
for (ptr = MEM_TEST_GPA + 4096 / 2;
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
|
||||
for (ptr = MEM_TEST_GPA + page_size / 2;
|
||||
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
|
||||
uint64_t val = *(uint64_t *)ptr;
|
||||
|
||||
GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
|
||||
@ -538,6 +573,7 @@ static bool test_memslot_move_prepare(struct vm_data *data,
|
||||
struct sync_area *sync,
|
||||
uint64_t *maxslots, bool isactive)
|
||||
{
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
uint64_t movesrcgpa, movetestgpa;
|
||||
|
||||
movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
|
||||
@ -546,7 +582,7 @@ static bool test_memslot_move_prepare(struct vm_data *data,
|
||||
uint64_t lastpages;
|
||||
|
||||
vm_gpa2hva(data, movesrcgpa, &lastpages);
|
||||
if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
|
||||
if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
|
||||
*maxslots = 0;
|
||||
return false;
|
||||
}
|
||||
@ -592,8 +628,9 @@ static void test_memslot_do_unmap(struct vm_data *data,
|
||||
uint64_t offsp, uint64_t count)
|
||||
{
|
||||
uint64_t gpa, ctr;
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
|
||||
for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
|
||||
for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
|
||||
uint64_t npages;
|
||||
void *hva;
|
||||
int ret;
|
||||
@ -601,12 +638,12 @@ static void test_memslot_do_unmap(struct vm_data *data,
|
||||
hva = vm_gpa2hva(data, gpa, &npages);
|
||||
TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
|
||||
npages = min(npages, count - ctr);
|
||||
ret = madvise(hva, npages * 4096, MADV_DONTNEED);
|
||||
ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
|
||||
TEST_ASSERT(!ret,
|
||||
"madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
|
||||
hva, gpa);
|
||||
ctr += npages;
|
||||
gpa += npages * 4096;
|
||||
gpa += npages * guest_page_size;
|
||||
}
|
||||
TEST_ASSERT(ctr == count,
|
||||
"madvise(MADV_DONTNEED) should exactly cover all of the requested area");
|
||||
@ -617,11 +654,12 @@ static void test_memslot_map_unmap_check(struct vm_data *data,
|
||||
{
|
||||
uint64_t gpa;
|
||||
uint64_t *val;
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
|
||||
if (!map_unmap_verify)
|
||||
return;
|
||||
|
||||
gpa = MEM_TEST_GPA + offsp * 4096;
|
||||
gpa = MEM_TEST_GPA + offsp * guest_page_size;
|
||||
val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
|
||||
TEST_ASSERT(*val == valexp,
|
||||
"Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
|
||||
@ -631,12 +669,14 @@ static void test_memslot_map_unmap_check(struct vm_data *data,
|
||||
|
||||
static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
|
||||
{
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
|
||||
|
||||
/*
|
||||
* Unmap the second half of the test area while guest writes to (maps)
|
||||
* the first half.
|
||||
*/
|
||||
test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
|
||||
MEM_TEST_MAP_SIZE_PAGES / 2);
|
||||
test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);
|
||||
|
||||
/*
|
||||
* Wait for the guest to finish writing the first half of the test
|
||||
@ -647,10 +687,8 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
|
||||
*/
|
||||
host_perform_sync(sync);
|
||||
test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
|
||||
test_memslot_map_unmap_check(data,
|
||||
MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
|
||||
MEM_TEST_VAL_1);
|
||||
test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);
|
||||
test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
|
||||
test_memslot_do_unmap(data, 0, guest_pages / 2);
|
||||
|
||||
|
||||
/*
|
||||
@ -663,16 +701,16 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
|
||||
* the test area.
|
||||
*/
|
||||
host_perform_sync(sync);
|
||||
test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
|
||||
MEM_TEST_VAL_2);
|
||||
test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
|
||||
MEM_TEST_VAL_2);
|
||||
test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
|
||||
test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
|
||||
}
|
||||
|
||||
static void test_memslot_unmap_loop_common(struct vm_data *data,
|
||||
struct sync_area *sync,
|
||||
uint64_t chunk)
|
||||
{
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
|
||||
uint64_t ctr;
|
||||
|
||||
/*
|
||||
@ -684,42 +722,49 @@ static void test_memslot_unmap_loop_common(struct vm_data *data,
|
||||
*/
|
||||
host_perform_sync(sync);
|
||||
test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
|
||||
for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
|
||||
for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
|
||||
test_memslot_do_unmap(data, ctr, chunk);
|
||||
|
||||
/* Likewise, but for the opposite host / guest areas */
|
||||
host_perform_sync(sync);
|
||||
test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
|
||||
MEM_TEST_VAL_2);
|
||||
for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
|
||||
ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
|
||||
test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
|
||||
for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
|
||||
test_memslot_do_unmap(data, ctr, chunk);
|
||||
}
|
||||
|
||||
static void test_memslot_unmap_loop(struct vm_data *data,
|
||||
struct sync_area *sync)
|
||||
{
|
||||
test_memslot_unmap_loop_common(data, sync, 1);
|
||||
uint32_t host_page_size = getpagesize();
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
|
||||
1 : host_page_size / guest_page_size;
|
||||
|
||||
test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
|
||||
}
|
||||
|
||||
static void test_memslot_unmap_loop_chunked(struct vm_data *data,
|
||||
struct sync_area *sync)
|
||||
{
|
||||
test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
|
||||
|
||||
test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
|
||||
}
|
||||
|
||||
static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
|
||||
{
|
||||
uint64_t gptr;
|
||||
uint32_t guest_page_size = data->vm->page_size;
|
||||
|
||||
for (gptr = MEM_TEST_GPA + 4096 / 2;
|
||||
gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
|
||||
for (gptr = MEM_TEST_GPA + guest_page_size / 2;
|
||||
gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
|
||||
*(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
|
||||
|
||||
host_perform_sync(sync);
|
||||
|
||||
for (gptr = MEM_TEST_GPA;
|
||||
gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
|
||||
gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
|
||||
uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
|
||||
uint64_t val = *vptr;
|
||||
|
||||
@ -748,7 +793,7 @@ static bool test_execute(int nslots, uint64_t *maxslots,
|
||||
struct timespec *slot_runtime,
|
||||
struct timespec *guest_runtime)
|
||||
{
|
||||
uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
|
||||
uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
|
||||
struct vm_data *data;
|
||||
struct sync_area *sync;
|
||||
struct timespec tstart;
|
||||
@ -763,6 +808,7 @@ static bool test_execute(int nslots, uint64_t *maxslots,
|
||||
|
||||
sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
|
||||
|
||||
sync->guest_page_size = data->vm->page_size;
|
||||
if (tdata->prepare &&
|
||||
!tdata->prepare(data, sync, maxslots)) {
|
||||
ret = false;
|
||||
@ -796,19 +842,19 @@ static bool test_execute(int nslots, uint64_t *maxslots,
|
||||
static const struct test_data tests[] = {
|
||||
{
|
||||
.name = "map",
|
||||
.mem_size = MEM_SIZE_MAP_PAGES,
|
||||
.mem_size = MEM_SIZE_MAP,
|
||||
.guest_code = guest_code_test_memslot_map,
|
||||
.loop = test_memslot_map_loop,
|
||||
},
|
||||
{
|
||||
.name = "unmap",
|
||||
.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
|
||||
.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
|
||||
.guest_code = guest_code_test_memslot_unmap,
|
||||
.loop = test_memslot_unmap_loop,
|
||||
},
|
||||
{
|
||||
.name = "unmap chunked",
|
||||
.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
|
||||
.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
|
||||
.guest_code = guest_code_test_memslot_unmap,
|
||||
.loop = test_memslot_unmap_loop_chunked,
|
||||
},
|
||||
@ -866,9 +912,46 @@ static void help(char *name, struct test_args *targs)
|
||||
pr_info("%d: %s\n", ctr, tests[ctr].name);
|
||||
}
|
||||
|
||||
static bool check_memory_sizes(void)
|
||||
{
|
||||
uint32_t host_page_size = getpagesize();
|
||||
uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
|
||||
|
||||
if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
|
||||
pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
|
||||
host_page_size, guest_page_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (MEM_SIZE % guest_page_size ||
|
||||
MEM_TEST_SIZE % guest_page_size) {
|
||||
pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (MEM_SIZE_MAP % guest_page_size ||
|
||||
MEM_TEST_MAP_SIZE % guest_page_size ||
|
||||
(MEM_TEST_MAP_SIZE / guest_page_size) <= 2 ||
|
||||
(MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
|
||||
pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE ||
|
||||
MEM_TEST_UNMAP_SIZE % guest_page_size ||
|
||||
(MEM_TEST_UNMAP_SIZE / guest_page_size) %
|
||||
(2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
|
||||
pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse_args(int argc, char *argv[],
|
||||
struct test_args *targs)
|
||||
{
|
||||
uint32_t max_mem_slots;
|
||||
int opt;
|
||||
|
||||
while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
|
||||
@ -885,8 +968,8 @@ static bool parse_args(int argc, char *argv[],
|
||||
break;
|
||||
case 's':
|
||||
targs->nslots = atoi_paranoid(optarg);
|
||||
if (targs->nslots <= 0 && targs->nslots != -1) {
|
||||
pr_info("Slot count cap has to be positive or -1 for no cap\n");
|
||||
if (targs->nslots <= 1 && targs->nslots != -1) {
|
||||
pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -920,6 +1003,21 @@ static bool parse_args(int argc, char *argv[],
|
||||
return false;
|
||||
}
|
||||
|
||||
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
|
||||
if (max_mem_slots <= 1) {
|
||||
pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Memory slot 0 is reserved */
|
||||
if (targs->nslots == -1)
|
||||
targs->nslots = max_mem_slots - 1;
|
||||
else
|
||||
targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;
|
||||
|
||||
pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
|
||||
targs->nslots + 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -994,6 +1092,9 @@ int main(int argc, char *argv[])
|
||||
struct test_result rbestslottime;
|
||||
int tctr;
|
||||
|
||||
if (!check_memory_sizes())
|
||||
return -1;
|
||||
|
||||
if (!parse_args(argc, argv, &targs))
|
||||
return -1;
|
||||
|
||||
|
@ -33,6 +33,12 @@ config HAVE_KVM_DIRTY_RING_ACQ_REL
|
||||
bool
|
||||
select HAVE_KVM_DIRTY_RING
|
||||
|
||||
# Allow enabling both the dirty bitmap and dirty ring. Only architectures
|
||||
# that need to dirty memory outside of a vCPU context should select this.
|
||||
config NEED_KVM_DIRTY_RING_WITH_BITMAP
|
||||
bool
|
||||
depends on HAVE_KVM_DIRTY_RING
|
||||
|
||||
config HAVE_KVM_EVENTFD
|
||||
bool
|
||||
select EVENTFD
|
||||
|
@ -21,12 +21,26 @@ u32 kvm_dirty_ring_get_rsvd_entries(void)
|
||||
return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
|
||||
}
|
||||
|
||||
bool kvm_use_dirty_bitmap(struct kvm *kvm)
|
||||
{
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
|
||||
bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
|
||||
{
|
||||
return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
|
||||
}
|
||||
|
||||
/*
 * Report whether @ring has reached its soft limit, i.e. whether the number
 * of entries returned by kvm_dirty_ring_used() is at least ring->soft_limit.
 *
 * NOTE(review): two signature lines appear below (one without and one with
 * `static`); this looks like the removed and added sides of a diff hunk
 * rendered together -- confirm which one applies before building.
 */
bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
|
||||
static bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
|
||||
{
|
||||
return kvm_dirty_ring_used(ring) >= ring->soft_limit;
|
||||
}
|
||||
@ -142,13 +156,19 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
|
||||
|
||||
kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
|
||||
|
||||
/*
|
||||
* The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared
|
||||
* by the VCPU thread next time when it enters the guest.
|
||||
*/
|
||||
|
||||
trace_kvm_dirty_ring_reset(ring);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
|
||||
void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
|
||||
{
|
||||
struct kvm_dirty_ring *ring = &vcpu->dirty_ring;
|
||||
struct kvm_dirty_gfn *entry;
|
||||
|
||||
/* It should never get full */
|
||||
@ -166,6 +186,28 @@ void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
|
||||
kvm_dirty_gfn_set_dirtied(entry);
|
||||
ring->dirty_index++;
|
||||
trace_kvm_dirty_ring_push(ring, slot, offset);
|
||||
|
||||
if (kvm_dirty_ring_soft_full(ring))
|
||||
kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
|
||||
}
|
||||
|
||||
bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* The VCPU isn't runnable when the dirty ring becomes soft full.
|
||||
* The KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent
|
||||
* the VCPU from running until the dirty pages are harvested and
|
||||
* the dirty ring is reset by userspace.
|
||||
*/
|
||||
if (kvm_check_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu) &&
|
||||
kvm_dirty_ring_soft_full(&vcpu->dirty_ring)) {
|
||||
kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
|
||||
vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
|
||||
trace_kvm_dirty_ring_exit(vcpu);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
|
||||
|
@ -1617,7 +1617,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
|
||||
new->dirty_bitmap = NULL;
|
||||
else if (old && old->dirty_bitmap)
|
||||
new->dirty_bitmap = old->dirty_bitmap;
|
||||
else if (!kvm->dirty_ring_size) {
|
||||
else if (kvm_use_dirty_bitmap(kvm)) {
|
||||
r = kvm_alloc_dirty_bitmap(new);
|
||||
if (r)
|
||||
return r;
|
||||
@ -2068,8 +2068,8 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
|
||||
unsigned long n;
|
||||
unsigned long any = 0;
|
||||
|
||||
/* Dirty ring tracking is exclusive to dirty log tracking */
|
||||
if (kvm->dirty_ring_size)
|
||||
/* Dirty ring tracking may be exclusive to dirty log tracking */
|
||||
if (!kvm_use_dirty_bitmap(kvm))
|
||||
return -ENXIO;
|
||||
|
||||
*memslot = NULL;
|
||||
@ -2133,8 +2133,8 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
unsigned long *dirty_bitmap_buffer;
|
||||
bool flush;
|
||||
|
||||
/* Dirty ring tracking is exclusive to dirty log tracking */
|
||||
if (kvm->dirty_ring_size)
|
||||
/* Dirty ring tracking may be exclusive to dirty log tracking */
|
||||
if (!kvm_use_dirty_bitmap(kvm))
|
||||
return -ENXIO;
|
||||
|
||||
as_id = log->slot >> 16;
|
||||
@ -2245,8 +2245,8 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
|
||||
unsigned long *dirty_bitmap_buffer;
|
||||
bool flush;
|
||||
|
||||
/* Dirty ring tracking is exclusive to dirty log tracking */
|
||||
if (kvm->dirty_ring_size)
|
||||
/* Dirty ring tracking may be exclusive to dirty log tracking */
|
||||
if (!kvm_use_dirty_bitmap(kvm))
|
||||
return -ENXIO;
|
||||
|
||||
as_id = log->slot >> 16;
|
||||
@ -3321,18 +3321,19 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
|
||||
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_DIRTY_RING
|
||||
if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
|
||||
if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
|
||||
#endif
|
||||
|
||||
if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
|
||||
unsigned long rel_gfn = gfn - memslot->base_gfn;
|
||||
u32 slot = (memslot->as_id << 16) | memslot->id;
|
||||
|
||||
if (kvm->dirty_ring_size)
|
||||
kvm_dirty_ring_push(&vcpu->dirty_ring,
|
||||
slot, rel_gfn);
|
||||
else
|
||||
if (kvm->dirty_ring_size && vcpu)
|
||||
kvm_dirty_ring_push(vcpu, slot, rel_gfn);
|
||||
else if (memslot->dirty_bitmap)
|
||||
set_bit_le(rel_gfn, memslot->dirty_bitmap);
|
||||
}
|
||||
}
|
||||
@ -4499,6 +4500,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
|
||||
return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
|
||||
case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP:
|
||||
#endif
|
||||
case KVM_CAP_BINARY_STATS_FD:
|
||||
case KVM_CAP_SYSTEM_EVENT_DATA:
|
||||
@ -4575,6 +4579,20 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static bool kvm_are_all_memslots_empty(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
if (!kvm_memslots_empty(__kvm_memslots(kvm, i)))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
|
||||
struct kvm_enable_cap *cap)
|
||||
{
|
||||
@ -4605,6 +4623,29 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
|
||||
return -EINVAL;
|
||||
|
||||
return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
|
||||
case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: {
|
||||
int r = -EINVAL;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) ||
|
||||
!kvm->dirty_ring_size || cap->flags)
|
||||
return r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
/*
|
||||
* For simplicity, allow enabling ring+bitmap if and only if
|
||||
* there are no memslots, e.g. to ensure all memslots allocate
|
||||
* a bitmap after the capability is enabled.
|
||||
*/
|
||||
if (kvm_are_all_memslots_empty(kvm)) {
|
||||
kvm->dirty_ring_with_bitmap = true;
|
||||
r = 0;
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
default:
|
||||
return kvm_vm_ioctl_enable_cap(kvm, cap);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user