mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-19 20:12:32 +00:00
Merge branch kvm-arm64/eager-page-splitting into kvmarm/next
* kvm-arm64/eager-page-splitting: : Eager Page Splitting, courtesy of Ricardo Koller. : : Dirty logging performance is dominated by the cost of splitting : hugepages to PTE granularity. On systems that mere mortals can get their : hands on, each fault incurs the cost of a full break-before-make : pattern, wherein the broadcast invalidation and ensuing serialization : significantly increases fault latency. : : The goal of eager page splitting is to move the cost of hugepage : splitting out of the stage-2 fault path and instead into the ioctls : responsible for managing the dirty log: : : - If manual protection is enabled for the VM, hugepage splitting : happens in the KVM_CLEAR_DIRTY_LOG ioctl. This is desirable as it : provides userspace granular control over hugepage splitting. : : - Otherwise, if userspace relies on the legacy dirty log behavior : (clear on collection), hugepage splitting is done at the moment dirty : logging is enabled for a particular memslot. : : Support for eager page splitting requires explicit opt-in from : userspace, which is realized through the : KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE capability. arm64: kvm: avoid overflow in integer division KVM: arm64: Use local TLBI on permission relaxation KVM: arm64: Split huge pages during KVM_CLEAR_DIRTY_LOG KVM: arm64: Open-code kvm_mmu_write_protect_pt_masked() KVM: arm64: Split huge pages when dirty logging is enabled KVM: arm64: Add kvm_uninit_stage2_mmu() KVM: arm64: Refactor kvm_arch_commit_memory_region() KVM: arm64: Add kvm_pgtable_stage2_split() KVM: arm64: Add KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE KVM: arm64: Export kvm_are_all_memslots_empty() KVM: arm64: Add helper for creating unlinked stage2 subtrees KVM: arm64: Add KVM_PGTABLE_WALK flags for skipping CMOs and BBM TLBIs KVM: arm64: Rename free_removed to free_unlinked Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
This commit is contained in:
commit
83510396c0
@ -8445,6 +8445,33 @@ structure.
|
||||
When getting the Modified Change Topology Report value, the attr->addr
|
||||
must point to a byte where the value will be stored or retrieved from.
|
||||
|
||||
8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
---------------------------------------
|
||||
|
||||
:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
:Architectures: arm64
|
||||
:Type: vm
|
||||
:Parameters: arg[0] is the new split chunk size.
|
||||
:Returns: 0 on success, -EINVAL if any memslot was already created or if a non-zero chunk size is not a supported block size.
|
||||
|
||||
This capability sets the chunk size used in Eager Page Splitting.
|
||||
|
||||
Eager Page Splitting improves the performance of dirty-logging (used
|
||||
in live migrations) when guest memory is backed by huge-pages. It
|
||||
avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
|
||||
it eagerly when enabling dirty logging (with the
|
||||
KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
|
||||
KVM_CLEAR_DIRTY_LOG.
|
||||
|
||||
The chunk size specifies how many pages to break at a time, using a
|
||||
single allocation for each chunk. The bigger the chunk size, the more pages
|
||||
need to be allocated ahead of time.
|
||||
|
||||
The chunk size needs to be a valid block size. The list of acceptable
|
||||
block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
|
||||
64-bit bitmap (each bit describing a block size). The default value is
|
||||
0, to disable the eager page splitting.
|
||||
|
||||
9. Known KVM API problems
|
||||
=========================
|
||||
|
||||
|
@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
|
||||
@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
|
||||
|
||||
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
|
||||
|
@ -159,6 +159,21 @@ struct kvm_s2_mmu {
|
||||
/* The last vcpu id that ran on each physical CPU */
|
||||
int __percpu *last_vcpu_ran;
|
||||
|
||||
#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
|
||||
/*
|
||||
* Memory cache used to split
|
||||
* KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
|
||||
* is used to allocate stage2 page tables while splitting huge
|
||||
* pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
* influences both the capacity of the split page cache, and
|
||||
* how often KVM reschedules. Be wary of raising CHUNK_SIZE
|
||||
* too high.
|
||||
*
|
||||
* Protected by kvm->slots_lock.
|
||||
*/
|
||||
struct kvm_mmu_memory_cache split_page_cache;
|
||||
uint64_t split_page_chunk_size;
|
||||
|
||||
struct kvm_arch *arch;
|
||||
};
|
||||
|
||||
|
@ -172,6 +172,7 @@ void __init free_hyp_pgds(void);
|
||||
|
||||
void stage2_unmap_vm(struct kvm *kvm);
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
|
||||
void kvm_uninit_stage2_mmu(struct kvm *kvm);
|
||||
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
|
||||
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
||||
phys_addr_t pa, unsigned long size, bool writable);
|
||||
|
@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
}
|
||||
|
||||
static inline u32 kvm_supported_block_sizes(void)
|
||||
{
|
||||
u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
u32 r = 0;
|
||||
|
||||
for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
|
||||
r |= BIT(kvm_granule_shift(level));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Tell whether @size is one of the block sizes advertised by
 * kvm_supported_block_sizes().
 *
 * IS_ALIGNED(size, size) serves as the power-of-two test; the bitmap test
 * that follows rejects every size whose bit is not set in the bitmap, which
 * includes a @size of 0.
 */
static inline bool kvm_is_block_size_supported(u64 size)
{
	if (!IS_ALIGNED(size, size))
		return false;

	return (size & kvm_supported_block_sizes()) != 0;
}
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
||||
* @zalloc_page: Allocate a single zeroed memory page.
|
||||
@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
* allocation is physically contiguous.
|
||||
* @free_pages_exact: Free an exact number of memory pages previously
|
||||
* allocated by zalloc_pages_exact.
|
||||
* @free_removed_table: Free a removed paging structure by unlinking and
|
||||
* @free_unlinked_table: Free an unlinked paging structure by unlinking and
|
||||
* dropping references.
|
||||
* @get_page: Increment the refcount on a page.
|
||||
* @put_page: Decrement the refcount on a page. When the
|
||||
@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*free_removed_table)(void *addr, u32 level);
|
||||
void (*free_unlinked_table)(void *addr, u32 level);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
|
||||
* with other software walkers.
|
||||
* @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
|
||||
* invoked from a fault handler.
|
||||
* @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
|
||||
* without Break-before-make's
|
||||
* TLB invalidation.
|
||||
* @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
|
||||
* without Cache maintenance
|
||||
* operations required.
|
||||
*/
|
||||
enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_LEAF = BIT(0),
|
||||
@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
|
||||
KVM_PGTABLE_WALK_SHARED = BIT(3),
|
||||
KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
|
||||
KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
|
||||
KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
|
||||
};
|
||||
|
||||
struct kvm_pgtable_visit_ctx {
|
||||
@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
|
||||
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @pgtable: Unlinked stage-2 paging structure to be freed.
|
||||
* @level: Level of the stage-2 paging structure to be freed.
|
||||
@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior to
|
||||
* freeing and therefore no TLB invalidation is performed.
|
||||
*/
|
||||
void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @phys: Physical address of the memory to map.
|
||||
* @level: Starting level of the stage-2 paging structure to be created.
|
||||
* @prot: Permissions and attributes for the mapping.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
* @force_pte: Force mappings to PAGE_SIZE granularity.
|
||||
*
|
||||
* Returns an unlinked page-table tree. This new page-table tree is
|
||||
* not reachable (i.e., it is unlinked) from the root pgd and it's
|
||||
* therefore unreachable by the hardware page-table walker. No TLB
|
||||
* invalidation or CMOs are performed.
|
||||
*
|
||||
* If device attributes are not explicitly requested in @prot, then the
|
||||
* mapping will be normal, cacheable.
|
||||
*
|
||||
* Return: The fully populated (unlinked) stage-2 paging structure, or
|
||||
* an ERR_PTR(error) on failure.
|
||||
*/
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
|
||||
@ -620,6 +672,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
|
||||
*/
|
||||
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
|
||||
* to PAGE_SIZE guest pages.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @addr: Intermediate physical address from which to split.
|
||||
* @size: Size of the range.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
*
|
||||
* The function tries to split any level 1 or 2 entry that overlaps
|
||||
* with the input range (given by @addr and @size).
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure. Note that
|
||||
* kvm_pgtable_stage2_split() is best effort: it tries to break as many
|
||||
* blocks in the input range as allowed by the capacity of @mc.
|
||||
*/
|
||||
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_mmu_memory_cache *mc);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_walk() - Walk a page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
|
||||
|
@ -65,6 +65,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
struct kvm_enable_cap *cap)
|
||||
{
|
||||
int r;
|
||||
u64 new_cap;
|
||||
|
||||
if (cap->flags)
|
||||
return -EINVAL;
|
||||
@ -89,6 +90,24 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
r = 0;
|
||||
set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
|
||||
break;
|
||||
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
|
||||
new_cap = cap->args[0];
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
/*
|
||||
* To keep things simple, allow changing the chunk
|
||||
* size only when no memory slots have been created.
|
||||
*/
|
||||
if (!kvm_are_all_memslots_empty(kvm)) {
|
||||
r = -EINVAL;
|
||||
} else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
|
||||
r = -EINVAL;
|
||||
} else {
|
||||
r = 0;
|
||||
kvm->arch.mmu.split_page_chunk_size = new_cap;
|
||||
}
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
@ -302,6 +321,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_ARM_PTRAUTH_GENERIC:
|
||||
r = system_has_full_ptr_auth();
|
||||
break;
|
||||
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
|
||||
if (kvm)
|
||||
r = kvm->arch.mmu.split_page_chunk_size;
|
||||
else
|
||||
r = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
|
||||
break;
|
||||
case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES:
|
||||
r = kvm_supported_block_sizes();
|
||||
break;
|
||||
default:
|
||||
r = 0;
|
||||
}
|
||||
|
@ -125,6 +125,15 @@ static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
|
||||
__kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
|
||||
}
|
||||
|
||||
static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
|
||||
DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
|
||||
DECLARE_REG(int, level, host_ctxt, 3);
|
||||
|
||||
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
|
||||
}
|
||||
|
||||
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
|
||||
@ -315,6 +324,7 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__kvm_vcpu_run),
|
||||
HANDLE_FUNC(__kvm_flush_vm_context),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid),
|
||||
HANDLE_FUNC(__kvm_flush_cpu_context),
|
||||
HANDLE_FUNC(__kvm_timer_set_cntvoff),
|
||||
|
@ -91,9 +91,9 @@ static void host_s2_put_page(void *addr)
|
||||
hyp_put_page(&host_s2_pool, addr);
|
||||
}
|
||||
|
||||
static void host_s2_free_removed_table(void *addr, u32 level)
|
||||
static void host_s2_free_unlinked_table(void *addr, u32 level)
|
||||
{
|
||||
kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
|
||||
kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
|
||||
}
|
||||
|
||||
static int prepare_s2_pool(void *pgt_pool_base)
|
||||
@ -110,7 +110,7 @@ static int prepare_s2_pool(void *pgt_pool_base)
|
||||
host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_pages_exact = host_s2_zalloc_pages_exact,
|
||||
.zalloc_page = host_s2_zalloc_page,
|
||||
.free_removed_table = host_s2_free_removed_table,
|
||||
.free_unlinked_table = host_s2_free_unlinked_table,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
.virt_to_phys = hyp_virt_to_phys,
|
||||
.page_count = hyp_page_count,
|
||||
|
@ -130,6 +130,58 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t ipa, int level)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
||||
/* Switch to requested VMID */
|
||||
__tlb_switch_to_guest(mmu, &cxt, true);
|
||||
|
||||
/*
|
||||
* We could do so much better if we had the VA as well.
|
||||
* Instead, we invalidate Stage-2 for this IPA, and the
|
||||
* whole of Stage-1. Weep...
|
||||
*/
|
||||
ipa >>= 12;
|
||||
__tlbi_level(ipas2e1, ipa, level);
|
||||
|
||||
/*
|
||||
* We have to ensure completion of the invalidation at Stage-2,
|
||||
* since a table walk on another CPU could refill a TLB with a
|
||||
* complete (S1 + S2) walk based on the old Stage-2 mapping if
|
||||
* the Stage-1 invalidation happened first.
|
||||
*/
|
||||
dsb(nsh);
|
||||
__tlbi(vmalle1);
|
||||
dsb(nsh);
|
||||
isb();
|
||||
|
||||
/*
|
||||
* If the host is running at EL1 and we have a VPIPT I-cache,
|
||||
* then we must perform I-cache maintenance at EL2 in order for
|
||||
* it to have an effect on the guest. Since the guest cannot hit
|
||||
* I-cache lines allocated with a different VMID, we don't need
|
||||
* to worry about junk out of guest reset (we nuke the I-cache on
|
||||
* VMID rollover), but we do need to be careful when remapping
|
||||
* executable pages for the same guest. This can happen when KSM
|
||||
* takes a CoW fault on an executable page, copies the page into
|
||||
* a page that was previously mapped in the guest and then needs
|
||||
* to invalidate the guest view of the I-cache for that page
|
||||
* from EL1. To solve this, we invalidate the entire I-cache when
|
||||
* unmapping a page from a guest if we have a VPIPT I-cache but
|
||||
* the host is running at EL1. As above, we could do better if
|
||||
* we had the VA.
|
||||
*
|
||||
* The moral of this story is: if you have a VPIPT I-cache, then
|
||||
* you should be running with VHE enabled.
|
||||
*/
|
||||
if (icache_is_vpipt())
|
||||
icache_inval_all_pou();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
@ -63,6 +63,16 @@ struct kvm_pgtable_walk_data {
|
||||
const u64 end;
|
||||
};
|
||||
|
||||
static bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
{
|
||||
return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI);
|
||||
}
|
||||
|
||||
static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
{
|
||||
return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
|
||||
}
|
||||
|
||||
static bool kvm_phys_is_valid(u64 phys)
|
||||
{
|
||||
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
|
||||
@ -743,14 +753,17 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Perform the appropriate TLB invalidation based on the evicted pte
|
||||
* value (if any).
|
||||
*/
|
||||
if (kvm_pte_table(ctx->old, ctx->level))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
|
||||
else if (kvm_pte_valid(ctx->old))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
|
||||
if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
|
||||
/*
|
||||
* Perform the appropriate TLB invalidation based on the
|
||||
* evicted pte value (if any).
|
||||
*/
|
||||
if (kvm_pte_table(ctx->old, ctx->level))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
|
||||
else if (kvm_pte_valid(ctx->old))
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
|
||||
ctx->addr, ctx->level);
|
||||
}
|
||||
|
||||
if (stage2_pte_is_counted(ctx->old))
|
||||
mm_ops->put_page(ctx->ptep);
|
||||
@ -857,11 +870,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
return -EAGAIN;
|
||||
|
||||
/* Perform CMOs before installation of the guest stage-2 PTE */
|
||||
if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
|
||||
if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc &&
|
||||
stage2_pte_cacheable(pgt, new))
|
||||
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
|
||||
granule);
|
||||
granule);
|
||||
|
||||
if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
|
||||
if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou &&
|
||||
stage2_pte_executable(new))
|
||||
mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
|
||||
|
||||
stage2_make_pte(ctx, new);
|
||||
@ -883,7 +898,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mm_ops->free_removed_table(childp, ctx->level);
|
||||
mm_ops->free_unlinked_table(childp, ctx->level);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -928,7 +943,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
* The TABLE_PRE callback runs for table entries on the way down, looking
|
||||
* for table entries which we could conceivably replace with a block entry
|
||||
* for this mapping. If it finds one it replaces the entry and calls
|
||||
* kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
|
||||
* kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table.
|
||||
*
|
||||
* Otherwise, the LEAF callback performs the mapping at the existing leaves
|
||||
* instead.
|
||||
@ -1197,7 +1212,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
||||
KVM_PGTABLE_WALK_HANDLE_FAULT |
|
||||
KVM_PGTABLE_WALK_SHARED);
|
||||
if (!ret)
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1230,6 +1245,162 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
return kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
}
|
||||
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte)
|
||||
{
|
||||
struct stage2_map_data map_data = {
|
||||
.phys = phys,
|
||||
.mmu = pgt->mmu,
|
||||
.memcache = mc,
|
||||
.force_pte = force_pte,
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_map_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF |
|
||||
KVM_PGTABLE_WALK_SKIP_BBM_TLBI |
|
||||
KVM_PGTABLE_WALK_SKIP_CMO,
|
||||
.arg = &map_data,
|
||||
};
|
||||
/*
|
||||
* The input address (.addr) is irrelevant for walking an
|
||||
* unlinked table. Construct an ambiguous IA range to map
|
||||
* kvm_granule_size(level) worth of memory.
|
||||
*/
|
||||
struct kvm_pgtable_walk_data data = {
|
||||
.walker = &walker,
|
||||
.addr = 0,
|
||||
.end = kvm_granule_size(level),
|
||||
};
|
||||
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
|
||||
kvm_pte_t *pgtable;
|
||||
int ret;
|
||||
|
||||
if (!IS_ALIGNED(phys, kvm_granule_size(level)))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
pgtable = mm_ops->zalloc_page(mc);
|
||||
if (!pgtable)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable,
|
||||
level + 1);
|
||||
if (ret) {
|
||||
kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
|
||||
mm_ops->put_page(pgtable);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return pgtable;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the number of page-tables needed to replace a block with a
|
||||
* fully populated tree up to the PTE entries. Note that @level is
|
||||
* interpreted as in "level @level entry".
|
||||
*/
|
||||
static int stage2_block_get_nr_page_tables(u32 level)
{
	/* Level-1 block: one table of level-2 entries plus one PTE table per entry. */
	if (level == 1)
		return PTRS_PER_PTE + 1;

	/* Level-2 block: a single table of PTEs. */
	if (level == 2)
		return 1;

	/* Level-3 entries are already PTEs: nothing to allocate. */
	if (level == 3)
		return 0;

	/* Any other level is rejected; warn once if it is outside the block range. */
	WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
		     level >= KVM_PGTABLE_MAX_LEVELS);
	return -EINVAL;
}
|
||||
|
||||
static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
|
||||
struct kvm_mmu_memory_cache *mc = ctx->arg;
|
||||
struct kvm_s2_mmu *mmu;
|
||||
kvm_pte_t pte = ctx->old, new, *childp;
|
||||
enum kvm_pgtable_prot prot;
|
||||
u32 level = ctx->level;
|
||||
bool force_pte;
|
||||
int nr_pages;
|
||||
u64 phys;
|
||||
|
||||
/* No huge-pages exist at the last level */
|
||||
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
return 0;
|
||||
|
||||
/* We only split valid block mappings */
|
||||
if (!kvm_pte_valid(pte))
|
||||
return 0;
|
||||
|
||||
nr_pages = stage2_block_get_nr_page_tables(level);
|
||||
if (nr_pages < 0)
|
||||
return nr_pages;
|
||||
|
||||
if (mc->nobjs >= nr_pages) {
|
||||
/* Build a tree mapped down to the PTE granularity. */
|
||||
force_pte = true;
|
||||
} else {
|
||||
/*
|
||||
* Don't force PTEs, so create_unlinked() below does
|
||||
* not populate the tree up to the PTE level. The
|
||||
* consequence is that the call will require a single
|
||||
* page of level 2 entries at level 1, or a single
|
||||
* page of PTEs at level 2. If we are at level 1, the
|
||||
* PTEs will be created recursively.
|
||||
*/
|
||||
force_pte = false;
|
||||
nr_pages = 1;
|
||||
}
|
||||
|
||||
if (mc->nobjs < nr_pages)
|
||||
return -ENOMEM;
|
||||
|
||||
mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
|
||||
phys = kvm_pte_to_phys(pte);
|
||||
prot = kvm_pgtable_stage2_pte_prot(pte);
|
||||
|
||||
childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
|
||||
level, prot, mc, force_pte);
|
||||
if (IS_ERR(childp))
|
||||
return PTR_ERR(childp);
|
||||
|
||||
if (!stage2_try_break_pte(ctx, mmu)) {
|
||||
kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
|
||||
mm_ops->put_page(childp);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note, the contents of the page table are guaranteed to be made
|
||||
* visible before the new PTE is assigned because stage2_make_pte()
|
||||
* writes the PTE using smp_store_release().
|
||||
*/
|
||||
new = kvm_init_table_pte(childp, mm_ops);
|
||||
stage2_make_pte(ctx, new);
|
||||
dsb(ishst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk the leaf entries of [@addr, @addr + @size) in @pgt with
 * stage2_split_walker(), handing it @mc as the source of new table pages.
 * Returns whatever kvm_pgtable_walk() returns.
 */
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			     struct kvm_mmu_memory_cache *mc)
{
	struct kvm_pgtable_walker split_walker = {
		.arg	= mc,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.cb	= stage2_split_walker,
	};
	int ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &split_walker);

	return ret;
}
|
||||
|
||||
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
@ -1299,7 +1470,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
|
||||
{
|
||||
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
|
@ -111,6 +111,38 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t ipa, int level)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
||||
dsb(nshst);
|
||||
|
||||
/* Switch to requested VMID */
|
||||
__tlb_switch_to_guest(mmu, &cxt);
|
||||
|
||||
/*
|
||||
* We could do so much better if we had the VA as well.
|
||||
* Instead, we invalidate Stage-2 for this IPA, and the
|
||||
* whole of Stage-1. Weep...
|
||||
*/
|
||||
ipa >>= 12;
|
||||
__tlbi_level(ipas2e1, ipa, level);
|
||||
|
||||
/*
|
||||
* We have to ensure completion of the invalidation at Stage-2,
|
||||
* since a table walk on another CPU could refill a TLB with a
|
||||
* complete (S1 + S2) walk based on the old Stage-2 mapping if
|
||||
* the Stage-1 invalidation happened first.
|
||||
*/
|
||||
dsb(nsh);
|
||||
__tlbi(vmalle1);
|
||||
dsb(nsh);
|
||||
isb();
|
||||
|
||||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
@ -31,14 +31,21 @@ static phys_addr_t __ro_after_init hyp_idmap_vector;
|
||||
|
||||
static unsigned long __ro_after_init io_map_base;
|
||||
|
||||
static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
|
||||
static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
|
||||
phys_addr_t size)
|
||||
{
|
||||
phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
|
||||
phys_addr_t boundary = ALIGN_DOWN(addr + size, size);
|
||||
|
||||
return (boundary - 1 < end - 1) ? boundary : end;
|
||||
}
|
||||
|
||||
static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
|
||||
{
|
||||
phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
|
||||
|
||||
return __stage2_range_addr_end(addr, end, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
|
||||
* we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
|
||||
@ -75,6 +82,79 @@ static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
|
||||
#define stage2_apply_range_resched(mmu, addr, end, fn) \
|
||||
stage2_apply_range(mmu, addr, end, fn, true)
|
||||
|
||||
/*
|
||||
* Get the maximum number of page-table pages needed to split a range
|
||||
* of blocks into PAGE_SIZE PTEs. It assumes the range is already
|
||||
* mapped at level 2, or at level 1 if allowed.
|
||||
*/
|
||||
static int kvm_mmu_split_nr_page_tables(u64 range)
{
	/* One table per PMD_SIZE of @range, rounded up. */
	int nr_tables = DIV_ROUND_UP(range, PMD_SIZE);

	/*
	 * When blocks may exist above level 2, also count one table per
	 * PUD_SIZE of @range, rounded up.
	 */
	if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2)
		nr_tables += DIV_ROUND_UP(range, PUD_SIZE);

	return nr_tables;
}
|
||||
|
||||
static bool need_split_memcache_topup_or_resched(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_mmu_memory_cache *cache;
|
||||
u64 chunk_size, min;
|
||||
|
||||
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
|
||||
return true;
|
||||
|
||||
chunk_size = kvm->arch.mmu.split_page_chunk_size;
|
||||
min = kvm_mmu_split_nr_page_tables(chunk_size);
|
||||
cache = &kvm->arch.mmu.split_page_cache;
|
||||
return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
|
||||
}
|
||||
|
||||
/*
 * Split the stage-2 block mappings in [@addr, @end), one chunk of
 * split_page_chunk_size at a time.
 *
 * Must be called with kvm->mmu_lock held for write. The lock is dropped
 * and re-taken whenever the split page cache needs topping up or a
 * reschedule is due.
 *
 * Returns 0 if eager splitting is disabled (chunk size of 0) or the whole
 * range was processed, -EINVAL if the stage-2 page table is gone, or the
 * first error from the cache top-up or from kvm_pgtable_stage2_split().
 */
static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
				    phys_addr_t end)
{
	struct kvm_mmu_memory_cache *cache;
	struct kvm_pgtable *pgt;
	int ret, cache_capacity;
	u64 next, chunk_size;

	lockdep_assert_held_write(&kvm->mmu_lock);

	chunk_size = kvm->arch.mmu.split_page_chunk_size;
	if (!chunk_size)
		return 0;

	cache_capacity = kvm_mmu_split_nr_page_tables(chunk_size);
	cache = &kvm->arch.mmu.split_page_cache;

	for (;;) {
		if (need_split_memcache_topup_or_resched(kvm)) {
			write_unlock(&kvm->mmu_lock);
			cond_resched();
			/* Eager page splitting is best-effort. */
			ret = __kvm_mmu_topup_memory_cache(cache,
							   cache_capacity,
							   cache_capacity);
			write_lock(&kvm->mmu_lock);
			if (ret)
				break;
		}

		/* Re-read the page table on every pass, after any relock above. */
		pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = __stage2_range_addr_end(addr, end, chunk_size);
		ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
		if (ret)
			break;

		addr = next;
		if (addr == end)
			break;
	}

	return ret;
}
|
||||
|
||||
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
|
||||
{
|
||||
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
|
||||
@ -131,21 +211,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
|
||||
|
||||
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
|
||||
|
||||
static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
|
||||
static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
|
||||
{
|
||||
struct page *page = container_of(head, struct page, rcu_head);
|
||||
void *pgtable = page_to_virt(page);
|
||||
u32 level = page_private(page);
|
||||
|
||||
kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
|
||||
kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
|
||||
}
|
||||
|
||||
/*
 * Queue a stage-2 page-table page for freeing through call_rcu().
 *
 * @addr:  kernel virtual address of the table page
 * @level: level of the table; stored in the page's private field so that
 *         stage2_free_unlinked_table_rcu_cb() can read it back
 */
static void stage2_free_unlinked_table(void *addr, u32 level)
{
	struct page *page = virt_to_page(addr);

	set_page_private(page, (unsigned long)level);
	call_rcu(&page->rcu_head, stage2_free_unlinked_table_rcu_cb);
}
|
||||
|
||||
static void kvm_host_get_page(void *addr)
|
||||
@ -701,7 +781,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
|
||||
.zalloc_page = stage2_memcache_zalloc_page,
|
||||
.zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
|
||||
.free_pages_exact = kvm_s2_free_pages_exact,
|
||||
.free_removed_table = stage2_free_removed_table,
|
||||
.free_unlinked_table = stage2_free_unlinked_table,
|
||||
.get_page = kvm_host_get_page,
|
||||
.put_page = kvm_s2_put_page,
|
||||
.page_count = kvm_host_page_count,
|
||||
@ -775,6 +855,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
|
||||
for_each_possible_cpu(cpu)
|
||||
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
|
||||
|
||||
/* The eager page splitting is disabled by default */
|
||||
mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
|
||||
mmu->split_page_cache.gfp_zero = __GFP_ZERO;
|
||||
|
||||
mmu->pgt = pgt;
|
||||
mmu->pgd_phys = __pa(pgt->pgd);
|
||||
return 0;
|
||||
@ -786,6 +870,12 @@ out_free_pgtable:
|
||||
return err;
|
||||
}
|
||||
|
||||
void kvm_uninit_stage2_mmu(struct kvm *kvm)
|
||||
{
|
||||
kvm_free_stage2_pgd(&kvm->arch.mmu);
|
||||
kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
|
||||
}
|
||||
|
||||
static void stage2_unmap_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
@ -989,17 +1079,45 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_mmu_write_protect_pt_masked() - write protect dirty pages
|
||||
* kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE
|
||||
* pages for memory slot
|
||||
* @kvm: The KVM pointer
|
||||
* @slot: The memory slot to split
|
||||
*
|
||||
* Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired,
|
||||
* serializing operations for VM memory regions.
|
||||
*/
|
||||
static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
phys_addr_t start, end;
|
||||
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, slot);
|
||||
|
||||
start = memslot->base_gfn << PAGE_SHIFT;
|
||||
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
kvm_mmu_split_huge_pages(kvm, start, end);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_arch_mmu_enable_log_dirty_pt_masked() - enable dirty logging for selected pages.
|
||||
* @kvm: The KVM pointer
|
||||
* @slot: The memory slot associated with mask
|
||||
* @gfn_offset: The gfn offset in memory slot
|
||||
* @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
|
||||
* slot to be write protected
|
||||
* @mask: The mask of pages at offset 'gfn_offset' in this memory
|
||||
* slot to enable dirty logging on
|
||||
*
|
||||
* Walks bits set in mask write protects the associated pte's. Caller must
|
||||
* acquire kvm_mmu_lock.
|
||||
* Write-protects the selected pages to enable dirty logging, and then
|
||||
* splits them to PAGE_SIZE. Caller must acquire kvm->mmu_lock.
|
||||
*/
|
||||
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
@ -1007,21 +1125,20 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
|
||||
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
|
||||
|
||||
stage2_wp_range(&kvm->arch.mmu, start, end);
|
||||
}
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
|
||||
* dirty pages.
|
||||
*
|
||||
* It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
|
||||
* enable dirty logging for them.
|
||||
*/
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
|
||||
stage2_wp_range(&kvm->arch.mmu, start, end);
|
||||
|
||||
/*
|
||||
* Eager-splitting is done when manual-protect is set. We
|
||||
* also check for initially-all-set because we can avoid
|
||||
* eager-splitting if initially-all-set is false.
|
||||
* Initially-all-set equal false implies that huge-pages were
|
||||
* already split when enabling dirty logging: no need to do it
|
||||
* again.
|
||||
*/
|
||||
if (kvm_dirty_log_manual_protect_and_init_set(kvm))
|
||||
kvm_mmu_split_huge_pages(kvm, start, end);
|
||||
}
|
||||
|
||||
static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
|
||||
@ -1790,20 +1907,42 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;
|
||||
|
||||
/*
|
||||
* At this point memslot has been committed and there is an
|
||||
* allocated dirty_bitmap[], dirty pages will be tracked while the
|
||||
* memory slot is write protected.
|
||||
*/
|
||||
if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
if (log_dirty_pages) {
|
||||
|
||||
if (change == KVM_MR_DELETE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we're with initial-all-set, we don't need to write
|
||||
* protect any pages because they're all reported as dirty.
|
||||
* Huge pages and normal pages will be write protect gradually.
|
||||
* Huge and normal pages are write-protected and split
|
||||
* on either of these two cases:
|
||||
*
|
||||
* 1. with initial-all-set: gradually with CLEAR ioctls,
|
||||
*/
|
||||
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
|
||||
kvm_mmu_wp_memory_region(kvm, new->id);
|
||||
}
|
||||
if (kvm_dirty_log_manual_protect_and_init_set(kvm))
|
||||
return;
|
||||
/*
|
||||
* or
|
||||
* 2. without initial-all-set: all in one shot when
|
||||
* enabling dirty logging.
|
||||
*/
|
||||
kvm_mmu_wp_memory_region(kvm, new->id);
|
||||
kvm_mmu_split_memory_region(kvm, new->id);
|
||||
} else {
|
||||
/*
|
||||
* Free any leftovers from the eager page splitting cache. Do
|
||||
* this when deleting, moving, disabling dirty logging, or
|
||||
* creating the memslot (a nop). Doing it for deletes makes
|
||||
* sure we don't leak memory, and there's no need to keep the
|
||||
* cache around for any of the other cases.
|
||||
*/
|
||||
kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1877,7 +2016,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
|
||||
|
||||
/*
 * Drop all stage-2 state of @kvm.
 *
 * kvm_uninit_stage2_mmu() already calls kvm_free_stage2_pgd() on
 * kvm->arch.mmu and additionally frees the eager page splitting cache, so
 * no separate kvm_free_stage2_pgd() call is made here.
 */
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_uninit_stage2_mmu(kvm);
}
|
||||
|
||||
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
|
@ -991,6 +991,8 @@ static inline bool kvm_memslots_empty(struct kvm_memslots *slots)
|
||||
return RB_EMPTY_ROOT(&slots->gfn_tree);
|
||||
}
|
||||
|
||||
bool kvm_are_all_memslots_empty(struct kvm *kvm);
|
||||
|
||||
#define kvm_for_each_memslot(memslot, bkt, slots) \
|
||||
hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
|
||||
if (WARN_ON_ONCE(!memslot->npages)) { \
|
||||
|
@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
|
||||
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
|
||||
#define KVM_CAP_COUNTER_OFFSET 227
|
||||
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
|
||||
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
@ -4602,7 +4602,7 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static bool kvm_are_all_memslots_empty(struct kvm *kvm)
|
||||
bool kvm_are_all_memslots_empty(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -4615,6 +4615,7 @@ static bool kvm_are_all_memslots_empty(struct kvm *kvm)
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty);
|
||||
|
||||
static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
|
||||
struct kvm_enable_cap *cap)
|
||||
|
Loading…
x
Reference in New Issue
Block a user