Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Merge branch kvm-arm64/eager-page-splitting into kvmarm/next
* kvm-arm64/eager-page-splitting:
  : Eager Page Splitting, courtesy of Ricardo Koller.
  :
  : Dirty logging performance is dominated by the cost of splitting
  : hugepages to PTE granularity. On systems that mere mortals can get their
  : hands on, each fault incurs the cost of a full break-before-make
  : pattern, wherein the broadcast invalidation and ensuing serialization
  : significantly increases fault latency.
  :
  : The goal of eager page splitting is to move the cost of hugepage
  : splitting out of the stage-2 fault path and instead into the ioctls
  : responsible for managing the dirty log:
  :
  :  - If manual protection is enabled for the VM, hugepage splitting
  :    happens in the KVM_CLEAR_DIRTY_LOG ioctl. This is desirable as it
  :    provides userspace granular control over hugepage splitting.
  :
  :  - Otherwise, if userspace relies on the legacy dirty log behavior
  :    (clear on collection), hugepage splitting is done at the moment dirty
  :    logging is enabled for a particular memslot.
  :
  : Support for eager page splitting requires explicit opt-in from
  : userspace, which is realized through the
  : KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE capability.
  arm64: kvm: avoid overflow in integer division
  KVM: arm64: Use local TLBI on permission relaxation
  KVM: arm64: Split huge pages during KVM_CLEAR_DIRTY_LOG
  KVM: arm64: Open-code kvm_mmu_write_protect_pt_masked()
  KVM: arm64: Split huge pages when dirty logging is enabled
  KVM: arm64: Add kvm_uninit_stage2_mmu()
  KVM: arm64: Refactor kvm_arch_commit_memory_region()
  KVM: arm64: Add kvm_pgtable_stage2_split()
  KVM: arm64: Add KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
  KVM: arm64: Export kvm_are_all_memslots_empty()
  KVM: arm64: Add helper for creating unlinked stage2 subtrees
  KVM: arm64: Add KVM_PGTABLE_WALK flags for skipping CMOs and BBM TLBIs
  KVM: arm64: Rename free_removed to free_unlinked

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
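For orientation only, here is a minimal userspace sketch of the opt-in described above: the capability is enabled on the VM before any memslot exists, as the series requires. The vm_fd variable, the chosen chunk size and the lack of error handling are assumptions of the example, not part of this merge.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: opt in to eager page splitting right after KVM_CREATE_VM and
 * before the first KVM_SET_USER_MEMORY_REGION call; the kernel rejects
 * the ioctl with -EINVAL once any memslot exists or if chunk_size is
 * not a supported block size. */
static int enable_eager_split(int vm_fd, unsigned long long chunk_size)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE,
                .args = { chunk_size },
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}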
commit 83510396c0
@@ -8445,6 +8445,33 @@ structure.
 When getting the Modified Change Topology Report value, the attr->addr
 must point to a byte where the value will be stored or retrieved from.
 
+8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+---------------------------------------
+
+:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+:Architectures: arm64
+:Type: vm
+:Parameters: arg[0] is the new split chunk size.
+:Returns: 0 on success, -EINVAL if any memslot was already created.
+
+This capability sets the chunk size used in Eager Page Splitting.
+
+Eager Page Splitting improves the performance of dirty-logging (used
+in live migrations) when guest memory is backed by huge-pages. It
+avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
+it eagerly when enabling dirty logging (with the
+KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
+KVM_CLEAR_DIRTY_LOG.
+
+The chunk size specifies how many pages to break at a time, using a
+single allocation for each chunk. Bigger the chunk size, more pages
+need to be allocated ahead of time.
+
+The chunk size needs to be a valid block size. The list of acceptable
+block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
+64-bit bitmap (each bit describing a block size). The default value is
+0, to disable the eager page splitting.
+
 9. Known KVM API problems
 =========================
 
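As a brief illustration of how userspace might consume the capability documented above, the sketch below queries the KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES bitmap and derives a chunk size from it. This is an assumed usage example, not part of the series; vm_fd is presumed to be a VM file descriptor returned by KVM_CREATE_VM.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: pick the largest advertised block size as the
 * split chunk size. As the documentation above notes, bigger chunks
 * mean more page-table pages are pre-allocated per chunk. */
static unsigned long long pick_split_chunk_size(int vm_fd)
{
        long bitmap = ioctl(vm_fd, KVM_CHECK_EXTENSION,
                            KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);

        if (bitmap <= 0)
                return 0;       /* leave eager page splitting disabled */

        /* Each set bit n stands for a supported block size of 2^n bytes. */
        return 1ULL << (63 - __builtin_clzll((unsigned long long)bitmap));
}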
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
         __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
         __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
         __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
         __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
         __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
         __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
                                      int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+                                         phys_addr_t ipa,
+                                         int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
@@ -159,6 +159,21 @@ struct kvm_s2_mmu {
         /* The last vcpu id that ran on each physical CPU */
         int __percpu *last_vcpu_ran;
 
+#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
+        /*
+         * Memory cache used to split
+         * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
+         * is used to allocate stage2 page tables while splitting huge
+         * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+         * influences both the capacity of the split page cache, and
+         * how often KVM reschedules. Be wary of raising CHUNK_SIZE
+         * too high.
+         *
+         * Protected by kvm->slots_lock.
+         */
+        struct kvm_mmu_memory_cache split_page_cache;
+        uint64_t split_page_chunk_size;
+
         struct kvm_arch *arch;
 };
 
@@ -172,6 +172,7 @@ void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
 int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
+void kvm_uninit_stage2_mmu(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
                           phys_addr_t pa, unsigned long size, bool writable);
@@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
         return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
 }
 
+static inline u32 kvm_supported_block_sizes(void)
+{
+        u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+        u32 r = 0;
+
+        for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
+                r |= BIT(kvm_granule_shift(level));
+
+        return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+        bool is_power_of_two = IS_ALIGNED(size, size);
+
+        return is_power_of_two && (size & kvm_supported_block_sizes());
+}
+
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
  * @zalloc_page:                Allocate a single zeroed memory page.
@@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
  *                              allocation is physically contiguous.
  * @free_pages_exact:           Free an exact number of memory pages previously
  *                              allocated by zalloc_pages_exact.
- * @free_removed_table:         Free a removed paging structure by unlinking and
+ * @free_unlinked_table:        Free an unlinked paging structure by unlinking and
  *                              dropping references.
  * @get_page:                   Increment the refcount on a page.
  * @put_page:                   Decrement the refcount on a page. When the
@@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
         void*           (*zalloc_page)(void *arg);
         void*           (*zalloc_pages_exact)(size_t size);
         void            (*free_pages_exact)(void *addr, size_t size);
-        void            (*free_removed_table)(void *addr, u32 level);
+        void            (*free_unlinked_table)(void *addr, u32 level);
         void            (*get_page)(void *addr);
         void            (*put_page)(void *addr);
         int             (*page_count)(void *addr);
@@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
  *                                      with other software walkers.
  * @KVM_PGTABLE_WALK_HANDLE_FAULT:      Indicates the page-table walk was
  *                                      invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI:     Visit and update table entries
+ *                                      without Break-before-make's
+ *                                      TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO:          Visit and update table entries
+ *                                      without Cache maintenance
+ *                                      operations required.
  */
 enum kvm_pgtable_walk_flags {
         KVM_PGTABLE_WALK_LEAF                   = BIT(0),
@@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
         KVM_PGTABLE_WALK_TABLE_POST             = BIT(2),
         KVM_PGTABLE_WALK_SHARED                 = BIT(3),
         KVM_PGTABLE_WALK_HANDLE_FAULT           = BIT(4),
+        KVM_PGTABLE_WALK_SKIP_BBM_TLBI          = BIT(5),
+        KVM_PGTABLE_WALK_SKIP_CMO               = BIT(6),
 };
 
 struct kvm_pgtable_visit_ctx {
@@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 
 /**
- * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
  * @mm_ops:     Memory management callbacks.
  * @pgtable:    Unlinked stage-2 paging structure to be freed.
  * @level:      Level of the stage-2 paging structure to be freed.
@@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
  * The page-table is assumed to be unreachable by any hardware walkers prior to
  * freeing and therefore no TLB invalidation is performed.
  */
-void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt:        Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys:       Physical address of the memory to map.
+ * @level:      Starting level of the stage-2 paging structure to be created.
+ * @prot:       Permissions and attributes for the mapping.
+ * @mc:         Cache of pre-allocated and zeroed memory from which to allocate
+ *              page-table pages.
+ * @force_pte:  Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachable by the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+                                              u64 phys, u32 level,
+                                              enum kvm_pgtable_prot prot,
+                                              void *mc, bool force_pte);
 
 /**
  * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
@@ -620,6 +672,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
  */
 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
 
+/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ *                              to PAGE_SIZE guest pages.
+ * @pgt:         Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr:        Intermediate physical address from which to split.
+ * @size:        Size of the range.
+ * @mc:          Cache of pre-allocated and zeroed memory from which to allocate
+ *               page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by @mc_capacity.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                             struct kvm_mmu_memory_cache *mc);
+
 /**
  * kvm_pgtable_walk() - Walk a page-table.
  * @pgt:        Page-table structure initialised by kvm_pgtable_*_init().
@@ -65,6 +65,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                             struct kvm_enable_cap *cap)
 {
         int r;
+        u64 new_cap;
 
         if (cap->flags)
                 return -EINVAL;
@@ -89,6 +90,24 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                 r = 0;
                 set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
                 break;
+        case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
+                new_cap = cap->args[0];
+
+                mutex_lock(&kvm->slots_lock);
+                /*
+                 * To keep things simple, allow changing the chunk
+                 * size only when no memory slots have been created.
+                 */
+                if (!kvm_are_all_memslots_empty(kvm)) {
+                        r = -EINVAL;
+                } else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
+                        r = -EINVAL;
+                } else {
+                        r = 0;
+                        kvm->arch.mmu.split_page_chunk_size = new_cap;
+                }
+                mutex_unlock(&kvm->slots_lock);
+                break;
         default:
                 r = -EINVAL;
                 break;
@@ -302,6 +321,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
         case KVM_CAP_ARM_PTRAUTH_GENERIC:
                 r = system_has_full_ptr_auth();
                 break;
+        case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
+                if (kvm)
+                        r = kvm->arch.mmu.split_page_chunk_size;
+                else
+                        r = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
+                break;
+        case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES:
+                r = kvm_supported_block_sizes();
+                break;
         default:
                 r = 0;
         }
@@ -125,6 +125,15 @@ static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
         __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
 }
 
+static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
+{
+        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+        DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
+        DECLARE_REG(int, level, host_ctxt, 3);
+
+        __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
         DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -315,6 +324,7 @@ static const hcall_t host_hcall[] = {
         HANDLE_FUNC(__kvm_vcpu_run),
         HANDLE_FUNC(__kvm_flush_vm_context),
         HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
+        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
         HANDLE_FUNC(__kvm_tlb_flush_vmid),
         HANDLE_FUNC(__kvm_flush_cpu_context),
         HANDLE_FUNC(__kvm_timer_set_cntvoff),
@@ -91,9 +91,9 @@ static void host_s2_put_page(void *addr)
         hyp_put_page(&host_s2_pool, addr);
 }
 
-static void host_s2_free_removed_table(void *addr, u32 level)
+static void host_s2_free_unlinked_table(void *addr, u32 level)
 {
-        kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
+        kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
 }
 
 static int prepare_s2_pool(void *pgt_pool_base)
@@ -110,7 +110,7 @@ static int prepare_s2_pool(void *pgt_pool_base)
         host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
                 .zalloc_pages_exact = host_s2_zalloc_pages_exact,
                 .zalloc_page = host_s2_zalloc_page,
-                .free_removed_table = host_s2_free_removed_table,
+                .free_unlinked_table = host_s2_free_unlinked_table,
                 .phys_to_virt = hyp_phys_to_virt,
                 .virt_to_phys = hyp_virt_to_phys,
                 .page_count = hyp_page_count,
@@ -130,6 +130,58 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
         __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+                                  phys_addr_t ipa, int level)
+{
+        struct tlb_inv_context cxt;
+
+        /* Switch to requested VMID */
+        __tlb_switch_to_guest(mmu, &cxt, true);
+
+        /*
+         * We could do so much better if we had the VA as well.
+         * Instead, we invalidate Stage-2 for this IPA, and the
+         * whole of Stage-1. Weep...
+         */
+        ipa >>= 12;
+        __tlbi_level(ipas2e1, ipa, level);
+
+        /*
+         * We have to ensure completion of the invalidation at Stage-2,
+         * since a table walk on another CPU could refill a TLB with a
+         * complete (S1 + S2) walk based on the old Stage-2 mapping if
+         * the Stage-1 invalidation happened first.
+         */
+        dsb(nsh);
+        __tlbi(vmalle1);
+        dsb(nsh);
+        isb();
+
+        /*
+         * If the host is running at EL1 and we have a VPIPT I-cache,
+         * then we must perform I-cache maintenance at EL2 in order for
+         * it to have an effect on the guest. Since the guest cannot hit
+         * I-cache lines allocated with a different VMID, we don't need
+         * to worry about junk out of guest reset (we nuke the I-cache on
+         * VMID rollover), but we do need to be careful when remapping
+         * executable pages for the same guest. This can happen when KSM
+         * takes a CoW fault on an executable page, copies the page into
+         * a page that was previously mapped in the guest and then needs
+         * to invalidate the guest view of the I-cache for that page
+         * from EL1. To solve this, we invalidate the entire I-cache when
+         * unmapping a page from a guest if we have a VPIPT I-cache but
+         * the host is running at EL1. As above, we could do better if
+         * we had the VA.
+         *
+         * The moral of this story is: if you have a VPIPT I-cache, then
+         * you should be running with VHE enabled.
+         */
+        if (icache_is_vpipt())
+                icache_inval_all_pou();
+
+        __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
         struct tlb_inv_context cxt;
@@ -63,6 +63,16 @@ struct kvm_pgtable_walk_data {
         const u64                       end;
 };
 
+static bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx)
+{
+        return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI);
+}
+
+static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
+{
+        return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
+}
+
 static bool kvm_phys_is_valid(u64 phys)
 {
         return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
@@ -743,14 +753,17 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
         if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
                 return false;
 
-        /*
-         * Perform the appropriate TLB invalidation based on the evicted pte
-         * value (if any).
-         */
-        if (kvm_pte_table(ctx->old, ctx->level))
-                kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
-        else if (kvm_pte_valid(ctx->old))
-                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+        if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
+                /*
+                 * Perform the appropriate TLB invalidation based on the
+                 * evicted pte value (if any).
+                 */
+                if (kvm_pte_table(ctx->old, ctx->level))
+                        kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+                else if (kvm_pte_valid(ctx->old))
+                        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                     ctx->addr, ctx->level);
+        }
 
         if (stage2_pte_is_counted(ctx->old))
                 mm_ops->put_page(ctx->ptep);
@@ -857,11 +870,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
                 return -EAGAIN;
 
         /* Perform CMOs before installation of the guest stage-2 PTE */
-        if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+        if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc &&
+            stage2_pte_cacheable(pgt, new))
                 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
                                                granule);
 
-        if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+        if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou &&
+            stage2_pte_executable(new))
                 mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
 
         stage2_make_pte(ctx, new);
@@ -883,7 +898,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
         if (ret)
                 return ret;
 
-        mm_ops->free_removed_table(childp, ctx->level);
+        mm_ops->free_unlinked_table(childp, ctx->level);
         return 0;
 }
 
@@ -928,7 +943,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
  * The TABLE_PRE callback runs for table entries on the way down, looking
  * for table entries which we could conceivably replace with a block entry
  * for this mapping. If it finds one it replaces the entry and calls
- * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
+ * kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table.
  *
  * Otherwise, the LEAF callback performs the mapping at the existing leaves
  * instead.
@@ -1197,7 +1212,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                                        KVM_PGTABLE_WALK_HANDLE_FAULT |
                                        KVM_PGTABLE_WALK_SHARED);
         if (!ret)
-                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
+                kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
         return ret;
 }
 
@@ -1230,6 +1245,162 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
         return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+                                              u64 phys, u32 level,
+                                              enum kvm_pgtable_prot prot,
+                                              void *mc, bool force_pte)
+{
+        struct stage2_map_data map_data = {
+                .phys           = phys,
+                .mmu            = pgt->mmu,
+                .memcache       = mc,
+                .force_pte      = force_pte,
+        };
+        struct kvm_pgtable_walker walker = {
+                .cb             = stage2_map_walker,
+                .flags          = KVM_PGTABLE_WALK_LEAF |
+                                  KVM_PGTABLE_WALK_SKIP_BBM_TLBI |
+                                  KVM_PGTABLE_WALK_SKIP_CMO,
+                .arg            = &map_data,
+        };
+        /*
+         * The input address (.addr) is irrelevant for walking an
+         * unlinked table. Construct an ambiguous IA range to map
+         * kvm_granule_size(level) worth of memory.
+         */
+        struct kvm_pgtable_walk_data data = {
+                .walker = &walker,
+                .addr   = 0,
+                .end    = kvm_granule_size(level),
+        };
+        struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
+        kvm_pte_t *pgtable;
+        int ret;
+
+        if (!IS_ALIGNED(phys, kvm_granule_size(level)))
+                return ERR_PTR(-EINVAL);
+
+        ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
+        if (ret)
+                return ERR_PTR(ret);
+
+        pgtable = mm_ops->zalloc_page(mc);
+        if (!pgtable)
+                return ERR_PTR(-ENOMEM);
+
+        ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable,
+                                 level + 1);
+        if (ret) {
+                kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
+                mm_ops->put_page(pgtable);
+                return ERR_PTR(ret);
+        }
+
+        return pgtable;
+}
+
+/*
+ * Get the number of page-tables needed to replace a block with a
+ * fully populated tree up to the PTE entries. Note that @level is
+ * interpreted as in "level @level entry".
+ */
+static int stage2_block_get_nr_page_tables(u32 level)
+{
+        switch (level) {
+        case 1:
+                return PTRS_PER_PTE + 1;
+        case 2:
+                return 1;
+        case 3:
+                return 0;
+        default:
+                WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
+                             level >= KVM_PGTABLE_MAX_LEVELS);
+                return -EINVAL;
+        };
+}
+
+static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
+                               enum kvm_pgtable_walk_flags visit)
+{
+        struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+        struct kvm_mmu_memory_cache *mc = ctx->arg;
+        struct kvm_s2_mmu *mmu;
+        kvm_pte_t pte = ctx->old, new, *childp;
+        enum kvm_pgtable_prot prot;
+        u32 level = ctx->level;
+        bool force_pte;
+        int nr_pages;
+        u64 phys;
+
+        /* No huge-pages exist at the last level */
+        if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+                return 0;
+
+        /* We only split valid block mappings */
+        if (!kvm_pte_valid(pte))
+                return 0;
+
+        nr_pages = stage2_block_get_nr_page_tables(level);
+        if (nr_pages < 0)
+                return nr_pages;
+
+        if (mc->nobjs >= nr_pages) {
+                /* Build a tree mapped down to the PTE granularity. */
+                force_pte = true;
+        } else {
+                /*
+                 * Don't force PTEs, so create_unlinked() below does
+                 * not populate the tree up to the PTE level. The
+                 * consequence is that the call will require a single
+                 * page of level 2 entries at level 1, or a single
+                 * page of PTEs at level 2. If we are at level 1, the
+                 * PTEs will be created recursively.
+                 */
+                force_pte = false;
+                nr_pages = 1;
+        }
+
+        if (mc->nobjs < nr_pages)
+                return -ENOMEM;
+
+        mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
+        phys = kvm_pte_to_phys(pte);
+        prot = kvm_pgtable_stage2_pte_prot(pte);
+
+        childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
+                                                    level, prot, mc, force_pte);
+        if (IS_ERR(childp))
+                return PTR_ERR(childp);
+
+        if (!stage2_try_break_pte(ctx, mmu)) {
+                kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
+                mm_ops->put_page(childp);
+                return -EAGAIN;
+        }
+
+        /*
+         * Note, the contents of the page table are guaranteed to be made
+         * visible before the new PTE is assigned because stage2_make_pte()
+         * writes the PTE using smp_store_release().
+         */
+        new = kvm_init_table_pte(childp, mm_ops);
+        stage2_make_pte(ctx, new);
+        dsb(ishst);
+        return 0;
+}
+
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                             struct kvm_mmu_memory_cache *mc)
+{
+        struct kvm_pgtable_walker walker = {
+                .cb     = stage2_split_walker,
+                .flags  = KVM_PGTABLE_WALK_LEAF,
+                .arg    = mc,
+        };
+
+        return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                               struct kvm_pgtable_mm_ops *mm_ops,
@@ -1299,7 +1470,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
         pgt->pgd = NULL;
 }
 
-void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
 {
         kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
         struct kvm_pgtable_walker walker = {
@@ -111,6 +111,38 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
         __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+                                  phys_addr_t ipa, int level)
+{
+        struct tlb_inv_context cxt;
+
+        dsb(nshst);
+
+        /* Switch to requested VMID */
+        __tlb_switch_to_guest(mmu, &cxt);
+
+        /*
+         * We could do so much better if we had the VA as well.
+         * Instead, we invalidate Stage-2 for this IPA, and the
+         * whole of Stage-1. Weep...
+         */
+        ipa >>= 12;
+        __tlbi_level(ipas2e1, ipa, level);
+
+        /*
+         * We have to ensure completion of the invalidation at Stage-2,
+         * since a table walk on another CPU could refill a TLB with a
+         * complete (S1 + S2) walk based on the old Stage-2 mapping if
+         * the Stage-1 invalidation happened first.
+         */
+        dsb(nsh);
+        __tlbi(vmalle1);
+        dsb(nsh);
+        isb();
+
+        __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
         struct tlb_inv_context cxt;
@@ -31,14 +31,21 @@ static phys_addr_t __ro_after_init hyp_idmap_vector;
 
 static unsigned long __ro_after_init io_map_base;
 
-static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
+static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
+                                           phys_addr_t size)
 {
-        phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
         phys_addr_t boundary = ALIGN_DOWN(addr + size, size);
 
         return (boundary - 1 < end - 1) ? boundary : end;
 }
 
+static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+        phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
+
+        return __stage2_range_addr_end(addr, end, size);
+}
+
 /*
  * Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
  * we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
@@ -75,6 +82,79 @@ static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
 #define stage2_apply_range_resched(mmu, addr, end, fn)                  \
         stage2_apply_range(mmu, addr, end, fn, true)
 
+/*
+ * Get the maximum number of page-tables pages needed to split a range
+ * of blocks into PAGE_SIZE PTEs. It assumes the range is already
+ * mapped at level 2, or at level 1 if allowed.
+ */
+static int kvm_mmu_split_nr_page_tables(u64 range)
+{
+        int n = 0;
+
+        if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2)
+                n += DIV_ROUND_UP(range, PUD_SIZE);
+        n += DIV_ROUND_UP(range, PMD_SIZE);
+        return n;
+}
+
+static bool need_split_memcache_topup_or_resched(struct kvm *kvm)
+{
+        struct kvm_mmu_memory_cache *cache;
+        u64 chunk_size, min;
+
+        if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
+                return true;
+
+        chunk_size = kvm->arch.mmu.split_page_chunk_size;
+        min = kvm_mmu_split_nr_page_tables(chunk_size);
+        cache = &kvm->arch.mmu.split_page_cache;
+        return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
+}
+
+static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
+                                    phys_addr_t end)
+{
+        struct kvm_mmu_memory_cache *cache;
+        struct kvm_pgtable *pgt;
+        int ret, cache_capacity;
+        u64 next, chunk_size;
+
+        lockdep_assert_held_write(&kvm->mmu_lock);
+
+        chunk_size = kvm->arch.mmu.split_page_chunk_size;
+        cache_capacity = kvm_mmu_split_nr_page_tables(chunk_size);
+
+        if (chunk_size == 0)
+                return 0;
+
+        cache = &kvm->arch.mmu.split_page_cache;
+
+        do {
+                if (need_split_memcache_topup_or_resched(kvm)) {
+                        write_unlock(&kvm->mmu_lock);
+                        cond_resched();
+                        /* Eager page splitting is best-effort. */
+                        ret = __kvm_mmu_topup_memory_cache(cache,
+                                                           cache_capacity,
+                                                           cache_capacity);
+                        write_lock(&kvm->mmu_lock);
+                        if (ret)
+                                break;
+                }
+
+                pgt = kvm->arch.mmu.pgt;
+                if (!pgt)
+                        return -EINVAL;
+
+                next = __stage2_range_addr_end(addr, end, chunk_size);
+                ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
+                if (ret)
+                        break;
+        } while (addr = next, addr != end);
+
+        return ret;
+}
+
 static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 {
         return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
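To make the cache sizing in the hunk above concrete, here is a small worked example of my own (not from the patch), assuming 4KiB pages so that PUD_SIZE is 1GiB, PMD_SIZE is 2MiB and the minimum block level is below 2, as in kvm_mmu_split_nr_page_tables():

#define EXAMPLE_DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* Page-table pages needed per chunk refill on a 4KiB-page host. */
static int example_nr_page_tables(unsigned long long chunk)
{
        return EXAMPLE_DIV_ROUND_UP(chunk, 1ULL << 30) +   /* level-2 tables */
               EXAMPLE_DIV_ROUND_UP(chunk, 1ULL << 21);    /* level-3 tables */
}

/* example_nr_page_tables(2MiB chunk) == 1 + 1   == 2 pages,
 * example_nr_page_tables(1GiB chunk) == 1 + 512 == 513 pages,
 * which is why the split_page_cache comment warns against a very
 * large chunk size. */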
@@ -131,21 +211,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
 
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
 
-static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
+static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
 {
         struct page *page = container_of(head, struct page, rcu_head);
         void *pgtable = page_to_virt(page);
         u32 level = page_private(page);
 
-        kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
+        kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
 }
 
-static void stage2_free_removed_table(void *addr, u32 level)
+static void stage2_free_unlinked_table(void *addr, u32 level)
 {
         struct page *page = virt_to_page(addr);
 
         set_page_private(page, (unsigned long)level);
-        call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
+        call_rcu(&page->rcu_head, stage2_free_unlinked_table_rcu_cb);
 }
 
 static void kvm_host_get_page(void *addr)
@@ -701,7 +781,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
         .zalloc_page            = stage2_memcache_zalloc_page,
         .zalloc_pages_exact     = kvm_s2_zalloc_pages_exact,
         .free_pages_exact       = kvm_s2_free_pages_exact,
-        .free_removed_table     = stage2_free_removed_table,
+        .free_unlinked_table    = stage2_free_unlinked_table,
         .get_page               = kvm_host_get_page,
         .put_page               = kvm_s2_put_page,
         .page_count             = kvm_host_page_count,
@@ -775,6 +855,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
         for_each_possible_cpu(cpu)
                 *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
 
+        /* The eager page splitting is disabled by default */
+        mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
+        mmu->split_page_cache.gfp_zero = __GFP_ZERO;
+
         mmu->pgt = pgt;
         mmu->pgd_phys = __pa(pgt->pgd);
         return 0;
@@ -786,6 +870,12 @@ out_free_pgtable:
         return err;
 }
 
+void kvm_uninit_stage2_mmu(struct kvm *kvm)
+{
+        kvm_free_stage2_pgd(&kvm->arch.mmu);
+        kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
+}
+
 static void stage2_unmap_memslot(struct kvm *kvm,
                                  struct kvm_memory_slot *memslot)
 {
@@ -989,17 +1079,45 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 }
 
 /**
- * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
+ * kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE
+ *                                 pages for memory slot
+ * @kvm: The KVM pointer
+ * @slot: The memory slot to split
+ *
+ * Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired,
+ * serializing operations for VM memory regions.
+ */
+static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot)
+{
+        struct kvm_memslots *slots;
+        struct kvm_memory_slot *memslot;
+        phys_addr_t start, end;
+
+        lockdep_assert_held(&kvm->slots_lock);
+
+        slots = kvm_memslots(kvm);
+        memslot = id_to_memslot(slots, slot);
+
+        start = memslot->base_gfn << PAGE_SHIFT;
+        end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+        write_lock(&kvm->mmu_lock);
+        kvm_mmu_split_huge_pages(kvm, start, end);
+        write_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked() - enable dirty logging for selected pages.
  * @kvm: The KVM pointer
  * @slot: The memory slot associated with mask
  * @gfn_offset: The gfn offset in memory slot
- * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
- *		slot to be write protected
+ * @mask: The mask of pages at offset 'gfn_offset' in this memory
+ *		slot to enable dirty logging on
  *
- * Walks bits set in mask write protects the associated pte's. Caller must
- * acquire kvm_mmu_lock.
+ * Writes protect selected pages to enable dirty logging, and then
+ * splits them to PAGE_SIZE. Caller must acquire kvm->mmu_lock.
  */
-static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		struct kvm_memory_slot *slot,
 		gfn_t gfn_offset, unsigned long mask)
 {
@@ -1007,21 +1125,20 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
         phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
         phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
 
-        stage2_wp_range(&kvm->arch.mmu, start, end);
-}
+        lockdep_assert_held_write(&kvm->mmu_lock);
 
-/*
- * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
- * dirty pages.
- *
- * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
- * enable dirty logging for them.
- */
-void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
-		struct kvm_memory_slot *slot,
-		gfn_t gfn_offset, unsigned long mask)
-{
-        kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+        stage2_wp_range(&kvm->arch.mmu, start, end);
+
+        /*
+         * Eager-splitting is done when manual-protect is set. We
+         * also check for initially-all-set because we can avoid
+         * eager-splitting if initially-all-set is false.
+         * Initially-all-set equal false implies that huge-pages were
+         * already split when enabling dirty logging: no need to do it
+         * again.
+         */
+        if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+                kvm_mmu_split_huge_pages(kvm, start, end);
 }
 
 static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
@@ -1790,20 +1907,42 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                    const struct kvm_memory_slot *new,
                                    enum kvm_mr_change change)
 {
+        bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+
         /*
          * At this point memslot has been committed and there is an
          * allocated dirty_bitmap[], dirty pages will be tracked while the
          * memory slot is write protected.
          */
-        if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+        if (log_dirty_pages) {
+
+                if (change == KVM_MR_DELETE)
+                        return;
+
                 /*
-                 * If we're with initial-all-set, we don't need to write
-                 * protect any pages because they're all reported as dirty.
-                 * Huge pages and normal pages will be write protect gradually.
+                 * Huge and normal pages are write-protected and split
+                 * on either of these two cases:
+                 *
+                 * 1. with initial-all-set: gradually with CLEAR ioctls,
                  */
-                if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
-                        kvm_mmu_wp_memory_region(kvm, new->id);
-                }
+                if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+                        return;
+                /*
+                 * or
+                 * 2. without initial-all-set: all in one shot when
+                 *    enabling dirty logging.
+                 */
+                kvm_mmu_wp_memory_region(kvm, new->id);
+                kvm_mmu_split_memory_region(kvm, new->id);
+        } else {
+                /*
+                 * Free any leftovers from the eager page splitting cache. Do
+                 * this when deleting, moving, disabling dirty logging, or
+                 * creating the memslot (a nop). Doing it for deletes makes
+                 * sure we don't leak memory, and there's no need to keep the
+                 * cache around for any of the other cases.
+                 */
+                kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
         }
 }
 
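The two cases distinguished in the hunk above map onto the userspace dirty-log flow roughly as in the sketch below. This is an illustration only (slot, bitmap and error handling are assumed): with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and eager splitting enabled, hugepages covering a cleared range are split inside KVM_CLEAR_DIRTY_LOG; without manual protect they were already split when dirty logging was enabled on the memslot.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch of one dirty-log collection round with manual protect enabled. */
static int collect_and_clear(int vm_fd, unsigned int slot,
                             void *bitmap, unsigned int npages)
{
        struct kvm_dirty_log get = {
                .slot = slot,
                .dirty_bitmap = bitmap,
        };
        struct kvm_clear_dirty_log clear = {
                .slot = slot,
                .first_page = 0,
                .num_pages = npages,
                .dirty_bitmap = bitmap,
        };

        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &get))
                return -1;

        /* ... transmit the pages marked in bitmap, then re-arm them.
         * With eager splitting, any hugepage overlapping the cleared
         * range is broken to PTEs here, off the vCPU fault path. */
        return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}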
@@ -1877,7 +2016,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
-        kvm_free_stage2_pgd(&kvm->arch.mmu);
+        kvm_uninit_stage2_mmu(kvm);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -991,6 +991,8 @@ static inline bool kvm_memslots_empty(struct kvm_memslots *slots)
         return RB_EMPTY_ROOT(&slots->gfn_tree);
 }
 
+bool kvm_are_all_memslots_empty(struct kvm *kvm);
+
 #define kvm_for_each_memslot(memslot, bkt, slots)                             \
         hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
                 if (WARN_ON_ONCE(!memslot->npages)) {                         \
@@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
 #define KVM_CAP_COUNTER_OFFSET 227
+#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
+#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -4602,7 +4602,7 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
         return -EINVAL;
 }
 
-static bool kvm_are_all_memslots_empty(struct kvm *kvm)
+bool kvm_are_all_memslots_empty(struct kvm *kvm)
 {
         int i;
 
@@ -4615,6 +4615,7 @@ static bool kvm_are_all_memslots_empty(struct kvm *kvm)
 
         return true;
 }
+EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty);
 
 static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
                                            struct kvm_enable_cap *cap)