KVM: x86/mmu: Remove KVM's MMU shrinker

Remove KVM's MMU shrinker and (almost) all of its related code, as the
current implementation is very disruptive to VMs (if it ever runs),
without providing any meaningful benefit[1].

Alternatively, KVM could repurpose its shrinker, e.g. to reclaim pages
from the per-vCPU caches[2], but given that no one has complained about
lack of TDP MMU support for the shrinker in the 3+ years since the TDP MMU
was enabled by default, it's safe to say that there is likely no real use
case for initiating reclaim of KVM's page tables from the shrinker.

And while clever/cute, reclaiming the per-vCPU caches doesn't scale the
same way that reclaiming in-use page table pages does.  E.g. the amount of
memory being used by a VM doesn't always directly correlate with the
number vCPUs, and even when it does, reclaiming a few pages from per-vCPU
caches likely won't make much of a dent in the VM's total memory usage,
especially for VMs with huge amounts of memory.

Lastly, if it turns out that there is a strong use case for dropping the
per-vCPU caches, re-introducing the shrinker registration is trivial
compared to the complexity of actually reclaiming pages from the caches.

[1] https://lore.kernel.org/lkml/Y45dldZnI6OIf+a5@google.com
[2] https://lore.kernel.org/kvm/20241004195540.210396-3-vipinsh@google.com

Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Vipin Sharma <vipinsh@google.com>
Link: https://lore.kernel.org/r/20241101201437.1604321-2-vipinsh@google.com
[sean: keep zapped_obsolete_pages for now, massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
Vipin Sharma 2024-11-01 13:14:37 -07:00 committed by Sean Christopherson
parent 06c4cd957b
commit fe140e611d

View File

@ -179,7 +179,6 @@ struct kvm_shadow_walk_iterator {
static struct kmem_cache *pte_list_desc_cache;
struct kmem_cache *mmu_page_header_cache;
static struct percpu_counter kvm_total_used_mmu_pages;
static void mmu_spte_set(u64 *sptep, u64 spte);
@ -1622,27 +1621,15 @@ static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
#endif
}
/*
* This value is the sum of all of the kvm instances's
* kvm->arch.n_used_mmu_pages values. We need a global,
* aggregate version in order to make the slab shrinker
* faster
*/
static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
}
static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_mod_used_mmu_pages(kvm, +1);
kvm->arch.n_used_mmu_pages++;
kvm_account_pgtable_pages((void *)sp->spt, +1);
}
static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_mod_used_mmu_pages(kvm, -1);
kvm->arch.n_used_mmu_pages--;
kvm_account_pgtable_pages((void *)sp->spt, -1);
}
@ -6492,11 +6479,6 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
kvm_tdp_mmu_zap_invalidated_roots(kvm);
}
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
{
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
void kvm_mmu_init_vm(struct kvm *kvm)
{
kvm->arch.shadow_mmio_value = shadow_mmio_value;
@ -7112,72 +7094,6 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
}
}
static unsigned long mmu_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct kvm *kvm;
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
/*
* Never scan more than sc->nr_to_scan VM instances.
* Will not hit this condition practically since we do not try
* to shrink more than one VM and it is very unlikely to see
* !n_used_mmu_pages so many times.
*/
if (!nr_to_scan--)
break;
/*
* n_used_mmu_pages is accessed without holding kvm->mmu_lock
* here. We may skip a VM instance errorneosly, but we do not
* want to shrink a VM that only started to populate its MMU
* anyway.
*/
if (!kvm->arch.n_used_mmu_pages &&
!kvm_has_zapped_obsolete_pages(kvm))
continue;
idx = srcu_read_lock(&kvm->srcu);
write_lock(&kvm->mmu_lock);
if (kvm_has_zapped_obsolete_pages(kvm)) {
kvm_mmu_commit_zap_page(kvm,
&kvm->arch.zapped_obsolete_pages);
goto unlock;
}
freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
unlock:
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
/*
* unfair on small ones
* per-vm shrinkers cry out
* sadness comes quickly
*/
list_move_tail(&kvm->vm_list, &vm_list);
break;
}
mutex_unlock(&kvm_lock);
return freed;
}
static unsigned long mmu_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
}
static struct shrinker *mmu_shrinker;
static void mmu_destroy_caches(void)
{
kmem_cache_destroy(pte_list_desc_cache);
@ -7304,23 +7220,8 @@ int kvm_mmu_vendor_module_init(void)
if (!mmu_page_header_cache)
goto out;
if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
goto out;
mmu_shrinker = shrinker_alloc(0, "x86-mmu");
if (!mmu_shrinker)
goto out_shrinker;
mmu_shrinker->count_objects = mmu_shrink_count;
mmu_shrinker->scan_objects = mmu_shrink_scan;
mmu_shrinker->seeks = DEFAULT_SEEKS * 10;
shrinker_register(mmu_shrinker);
return 0;
out_shrinker:
percpu_counter_destroy(&kvm_total_used_mmu_pages);
out:
mmu_destroy_caches();
return ret;
@ -7337,8 +7238,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
void kvm_mmu_vendor_module_exit(void)
{
mmu_destroy_caches();
percpu_counter_destroy(&kvm_total_used_mmu_pages);
shrinker_free(mmu_shrinker);
}
/*