mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
KVM: Terminate memslot walks via used_slots
Refactor memslot handling to treat the number of used slots as the de facto size of the memslot array, e.g. return NULL from id_to_memslot() when an invalid index is provided instead of relying on npages==0 to detect an invalid memslot. Rework the sorting and walking of memslots in advance of dynamically sizing memslots to aid bisection and debug, e.g. with luck, a bug in the refactoring will bisect here and/or hit a WARN instead of randomly corrupting memory. Alternatively, a global null/invalid memslot could be returned, i.e. so callers of id_to_memslot() don't have to explicitly check for a NULL memslot, but that approach runs the risk of introducing difficult-to- debug issues, e.g. if the global null slot is modified. Constifying the return from id_to_memslot() to combat such issues is possible, but would require a massive refactoring of arch specific code and would still be susceptible to casting shenanigans. Add function comments to update_memslots() and search_memslots() to explicitly (and loudly) state how memslots are sorted. Opportunistically stuff @hva with a non-canonical value when deleting a private memslot on x86 to detect bogus usage of the freed slot. No functional change intended. Tested-by: Christoffer Dall <christoffer.dall@arm.com> Tested-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
2a49f61dfc
commit
0577d1abe7
@ -4400,7 +4400,7 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
r = -ENOENT;
|
||||
if (!memslot->dirty_bitmap)
|
||||
if (!memslot || !memslot->dirty_bitmap)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
|
@ -9715,9 +9715,9 @@ void kvm_arch_sync_events(struct kvm *kvm)
|
||||
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
|
||||
{
|
||||
int i, r;
|
||||
unsigned long hva;
|
||||
unsigned long hva, uninitialized_var(old_npages);
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
struct kvm_memory_slot *slot, old;
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
/* Called with kvm->slots_lock held. */
|
||||
if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
|
||||
@ -9725,7 +9725,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
|
||||
|
||||
slot = id_to_memslot(slots, id);
|
||||
if (size) {
|
||||
if (slot->npages)
|
||||
if (slot && slot->npages)
|
||||
return -EEXIST;
|
||||
|
||||
/*
|
||||
@ -9737,13 +9737,14 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
|
||||
if (IS_ERR((void *)hva))
|
||||
return PTR_ERR((void *)hva);
|
||||
} else {
|
||||
if (!slot->npages)
|
||||
if (!slot || !slot->npages)
|
||||
return 0;
|
||||
|
||||
hva = 0;
|
||||
/* Stuff a non-canonical value to catch use-after-delete. */
|
||||
hva = 0xdeadull << 48;
|
||||
old_npages = slot->npages;
|
||||
}
|
||||
|
||||
old = *slot;
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
struct kvm_userspace_memory_region m;
|
||||
|
||||
@ -9758,7 +9759,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
|
||||
}
|
||||
|
||||
if (!size)
|
||||
vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
|
||||
vm_munmap(hva, old_npages * PAGE_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -572,10 +572,11 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
|
||||
return vcpu->vcpu_idx;
|
||||
}
|
||||
|
||||
#define kvm_for_each_memslot(memslot, slots) \
|
||||
for (memslot = &slots->memslots[0]; \
|
||||
memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
|
||||
memslot++)
|
||||
#define kvm_for_each_memslot(memslot, slots) \
|
||||
for (memslot = &slots->memslots[0]; \
|
||||
memslot < slots->memslots + slots->used_slots; memslot++) \
|
||||
if (WARN_ON_ONCE(!memslot->npages)) { \
|
||||
} else
|
||||
|
||||
void kvm_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
|
||||
@ -635,12 +636,15 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
|
||||
return __kvm_memslots(vcpu->kvm, as_id);
|
||||
}
|
||||
|
||||
static inline struct kvm_memory_slot *
|
||||
id_to_memslot(struct kvm_memslots *slots, int id)
|
||||
static inline
|
||||
struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
|
||||
{
|
||||
int index = slots->id_to_index[id];
|
||||
struct kvm_memory_slot *slot;
|
||||
|
||||
if (index < 0)
|
||||
return NULL;
|
||||
|
||||
slot = &slots->memslots[index];
|
||||
|
||||
WARN_ON(slot->id != id);
|
||||
@ -1012,6 +1016,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
|
||||
* used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
|
||||
* gfn_to_memslot() itself isn't here as an inline because that would
|
||||
* bloat other code too much.
|
||||
*
|
||||
* IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
|
||||
*/
|
||||
static inline struct kvm_memory_slot *
|
||||
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
|
||||
|
@ -1534,8 +1534,13 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
{
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
|
||||
phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
|
||||
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
|
||||
phys_addr_t start, end;
|
||||
|
||||
if (WARN_ON_ONCE(!memslot))
|
||||
return;
|
||||
|
||||
start = memslot->base_gfn << PAGE_SHIFT;
|
||||
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
stage2_wp_range(kvm, start, end);
|
||||
|
@ -566,7 +566,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
|
||||
slots->id_to_index[i] = slots->memslots[i].id = i;
|
||||
slots->id_to_index[i] = slots->memslots[i].id = -1;
|
||||
|
||||
return slots;
|
||||
}
|
||||
@ -870,63 +870,162 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert memslot and re-sort memslots based on their GFN,
|
||||
* so binary search could be used to lookup GFN.
|
||||
* Sorting algorithm takes advantage of having initially
|
||||
* sorted array and known changed memslot position.
|
||||
* Delete a memslot by decrementing the number of used slots and shifting all
|
||||
* other entries in the array forward one spot.
|
||||
*/
|
||||
static void update_memslots(struct kvm_memslots *slots,
|
||||
struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
static inline void kvm_memslot_delete(struct kvm_memslots *slots,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
int id = new->id;
|
||||
int i = slots->id_to_index[id];
|
||||
struct kvm_memory_slot *mslots = slots->memslots;
|
||||
int i;
|
||||
|
||||
WARN_ON(mslots[i].id != id);
|
||||
switch (change) {
|
||||
case KVM_MR_CREATE:
|
||||
slots->used_slots++;
|
||||
WARN_ON(mslots[i].npages || !new->npages);
|
||||
break;
|
||||
case KVM_MR_DELETE:
|
||||
slots->used_slots--;
|
||||
WARN_ON(new->npages || !mslots[i].npages);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (WARN_ON(slots->id_to_index[memslot->id] == -1))
|
||||
return;
|
||||
|
||||
while (i < KVM_MEM_SLOTS_NUM - 1 &&
|
||||
new->base_gfn <= mslots[i + 1].base_gfn) {
|
||||
if (!mslots[i + 1].npages)
|
||||
break;
|
||||
slots->used_slots--;
|
||||
|
||||
for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
|
||||
mslots[i] = mslots[i + 1];
|
||||
slots->id_to_index[mslots[i].id] = i;
|
||||
i++;
|
||||
}
|
||||
mslots[i] = *memslot;
|
||||
slots->id_to_index[memslot->id] = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* "Insert" a new memslot by incrementing the number of used slots. Returns
|
||||
* the new slot's initial index into the memslots array.
|
||||
*/
|
||||
static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
|
||||
{
|
||||
return slots->used_slots++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move a changed memslot backwards in the array by shifting existing slots
|
||||
* with a higher GFN toward the front of the array. Note, the changed memslot
|
||||
* itself is not preserved in the array, i.e. not swapped at this time, only
|
||||
* its new index into the array is tracked. Returns the changed memslot's
|
||||
* current index into the memslots array.
|
||||
*/
|
||||
static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
|
||||
struct kvm_memory_slot *memslot)
|
||||
{
|
||||
struct kvm_memory_slot *mslots = slots->memslots;
|
||||
int i;
|
||||
|
||||
if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
|
||||
WARN_ON_ONCE(!slots->used_slots))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* The ">=" is needed when creating a slot with base_gfn == 0,
|
||||
* so that it moves before all those with base_gfn == npages == 0.
|
||||
*
|
||||
* On the other hand, if new->npages is zero, the above loop has
|
||||
* already left i pointing to the beginning of the empty part of
|
||||
* mslots, and the ">=" would move the hole backwards in this
|
||||
* case---which is wrong. So skip the loop when deleting a slot.
|
||||
* Move the target memslot backward in the array by shifting existing
|
||||
* memslots with a higher GFN (than the target memslot) towards the
|
||||
* front of the array.
|
||||
*/
|
||||
if (new->npages) {
|
||||
while (i > 0 &&
|
||||
new->base_gfn >= mslots[i - 1].base_gfn) {
|
||||
mslots[i] = mslots[i - 1];
|
||||
slots->id_to_index[mslots[i].id] = i;
|
||||
i--;
|
||||
}
|
||||
} else
|
||||
WARN_ON_ONCE(i != slots->used_slots);
|
||||
for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
|
||||
if (memslot->base_gfn > mslots[i + 1].base_gfn)
|
||||
break;
|
||||
|
||||
mslots[i] = *new;
|
||||
slots->id_to_index[mslots[i].id] = i;
|
||||
WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
|
||||
|
||||
/* Shift the next memslot forward one and update its index. */
|
||||
mslots[i] = mslots[i + 1];
|
||||
slots->id_to_index[mslots[i].id] = i;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move a changed memslot forwards in the array by shifting existing slots with
|
||||
* a lower GFN toward the back of the array. Note, the changed memslot itself
|
||||
* is not preserved in the array, i.e. not swapped at this time, only its new
|
||||
* index into the array is tracked. Returns the changed memslot's final index
|
||||
* into the memslots array.
|
||||
*/
|
||||
static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
|
||||
struct kvm_memory_slot *memslot,
|
||||
int start)
|
||||
{
|
||||
struct kvm_memory_slot *mslots = slots->memslots;
|
||||
int i;
|
||||
|
||||
for (i = start; i > 0; i--) {
|
||||
if (memslot->base_gfn < mslots[i - 1].base_gfn)
|
||||
break;
|
||||
|
||||
WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
|
||||
|
||||
/* Shift the next memslot back one and update its index. */
|
||||
mslots[i] = mslots[i - 1];
|
||||
slots->id_to_index[mslots[i].id] = i;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-sort memslots based on their GFN to account for an added, deleted, or
|
||||
* moved memslot. Sorting memslots by GFN allows using a binary search during
|
||||
* memslot lookup.
|
||||
*
|
||||
* IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry
|
||||
* at memslots[0] has the highest GFN.
|
||||
*
|
||||
* The sorting algorithm takes advantage of having initially sorted memslots
|
||||
* and knowing the position of the changed memslot. Sorting is also optimized
|
||||
* by not swapping the updated memslot and instead only shifting other memslots
|
||||
* and tracking the new index for the update memslot. Only once its final
|
||||
* index is known is the updated memslot copied into its position in the array.
|
||||
*
|
||||
* - When deleting a memslot, the deleted memslot simply needs to be moved to
|
||||
* the end of the array.
|
||||
*
|
||||
* - When creating a memslot, the algorithm "inserts" the new memslot at the
|
||||
* end of the array and then it forward to its correct location.
|
||||
*
|
||||
* - When moving a memslot, the algorithm first moves the updated memslot
|
||||
* backward to handle the scenario where the memslot's GFN was changed to a
|
||||
* lower value. update_memslots() then falls through and runs the same flow
|
||||
* as creating a memslot to move the memslot forward to handle the scenario
|
||||
* where its GFN was changed to a higher value.
|
||||
*
|
||||
* Note, slots are sorted from highest->lowest instead of lowest->highest for
|
||||
* historical reasons. Originally, invalid memslots where denoted by having
|
||||
* GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots
|
||||
* to the end of the array. The current algorithm uses dedicated logic to
|
||||
* delete a memslot and thus does not rely on invalid memslots having GFN=0.
|
||||
*
|
||||
* The other historical motiviation for highest->lowest was to improve the
|
||||
* performance of memslot lookup. KVM originally used a linear search starting
|
||||
* at memslots[0]. On x86, the largest memslot usually has one of the highest,
|
||||
* if not *the* highest, GFN, as the bulk of the guest's RAM is located in a
|
||||
* single memslot above the 4gb boundary. As the largest memslot is also the
|
||||
* most likely to be referenced, sorting it to the front of the array was
|
||||
* advantageous. The current binary search starts from the middle of the array
|
||||
* and uses an LRU pointer to improve performance for all memslots and GFNs.
|
||||
*/
|
||||
static void update_memslots(struct kvm_memslots *slots,
|
||||
struct kvm_memory_slot *memslot,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (change == KVM_MR_DELETE) {
|
||||
kvm_memslot_delete(slots, memslot);
|
||||
} else {
|
||||
if (change == KVM_MR_CREATE)
|
||||
i = kvm_memslot_insert_back(slots);
|
||||
else
|
||||
i = kvm_memslot_move_backward(slots, memslot);
|
||||
i = kvm_memslot_move_forward(slots, memslot, i);
|
||||
|
||||
/*
|
||||
* Copy the memslot to its new position in memslots and update
|
||||
* its index accordingly.
|
||||
*/
|
||||
slots->memslots[i] = *memslot;
|
||||
slots->id_to_index[memslot->id] = i;
|
||||
}
|
||||
}
|
||||
|
||||
static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
|
||||
@ -1106,7 +1205,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
|
||||
* memslot needs to be referenced after calling update_memslots(), e.g.
|
||||
* to free its resources and for arch specific behavior.
|
||||
*/
|
||||
old = *id_to_memslot(__kvm_memslots(kvm, as_id), id);
|
||||
tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id);
|
||||
if (tmp) {
|
||||
old = *tmp;
|
||||
tmp = NULL;
|
||||
} else {
|
||||
memset(&old, 0, sizeof(old));
|
||||
old.id = id;
|
||||
}
|
||||
|
||||
if (!mem->memory_size)
|
||||
return kvm_delete_memslot(kvm, mem, &old, as_id);
|
||||
@ -1224,7 +1330,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
|
||||
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
*memslot = id_to_memslot(slots, id);
|
||||
if (!(*memslot)->dirty_bitmap)
|
||||
if (!(*memslot) || !(*memslot)->dirty_bitmap)
|
||||
return -ENOENT;
|
||||
|
||||
kvm_arch_sync_dirty_log(kvm, *memslot);
|
||||
@ -1282,10 +1388,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
memslot = id_to_memslot(slots, id);
|
||||
if (!memslot || !memslot->dirty_bitmap)
|
||||
return -ENOENT;
|
||||
|
||||
dirty_bitmap = memslot->dirty_bitmap;
|
||||
if (!dirty_bitmap)
|
||||
return -ENOENT;
|
||||
|
||||
kvm_arch_sync_dirty_log(kvm, memslot);
|
||||
|
||||
@ -1393,10 +1499,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
|
||||
|
||||
slots = __kvm_memslots(kvm, as_id);
|
||||
memslot = id_to_memslot(slots, id);
|
||||
if (!memslot || !memslot->dirty_bitmap)
|
||||
return -ENOENT;
|
||||
|
||||
dirty_bitmap = memslot->dirty_bitmap;
|
||||
if (!dirty_bitmap)
|
||||
return -ENOENT;
|
||||
|
||||
n = ALIGN(log->num_pages, BITS_PER_LONG) / 8;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user