mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 14:50:19 +00:00
KVM: s390/interrupt: do not pin adapter interrupt pages
The adapter interrupt page containing the indicator bits is currently pinned. That means that a guest with many devices can pin a lot of memory pages in the host. This also complicates the reference tracking which is needed for memory management handling of protected virtual machines. It might also have some strange side effects for madvise MADV_DONTNEED and other things. We can simply try to get the userspace page set the bits and free the page. By storing the userspace address in the irq routing entry instead of the guest address we can actually avoid many lookups and list walks so that this variant is very likely not slower. If userspace messes around with the memory slots the worst thing that can happen is that we write to some other memory within that process. As we get the the page with FOLL_WRITE this can also not be used to write to shared read-only pages. Signed-off-by: Ulrich Weigand <Ulrich.Weigand@de.ibm.com> Acked-by: David Hildenbrand <david@redhat.com> Reviewed-by: Cornelia Huck <cohuck@redhat.com> [borntraeger@de.ibm.com: patch simplification] Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
This commit is contained in:
parent
f15587c834
commit
f65470661f
@ -108,16 +108,9 @@ Groups:
|
||||
mask or unmask the adapter, as specified in mask
|
||||
|
||||
KVM_S390_IO_ADAPTER_MAP
|
||||
perform a gmap translation for the guest address provided in addr,
|
||||
pin a userspace page for the translated address and add it to the
|
||||
list of mappings
|
||||
|
||||
.. note:: A new mapping will be created unconditionally; therefore,
|
||||
the calling code should avoid making duplicate mappings.
|
||||
|
||||
This is now a no-op. The mapping is purely done by the irq route.
|
||||
KVM_S390_IO_ADAPTER_UNMAP
|
||||
release a userspace page for the translated address specified in addr
|
||||
from the list of mappings
|
||||
This is now a no-op. The mapping is purely done by the irq route.
|
||||
|
||||
KVM_DEV_FLIC_AISM
|
||||
modify the adapter-interruption-suppression mode for a given isc if the
|
||||
|
@ -701,9 +701,6 @@ struct s390_io_adapter {
|
||||
bool masked;
|
||||
bool swap;
|
||||
bool suppressible;
|
||||
struct rw_semaphore maps_lock;
|
||||
struct list_head maps;
|
||||
atomic_t nr_maps;
|
||||
};
|
||||
|
||||
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
|
||||
|
@ -2,7 +2,7 @@
|
||||
/*
|
||||
* handling kvm guest interrupts
|
||||
*
|
||||
* Copyright IBM Corp. 2008, 2015
|
||||
* Copyright IBM Corp. 2008, 2020
|
||||
*
|
||||
* Author(s): Carsten Otte <cotte@de.ibm.com>
|
||||
*/
|
||||
@ -2327,9 +2327,6 @@ static int register_io_adapter(struct kvm_device *dev,
|
||||
if (!adapter)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&adapter->maps);
|
||||
init_rwsem(&adapter->maps_lock);
|
||||
atomic_set(&adapter->nr_maps, 0);
|
||||
adapter->id = adapter_info.id;
|
||||
adapter->isc = adapter_info.isc;
|
||||
adapter->maskable = adapter_info.maskable;
|
||||
@ -2354,87 +2351,12 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
|
||||
{
|
||||
struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
|
||||
struct s390_map_info *map;
|
||||
int ret;
|
||||
|
||||
if (!adapter || !addr)
|
||||
return -EINVAL;
|
||||
|
||||
map = kzalloc(sizeof(*map), GFP_KERNEL);
|
||||
if (!map) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
INIT_LIST_HEAD(&map->list);
|
||||
map->guest_addr = addr;
|
||||
map->addr = gmap_translate(kvm->arch.gmap, addr);
|
||||
if (map->addr == -EFAULT) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
BUG_ON(ret != 1);
|
||||
down_write(&adapter->maps_lock);
|
||||
if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
|
||||
list_add_tail(&map->list, &adapter->maps);
|
||||
ret = 0;
|
||||
} else {
|
||||
put_page(map->page);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
up_write(&adapter->maps_lock);
|
||||
out:
|
||||
if (ret)
|
||||
kfree(map);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
|
||||
{
|
||||
struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
|
||||
struct s390_map_info *map, *tmp;
|
||||
int found = 0;
|
||||
|
||||
if (!adapter || !addr)
|
||||
return -EINVAL;
|
||||
|
||||
down_write(&adapter->maps_lock);
|
||||
list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
|
||||
if (map->guest_addr == addr) {
|
||||
found = 1;
|
||||
atomic_dec(&adapter->nr_maps);
|
||||
list_del(&map->list);
|
||||
put_page(map->page);
|
||||
kfree(map);
|
||||
break;
|
||||
}
|
||||
}
|
||||
up_write(&adapter->maps_lock);
|
||||
|
||||
return found ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
void kvm_s390_destroy_adapters(struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
struct s390_map_info *map, *tmp;
|
||||
|
||||
for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
|
||||
if (!kvm->arch.adapters[i])
|
||||
continue;
|
||||
list_for_each_entry_safe(map, tmp,
|
||||
&kvm->arch.adapters[i]->maps, list) {
|
||||
list_del(&map->list);
|
||||
put_page(map->page);
|
||||
kfree(map);
|
||||
}
|
||||
for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
|
||||
kfree(kvm->arch.adapters[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static int modify_io_adapter(struct kvm_device *dev,
|
||||
@ -2456,11 +2378,14 @@ static int modify_io_adapter(struct kvm_device *dev,
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
break;
|
||||
/*
|
||||
* The following operations are no longer needed and therefore no-ops.
|
||||
* The gpa to hva translation is done when an IRQ route is set up. The
|
||||
* set_irq code uses get_user_pages_remote() to do the actual write.
|
||||
*/
|
||||
case KVM_S390_IO_ADAPTER_MAP:
|
||||
ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
|
||||
break;
|
||||
case KVM_S390_IO_ADAPTER_UNMAP:
|
||||
ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@ -2699,19 +2624,15 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
|
||||
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
|
||||
}
|
||||
|
||||
static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
|
||||
u64 addr)
|
||||
static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
|
||||
{
|
||||
struct s390_map_info *map;
|
||||
struct page *page = NULL;
|
||||
|
||||
if (!adapter)
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(map, &adapter->maps, list) {
|
||||
if (map->guest_addr == addr)
|
||||
return map;
|
||||
}
|
||||
return NULL;
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
|
||||
&page, NULL, NULL);
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
return page;
|
||||
}
|
||||
|
||||
static int adapter_indicators_set(struct kvm *kvm,
|
||||
@ -2720,30 +2641,35 @@ static int adapter_indicators_set(struct kvm *kvm,
|
||||
{
|
||||
unsigned long bit;
|
||||
int summary_set, idx;
|
||||
struct s390_map_info *info;
|
||||
struct page *ind_page, *summary_page;
|
||||
void *map;
|
||||
|
||||
info = get_map_info(adapter, adapter_int->ind_addr);
|
||||
if (!info)
|
||||
ind_page = get_map_page(kvm, adapter_int->ind_addr);
|
||||
if (!ind_page)
|
||||
return -1;
|
||||
map = page_address(info->page);
|
||||
bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
|
||||
set_bit(bit, map);
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
|
||||
set_page_dirty_lock(info->page);
|
||||
info = get_map_info(adapter, adapter_int->summary_addr);
|
||||
if (!info) {
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
summary_page = get_map_page(kvm, adapter_int->summary_addr);
|
||||
if (!summary_page) {
|
||||
put_page(ind_page);
|
||||
return -1;
|
||||
}
|
||||
map = page_address(info->page);
|
||||
bit = get_ind_bit(info->addr, adapter_int->summary_offset,
|
||||
adapter->swap);
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
map = page_address(ind_page);
|
||||
bit = get_ind_bit(adapter_int->ind_addr,
|
||||
adapter_int->ind_offset, adapter->swap);
|
||||
set_bit(bit, map);
|
||||
mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
|
||||
set_page_dirty_lock(ind_page);
|
||||
map = page_address(summary_page);
|
||||
bit = get_ind_bit(adapter_int->summary_addr,
|
||||
adapter_int->summary_offset, adapter->swap);
|
||||
summary_set = test_and_set_bit(bit, map);
|
||||
mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
|
||||
set_page_dirty_lock(info->page);
|
||||
mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
|
||||
set_page_dirty_lock(summary_page);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
put_page(ind_page);
|
||||
put_page(summary_page);
|
||||
return summary_set ? 0 : 1;
|
||||
}
|
||||
|
||||
@ -2765,9 +2691,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
|
||||
adapter = get_io_adapter(kvm, e->adapter.adapter_id);
|
||||
if (!adapter)
|
||||
return -1;
|
||||
down_read(&adapter->maps_lock);
|
||||
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
|
||||
up_read(&adapter->maps_lock);
|
||||
if ((ret > 0) && !adapter->masked) {
|
||||
ret = kvm_s390_inject_airq(kvm, adapter);
|
||||
if (ret == 0)
|
||||
@ -2818,23 +2742,27 @@ int kvm_set_routing_entry(struct kvm *kvm,
|
||||
struct kvm_kernel_irq_routing_entry *e,
|
||||
const struct kvm_irq_routing_entry *ue)
|
||||
{
|
||||
int ret;
|
||||
u64 uaddr;
|
||||
|
||||
switch (ue->type) {
|
||||
/* we store the userspace addresses instead of the guest addresses */
|
||||
case KVM_IRQ_ROUTING_S390_ADAPTER:
|
||||
e->set = set_adapter_int;
|
||||
e->adapter.summary_addr = ue->u.adapter.summary_addr;
|
||||
e->adapter.ind_addr = ue->u.adapter.ind_addr;
|
||||
uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
|
||||
if (uaddr == -EFAULT)
|
||||
return -EFAULT;
|
||||
e->adapter.summary_addr = uaddr;
|
||||
uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
|
||||
if (uaddr == -EFAULT)
|
||||
return -EFAULT;
|
||||
e->adapter.ind_addr = uaddr;
|
||||
e->adapter.summary_offset = ue->u.adapter.summary_offset;
|
||||
e->adapter.ind_offset = ue->u.adapter.ind_offset;
|
||||
e->adapter.adapter_id = ue->u.adapter.adapter_id;
|
||||
ret = 0;
|
||||
break;
|
||||
return 0;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
|
||||
|
Loading…
x
Reference in New Issue
Block a user