KVM: arm64: Remove size-order align in the nVHE hyp private VA range

commit f922c13e778d ("KVM: arm64: Introduce
pkvm_alloc_private_va_range()") and commit 92abe0f81e13 ("KVM: arm64:
Introduce hyp_alloc_private_va_range()") added an alignment for the
start address of any allocation into the nVHE hypervisor private VA
range.

This alignment (order of the size of the allocation) intends to enable
efficient stack verification (if the PAGE_SHIFT bit is zero, the stack
pointer is on the guard page and a stack overflow occurred).

But this is only necessary for stack allocation and can waste a lot of
VA space. So instead make stack-specific functions, handling the guard
page requirements, while other users (e.g.  fixmap) will only get page
alignment.

Reviewed-by: Kalesh Singh <kaleshsingh@google.com>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230811112037.1147863-1-vdonnefort@google.com
This commit is contained in:
Vincent Donnefort 2023-08-11 12:20:37 +01:00 committed by Marc Zyngier
parent a6b33d009f
commit f156a7d13f
6 changed files with 138 additions and 85 deletions

View File

@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);

View File

@ -2283,30 +2283,8 @@ static int __init init_hyp_mode(void)
for_each_possible_cpu(cpu) {
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
unsigned long hyp_addr;
/*
* Allocate a contiguous HYP private VA range for the stack
* and guard page. The allocation is also aligned based on
* the order of its size.
*/
err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
if (err) {
kvm_err("Cannot allocate hyp stack guard page\n");
goto out_err;
}
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
__pa(stack_page), PAGE_HYP);
err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
@ -2319,8 +2297,6 @@ static int __init init_hyp_mode(void)
* has been mapped in the flexible private VA space.
*/
params->stack_pa = __pa(stack_page);
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {

View File

@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
#endif /* __KVM_HYP_MM_H */

View File

@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
{
unsigned long cur;
hyp_assert_lock_held(&pkvm_pgd_lock);
if (!start || start < __io_map_base)
return -EINVAL;
/* The allocated size is always a multiple of PAGE_SIZE */
cur = start + PAGE_ALIGN(size);
/* Are we overflowing on the vmemmap ? */
if (cur > __hyp_vmemmap)
return -ENOMEM;
__io_map_base = cur;
return 0;
}
/**
* pkvm_alloc_private_va_range - Allocates a private VA range.
* @size: The size of the VA range to reserve.
@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
unsigned long base, addr;
int ret = 0;
unsigned long addr;
int ret;
hyp_spin_lock(&pkvm_pgd_lock);
/* Align the allocation based on the order of its size */
addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
/* The allocated size is always a multiple of PAGE_SIZE */
base = addr + PAGE_ALIGN(size);
/* Are we overflowing on the vmemmap ? */
if (!addr || base > __hyp_vmemmap)
ret = -ENOMEM;
else {
__io_map_base = base;
*haddr = addr;
}
addr = __io_map_base;
ret = __pkvm_alloc_private_va_range(addr, size);
hyp_spin_unlock(&pkvm_pgd_lock);
*haddr = addr;
return ret;
}
@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
{
unsigned long addr, prev_base;
size_t size;
int ret;
hyp_spin_lock(&pkvm_pgd_lock);
prev_base = __io_map_base;
/*
* Efficient stack verification using the PAGE_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
size = PAGE_SIZE * 2;
addr = ALIGN(__io_map_base, size);
ret = __pkvm_alloc_private_va_range(addr, size);
if (!ret) {
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
PAGE_SIZE, phys, PAGE_HYP);
if (ret)
__io_map_base = prev_base;
}
hyp_spin_unlock(&pkvm_pgd_lock);
*haddr = addr + size;
return ret;
}
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;

View File

@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
unsigned long hyp_addr;
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
/*
* Allocate a contiguous HYP private VA range for the stack
* and guard page. The allocation is also aligned based on
* the order of its size.
*/
ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
if (ret)
return ret;
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
hyp_spin_lock(&pkvm_pgd_lock);
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
PAGE_SIZE, params->stack_pa, PAGE_HYP);
hyp_spin_unlock(&pkvm_pgd_lock);
if (ret)
return ret;
/* Update stack_hyp_va to end of the stack's private VA range */
params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*

View File

@ -592,6 +592,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return 0;
}
static int __hyp_alloc_private_va_range(unsigned long base)
{
lockdep_assert_held(&kvm_hyp_pgd_mutex);
if (!PAGE_ALIGNED(base))
return -EINVAL;
/*
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
* allocating the new area, as it would indicate we've
* overflowed the idmap/IO address range.
*/
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
return -ENOMEM;
io_map_base = base;
return 0;
}
/**
* hyp_alloc_private_va_range - Allocates a private VA range.
@ -612,26 +631,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
/*
* This assumes that we have enough space below the idmap
* page to allocate our VAs. If not, the check below will
* kick. A potential alternative would be to detect that
* overflow and switch to an allocation above the idmap.
* page to allocate our VAs. If not, the check in
* __hyp_alloc_private_va_range() will kick. A potential
* alternative would be to detect that overflow and switch
* to an allocation above the idmap.
*
* The allocated size is always a multiple of PAGE_SIZE.
*/
base = io_map_base - PAGE_ALIGN(size);
/* Align the allocation based on the order of its size */
base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
/*
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
* allocating the new area, as it would indicate we've
* overflowed the idmap/IO address range.
*/
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
ret = -ENOMEM;
else
*haddr = io_map_base = base;
size = PAGE_ALIGN(size);
base = io_map_base - size;
ret = __hyp_alloc_private_va_range(base);
mutex_unlock(&kvm_hyp_pgd_mutex);
@ -668,6 +677,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
return ret;
}
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
{
unsigned long base;
size_t size;
int ret;
mutex_lock(&kvm_hyp_pgd_mutex);
/*
* Efficient stack verification using the PAGE_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
size = PAGE_SIZE * 2;
base = ALIGN_DOWN(io_map_base - size, size);
ret = __hyp_alloc_private_va_range(base);
mutex_unlock(&kvm_hyp_pgd_mutex);
if (ret) {
kvm_err("Cannot allocate hyp stack guard page\n");
return ret;
}
/*
* Since the stack grows downwards, map the stack to the page
* at the higher address and leave the lower guard page
* unbacked.
*
* Any valid stack address now has the PAGE_SHIFT bit as 1
* and addresses corresponding to the guard page have the
* PAGE_SHIFT bit as 0 - this is used for overflow detection.
*/
ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
PAGE_HYP);
if (ret)
kvm_err("Cannot map hyp stack\n");
*haddr = base + size;
return ret;
}
/**
* create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped