RISC-V: KVM: Add common nested acceleration support

Add common nested acceleration support that will be shared by all
parts of KVM RISC-V. This support detects the SBI NACL extension and
enables its use through static keys, which ensures minimal impact on
the non-nested scenario.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Reviewed-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20241020194734.58686-9-apatel@ventanamicro.com
Signed-off-by: Anup Patel <anup@brainfault.org>
This commit is contained in:
Anup Patel 2024-10-21 01:17:29 +05:30 committed by Anup Patel
parent 5daf89e73d
commit d466c19cea
4 changed files with 441 additions and 2 deletions

View File

@ -0,0 +1,239 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024 Ventana Micro Systems Inc.
*/
#ifndef __KVM_NACL_H
#define __KVM_NACL_H
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <asm/byteorder.h>
#include <asm/csr.h>
#include <asm/sbi.h>
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_available);
#define kvm_riscv_nacl_available() \
static_branch_unlikely(&kvm_riscv_nacl_available)
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available);
#define kvm_riscv_nacl_sync_csr_available() \
static_branch_unlikely(&kvm_riscv_nacl_sync_csr_available)
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available);
#define kvm_riscv_nacl_sync_hfence_available() \
static_branch_unlikely(&kvm_riscv_nacl_sync_hfence_available)
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available);
#define kvm_riscv_nacl_sync_sret_available() \
static_branch_unlikely(&kvm_riscv_nacl_sync_sret_available)
DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available);
#define kvm_riscv_nacl_autoswap_csr_available() \
static_branch_unlikely(&kvm_riscv_nacl_autoswap_csr_available)
/* Per-CPU NACL state: one shared memory region per CPU. */
struct kvm_riscv_nacl {
/* Kernel virtual address of the shared memory (NULL when not allocated) */
void *shmem;
/* Physical address of the same memory, handed to SBI by kvm_riscv_nacl_enable() */
phys_addr_t shmem_phys;
};
/* One instance per CPU; defined in nacl.c. */
DECLARE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl);
/*
 * Store one HFENCE request (@control, @page_num, @page_count) into a
 * non-pending HFENCE entry of the shared memory at @shmem.
 */
void __kvm_riscv_nacl_hfence(void *shmem,
unsigned long control,
unsigned long page_num,
unsigned long page_count);
/*
 * Register the current CPU's shared memory with SBI. Returns 0 when NACL is
 * not in use or on success, otherwise the value produced by
 * sbi_err_map_linux_errno().
 */
int kvm_riscv_nacl_enable(void);
/* Issue the SBI set-shmem call with SBI_SHMEM_DISABLE on the current CPU. */
void kvm_riscv_nacl_disable(void);
/* Free the shared memory of every possible CPU. */
void kvm_riscv_nacl_exit(void);
/*
 * Detect NACL, probe its features and allocate per-CPU shared memory.
 * Returns 0, -ENODEV when NACL is unavailable, or -ENOMEM.
 */
int kvm_riscv_nacl_init(void);
/*
 * Little-endian <-> CPU byte order conversion for a value as wide as
 * unsigned long: 32-bit helpers under CONFIG_32BIT, 64-bit helpers otherwise.
 */
#ifdef CONFIG_32BIT
#define lelong_to_cpu(__x) le32_to_cpu(__x)
#define cpu_to_lelong(__x) cpu_to_le32(__x)
#else
#define lelong_to_cpu(__x) le64_to_cpu(__x)
#define cpu_to_lelong(__x) cpu_to_le64(__x)
#endif
/* Shared memory pointer from the per-CPU kvm_riscv_nacl of the running CPU. */
#define nacl_shmem() \
this_cpu_ptr(&kvm_riscv_nacl)->shmem
/*
 * Fetch one long from the scratch area of @__shmem and return it in CPU
 * byte order. @__offset is added to @__shmem together with
 * SBI_NACL_SHMEM_SCRATCH_OFFSET, i.e. it is a byte offset when @__shmem is
 * the void pointer returned by nacl_shmem().
 */
#define nacl_scratch_read_long(__shmem, __offset)			\
({									\
	unsigned long *__slot = (__shmem) +				\
				SBI_NACL_SHMEM_SCRATCH_OFFSET +		\
				(__offset);				\
	lelong_to_cpu(*__slot);						\
})
/*
 * Store @__val, converted to little-endian, as one long in the scratch area
 * of @__shmem. The destination is @__shmem plus
 * SBI_NACL_SHMEM_SCRATCH_OFFSET plus @__offset.
 */
#define nacl_scratch_write_long(__shmem, __offset, __val)		\
do {									\
	unsigned long *__slot = (__shmem) +				\
				SBI_NACL_SHMEM_SCRATCH_OFFSET +		\
				(__offset);				\
	*__slot = cpu_to_lelong(__val);					\
} while (0)
/*
 * Copy @__count longs from @__array into the scratch area of @__shmem,
 * converting each element to little-endian. The first element lands at
 * @__shmem plus SBI_NACL_SHMEM_SCRATCH_OFFSET plus @__offset.
 */
#define nacl_scratch_write_longs(__shmem, __offset, __array, __count)	\
do {									\
	unsigned int __n = 0;						\
	unsigned long *__dst = (__shmem) +				\
			       SBI_NACL_SHMEM_SCRATCH_OFFSET +		\
			       (__offset);				\
	while (__n < (__count)) {					\
		__dst[__n] = cpu_to_lelong((__array)[__n]);		\
		__n++;							\
	}								\
} while (0)
/*
 * Make the SBI_EXT_NACL_SYNC_HFENCE call with @__e as its only non-zero
 * argument. __kvm_riscv_nacl_hfence() passes -1UL here.
 */
#define nacl_sync_hfence(__e) \
sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_HFENCE, \
(__e), 0, 0, 0, 0, 0)
/*
 * Build the config word of an HFENCE entry. The result always has
 * SBI_NACL_SHMEM_HFENCE_CONFIG_PEND set and carries four masked fields:
 * @__type, (@__order - SBI_NACL_SHMEM_HFENCE_ORDER_BASE), @__vmid (each
 * shifted to its position) and @__asid (unshifted).
 */
#define nacl_hfence_mkconfig(__type, __order, __vmid, __asid)		\
({									\
	unsigned long __f_type, __f_order, __f_vmid, __f_asid, __cfg;	\
									\
	__f_type = ((__type) & SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK)	\
		   << SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT;		\
	__f_order = (((__order) - SBI_NACL_SHMEM_HFENCE_ORDER_BASE) &	\
		     SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK)		\
		    << SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT;	\
	__f_vmid = ((__vmid) & SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK)	\
		   << SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT;		\
	__f_asid = ((__asid) & SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK);	\
	__cfg = SBI_NACL_SHMEM_HFENCE_CONFIG_PEND;			\
	__cfg |= __f_type | __f_order | __f_vmid | __f_asid;		\
	__cfg;								\
})
/* Page number of @__addr for pages of (1 << @__order) bytes. */
#define nacl_hfence_mkpnum(__order, __addr) \
((__addr) >> (__order))
/* Number of (1 << @__order)-byte pages in @__size (rounds down). */
#define nacl_hfence_mkpcount(__order, __size) \
((__size) >> (__order))
/*
 * HFENCE request helpers. Each one builds a config word with
 * nacl_hfence_mkconfig() for one SBI_NACL_SHMEM_HFENCE_TYPE_* value and
 * passes it to __kvm_riscv_nacl_hfence(). Ranged variants convert the
 * address and size to a page number and page count using @__order; the
 * "_all" variants pass 0 for order, page number and page count.
 */
/* Type GVMA: range @__gpa/@__gpsz, vmid and asid fields are 0. */
#define nacl_hfence_gvma(__shmem, __gpa, __gpsz, __order) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA, \
__order, 0, 0), \
nacl_hfence_mkpnum(__order, __gpa), \
nacl_hfence_mkpcount(__order, __gpsz))
/* Type GVMA_ALL: no range, vmid and asid fields are 0. */
#define nacl_hfence_gvma_all(__shmem) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL, \
0, 0, 0), 0, 0)
/* Type GVMA_VMID: range @__gpa/@__gpsz for @__vmid. */
#define nacl_hfence_gvma_vmid(__shmem, __vmid, __gpa, __gpsz, __order) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID, \
__order, __vmid, 0), \
nacl_hfence_mkpnum(__order, __gpa), \
nacl_hfence_mkpcount(__order, __gpsz))
/* Type GVMA_VMID_ALL: no range, for @__vmid. */
#define nacl_hfence_gvma_vmid_all(__shmem, __vmid) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL, \
0, __vmid, 0), 0, 0)
/* Type VVMA: range @__gva/@__gvsz for @__vmid, asid field is 0. */
#define nacl_hfence_vvma(__shmem, __vmid, __gva, __gvsz, __order) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA, \
__order, __vmid, 0), \
nacl_hfence_mkpnum(__order, __gva), \
nacl_hfence_mkpcount(__order, __gvsz))
/* Type VVMA_ALL: no range, for @__vmid. */
#define nacl_hfence_vvma_all(__shmem, __vmid) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL, \
0, __vmid, 0), 0, 0)
/* Type VVMA_ASID: range @__gva/@__gvsz for @__vmid and @__asid. */
#define nacl_hfence_vvma_asid(__shmem, __vmid, __asid, __gva, __gvsz, __order)\
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID, \
__order, __vmid, __asid), \
nacl_hfence_mkpnum(__order, __gva), \
nacl_hfence_mkpcount(__order, __gvsz))
/* Type VVMA_ASID_ALL: no range, for @__vmid and @__asid. */
#define nacl_hfence_vvma_asid_all(__shmem, __vmid, __asid) \
__kvm_riscv_nacl_hfence(__shmem, \
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL, \
0, __vmid, __asid), 0, 0)
/*
 * Return the value stored for @__csr in the CSR array of @__shmem,
 * converted from little-endian to CPU byte order. The array starts at
 * SBI_NACL_SHMEM_CSR_OFFSET and is indexed by SBI_NACL_SHMEM_CSR_INDEX().
 */
#define nacl_csr_read(__shmem, __csr)					\
({									\
	unsigned long *__csrs = (__shmem) + SBI_NACL_SHMEM_CSR_OFFSET;	\
	unsigned long __raw = __csrs[SBI_NACL_SHMEM_CSR_INDEX(__csr)];	\
	lelong_to_cpu(__raw);						\
})
/*
 * Store @__val (as little-endian) into the CSR array slot of @__csr and set
 * the matching bit in the bitmap at SBI_NACL_SHMEM_DBITMAP_OFFSET, one bit
 * per CSR index. @__shmem is evaluated once.
 */
#define nacl_csr_write(__shmem, __csr, __val)				\
do {									\
	void *__base = (__shmem);					\
	unsigned int __idx = SBI_NACL_SHMEM_CSR_INDEX(__csr);		\
	unsigned long *__csrs = (__base) + SBI_NACL_SHMEM_CSR_OFFSET;	\
	u8 *__dirty = (__base) + SBI_NACL_SHMEM_DBITMAP_OFFSET;	\
	__csrs[__idx] = cpu_to_lelong(__val);				\
	__dirty[__idx / 8] |= 1U << (__idx % 8);			\
} while (0)
/*
 * Replace the value stored for @__csr in the CSR array of @__shmem with
 * @__val, set the CSR's bit in the bitmap at SBI_NACL_SHMEM_DBITMAP_OFFSET,
 * and return the previous value in CPU byte order. @__shmem is evaluated
 * once.
 */
#define nacl_csr_swap(__shmem, __csr, __val)				\
({									\
	void *__base = (__shmem);					\
	unsigned int __idx = SBI_NACL_SHMEM_CSR_INDEX(__csr);		\
	unsigned long *__csrs = (__base) + SBI_NACL_SHMEM_CSR_OFFSET;	\
	u8 *__dirty = (__base) + SBI_NACL_SHMEM_DBITMAP_OFFSET;	\
	unsigned long __old = lelong_to_cpu(__csrs[__idx]);		\
	__csrs[__idx] = cpu_to_lelong(__val);				\
	__dirty[__idx / 8] |= 1U << (__idx % 8);			\
	__old;								\
})
/* Make the SBI_EXT_NACL_SYNC_CSR call with @__csr as its only non-zero argument. */
#define nacl_sync_csr(__csr) \
sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_CSR, \
(__csr), 0, 0, 0, 0, 0)
/*
* Each ncsr_xyz() macro defined below has its own static-branch so every
* use of ncsr_xyz() macro emits a patchable direct jump. This means multiple
* back-to-back ncsr_xyz() macro usage will emit multiple patchable direct
* jumps which is sub-optimal.
*
* Based on the above, it is recommended to avoid multiple back-to-back
* ncsr_xyz() macro usage.
*/
/*
 * Read @__csr: from the current CPU's NACL shared memory when
 * kvm_riscv_nacl_available() is true, otherwise with csr_read().
 * Note that this checks the base NACL key, whereas ncsr_write() and
 * ncsr_swap() check kvm_riscv_nacl_sync_csr_available().
 */
#define ncsr_read(__csr) \
({ \
unsigned long __r; \
if (kvm_riscv_nacl_available()) \
__r = nacl_csr_read(nacl_shmem(), __csr); \
else \
__r = csr_read(__csr); \
__r; \
})
/*
 * Write @__val to @__csr: into the current CPU's NACL shared memory when
 * kvm_riscv_nacl_sync_csr_available() is true, otherwise with csr_write().
 */
#define ncsr_write(__csr, __val) \
do { \
if (kvm_riscv_nacl_sync_csr_available()) \
nacl_csr_write(nacl_shmem(), __csr, __val); \
else \
csr_write(__csr, __val); \
} while (0)
/*
 * Write @__val to @__csr and return the previous value: through the current
 * CPU's NACL shared memory when kvm_riscv_nacl_sync_csr_available() is true,
 * otherwise with csr_swap().
 */
#define ncsr_swap(__csr, __val) \
({ \
unsigned long __r; \
if (kvm_riscv_nacl_sync_csr_available()) \
__r = nacl_csr_swap(nacl_shmem(), __csr, __val); \
else \
__r = csr_swap(__csr, __val); \
__r; \
})
/*
 * Call nacl_sync_csr(@__csr) when kvm_riscv_nacl_sync_csr_available() is
 * true; does nothing otherwise.
 */
#define nsync_csr(__csr) \
do { \
if (kvm_riscv_nacl_sync_csr_available()) \
nacl_sync_csr(__csr); \
} while (0)
#endif

View File

@ -16,6 +16,7 @@ kvm-y += aia_device.o
kvm-y += aia_imsic.o
kvm-y += main.o
kvm-y += mmu.o
kvm-y += nacl.o
kvm-y += tlb.o
kvm-y += vcpu.o
kvm-y += vcpu_exit.o

View File

@ -10,8 +10,8 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/cpufeature.h>
#include <asm/kvm_nacl.h>
#include <asm/sbi.h>
long kvm_arch_dev_ioctl(struct file *filp,
@ -22,6 +22,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_arch_enable_virtualization_cpu(void)
{
int rc;
rc = kvm_riscv_nacl_enable();
if (rc)
return rc;
csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
@ -49,17 +55,21 @@ void kvm_arch_disable_virtualization_cpu(void)
csr_write(CSR_HVIP, 0);
csr_write(CSR_HEDELEG, 0);
csr_write(CSR_HIDELEG, 0);
kvm_riscv_nacl_disable();
}
/*
 * Release what module init set up: AIA state, the NACL per-CPU shared
 * memory, and the perf callbacks, in that order.
 */
static void kvm_riscv_teardown(void)
{
kvm_riscv_aia_exit();
kvm_riscv_nacl_exit();
kvm_unregister_perf_callbacks();
}
static int __init riscv_kvm_init(void)
{
int rc;
char slist[64];
const char *str;
if (!riscv_isa_extension_available(NULL, h)) {
@ -77,16 +87,53 @@ static int __init riscv_kvm_init(void)
return -ENODEV;
}
rc = kvm_riscv_nacl_init();
if (rc && rc != -ENODEV)
return rc;
kvm_riscv_gstage_mode_detect();
kvm_riscv_gstage_vmid_detect();
rc = kvm_riscv_aia_init();
if (rc && rc != -ENODEV)
if (rc && rc != -ENODEV) {
kvm_riscv_nacl_exit();
return rc;
}
kvm_info("hypervisor extension available\n");
if (kvm_riscv_nacl_available()) {
rc = 0;
slist[0] = '\0';
if (kvm_riscv_nacl_sync_csr_available()) {
if (rc)
strcat(slist, ", ");
strcat(slist, "sync_csr");
rc++;
}
if (kvm_riscv_nacl_sync_hfence_available()) {
if (rc)
strcat(slist, ", ");
strcat(slist, "sync_hfence");
rc++;
}
if (kvm_riscv_nacl_sync_sret_available()) {
if (rc)
strcat(slist, ", ");
strcat(slist, "sync_sret");
rc++;
}
if (kvm_riscv_nacl_autoswap_csr_available()) {
if (rc)
strcat(slist, ", ");
strcat(slist, "autoswap_csr");
rc++;
}
kvm_info("using SBI nested acceleration with %s\n",
(rc) ? slist : "no features");
}
switch (kvm_riscv_gstage_mode()) {
case HGATP_MODE_SV32X4:
str = "Sv32x4";

152
arch/riscv/kvm/nacl.c Normal file
View File

@ -0,0 +1,152 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2024 Ventana Micro Systems Inc.
*/
#include <linux/kvm_host.h>
#include <linux/vmalloc.h>
#include <asm/kvm_nacl.h>
/*
 * Storage for the static keys declared in <asm/kvm_nacl.h>. All start out
 * false and are only enabled by kvm_riscv_nacl_init().
 */
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available);
DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available);
/* Per-CPU shared memory bookkeeping, filled in by kvm_riscv_nacl_init(). */
DEFINE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl);
/*
 * Queue one HFENCE request in the NACL shared memory.
 *
 * @shmem:      base of the NACL shared memory to use
 * @control:    config word for the entry; the nacl_hfence_*() helpers build
 *              it with nacl_hfence_mkconfig(), which sets the PEND bit
 * @page_num:   value stored in the entry's PNUM slot
 * @page_count: value stored in the entry's PCOUNT slot
 *
 * The first entry whose config word does not have
 * SBI_NACL_SHMEM_HFENCE_CONFIG_PEND set is used. When every entry is still
 * pending, nacl_sync_hfence(-1UL) is called and the scan is repeated, at
 * most five times. If no entry becomes free after that, a warning is
 * printed and the request is dropped.
 */
void __kvm_riscv_nacl_hfence(void *shmem,
			     unsigned long control,
			     unsigned long page_num,
			     unsigned long page_count)
{
	int i, ent = -1, try_count = 5;
	unsigned long *entp;

again:
	for (i = 0; i < SBI_NACL_SHMEM_HFENCE_ENTRY_MAX; i++) {
		entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i);
		if (lelong_to_cpu(*entp) & SBI_NACL_SHMEM_HFENCE_CONFIG_PEND)
			continue;

		ent = i;
		break;
	}

	if (ent < 0) {
		/*
		 * Consume one retry per sync attempt so that the loop is
		 * bounded even if the entries never become free.
		 */
		if (try_count-- > 0) {
			nacl_sync_hfence(-1UL);
			goto again;
		}

		pr_warn("KVM: No free entry in NACL shared memory\n");
		return;
	}

	/* Same write order as before: config word first, then PNUM and PCOUNT. */
	entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(ent);
	*entp = cpu_to_lelong(control);
	entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(ent);
	*entp = cpu_to_lelong(page_num);
	entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(ent);
	*entp = cpu_to_lelong(page_count);
}
int kvm_riscv_nacl_enable(void)
{
int rc;
struct sbiret ret;
struct kvm_riscv_nacl *nacl;
if (!kvm_riscv_nacl_available())
return 0;
nacl = this_cpu_ptr(&kvm_riscv_nacl);
ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM,
nacl->shmem_phys, 0, 0, 0, 0, 0);
rc = sbi_err_map_linux_errno(ret.error);
if (rc)
return rc;
return 0;
}
/*
 * Issue SBI_EXT_NACL_SET_SHMEM with SBI_SHMEM_DISABLE in both address
 * arguments on the current CPU. No-op when NACL is not in use; the result
 * of the SBI call is not checked.
 */
void kvm_riscv_nacl_disable(void)
{
	if (kvm_riscv_nacl_available()) {
		sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM,
			  SBI_SHMEM_DISABLE, SBI_SHMEM_DISABLE, 0, 0, 0, 0);
	}
}
/*
 * Release the shared memory of every possible CPU and clear the per-CPU
 * bookkeeping. CPUs without an allocation are skipped, so this is safe to
 * call after a partially completed kvm_riscv_nacl_init(). No-op when NACL
 * is not in use.
 */
void kvm_riscv_nacl_exit(void)
{
	struct kvm_riscv_nacl *cpu_nacl;
	int cpu;

	if (!kvm_riscv_nacl_available())
		return;

	/* Free per-CPU shared memory */
	for_each_possible_cpu(cpu) {
		cpu_nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu);
		if (cpu_nacl->shmem) {
			free_pages((unsigned long)cpu_nacl->shmem,
				   get_order(SBI_NACL_SHMEM_SIZE));
			cpu_nacl->shmem = NULL;
			cpu_nacl->shmem_phys = 0;
		}
	}
}
static long nacl_probe_feature(long feature_id)
{
struct sbiret ret;
if (!kvm_riscv_nacl_available())
return 0;
ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE,
feature_id, 0, 0, 0, 0, 0);
return ret.value;
}
/*
 * Detect the SBI NACL extension, record which of its features are present,
 * and allocate zeroed shared memory for every possible CPU.
 *
 * Returns 0 on success, -ENODEV when the SBI version is below 1.0 or the
 * NACL extension does not probe, and -ENOMEM when an allocation fails (any
 * memory allocated so far is released through kvm_riscv_nacl_exit()).
 */
int kvm_riscv_nacl_init(void)
{
int cpu;
struct page *shmem_page;
struct kvm_riscv_nacl *nacl;
if (sbi_spec_version < sbi_mk_version(1, 0) ||
sbi_probe_extension(SBI_EXT_NACL) <= 0)
return -ENODEV;
/*
 * Enable NACL support. This must come before the feature probes below:
 * nacl_probe_feature() returns 0 while this key is still disabled.
 */
static_branch_enable(&kvm_riscv_nacl_available);
/* Probe NACL features */
if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR))
static_branch_enable(&kvm_riscv_nacl_sync_csr_available);
if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE))
static_branch_enable(&kvm_riscv_nacl_sync_hfence_available);
if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET))
static_branch_enable(&kvm_riscv_nacl_sync_sret_available);
if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR))
static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available);
/* Allocate per-CPU shared memory */
for_each_possible_cpu(cpu) {
nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu);
shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
get_order(SBI_NACL_SHMEM_SIZE));
if (!shmem_page) {
/* Frees the CPUs already populated; the static keys stay enabled. */
kvm_riscv_nacl_exit();
return -ENOMEM;
}
/* Keep both views: virtual for kernel accesses, physical for SBI. */
nacl->shmem = page_to_virt(shmem_page);
nacl->shmem_phys = page_to_phys(shmem_page);
}
return 0;
}