mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-09 06:33:34 +00:00
Merge branch 'kvm-guest-sev-migration' into kvm-master
Add guest API and guest kernel support for SEV live migration. This introduces a new hypercall to notify the host of changes to the page encryption status. If a page is encrypted, it must be migrated through the SEV firmware or a helper VM sharing the key. If a page is not encrypted, it can be migrated normally by userspace. The new hypercall is invoked using paravirt_ops. Conflicts: sev_active() replaced by cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT).
This commit is contained in:
commit
b9ecb9a997
@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
|
||||
unsigned long p2, unsigned long p3)
|
||||
{
|
||||
long ret;
|
||||
|
||||
asm volatile("vmmcall"
|
||||
: "=a"(ret)
|
||||
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
|
||||
: "memory");
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_GUEST
|
||||
void kvmclock_init(void);
|
||||
void kvmclock_disable(void);
|
||||
|
@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
|
||||
|
||||
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
|
||||
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
|
||||
void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
|
||||
bool enc);
|
||||
|
||||
void __init mem_encrypt_free_decrypted_mem(void);
|
||||
|
||||
@ -78,6 +80,8 @@ static inline int __init
|
||||
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
|
||||
static inline int __init
|
||||
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
|
||||
static inline void __init
|
||||
early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
|
||||
|
||||
static inline void mem_encrypt_free_decrypted_mem(void) { }
|
||||
|
||||
|
@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
|
||||
PVOP_VCALL1(mmu.exit_mmap, mm);
|
||||
}
|
||||
|
||||
static inline void notify_page_enc_status_changed(unsigned long pfn,
|
||||
int npages, bool enc)
|
||||
{
|
||||
PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
static inline void load_sp0(unsigned long sp0)
|
||||
{
|
||||
|
@ -168,6 +168,7 @@ struct pv_mmu_ops {
|
||||
|
||||
/* Hook for intercepting the destruction of an mm_struct. */
|
||||
void (*exit_mmap)(struct mm_struct *mm);
|
||||
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
struct paravirt_callee_save read_cr2;
|
||||
|
@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
|
||||
int set_direct_map_invalid_noflush(struct page *page);
|
||||
int set_direct_map_default_noflush(struct page *page);
|
||||
bool kernel_page_present(struct page *page);
|
||||
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
|
||||
|
||||
extern int kernel_set_to_readonly;
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <linux/swait.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/cc_platform.h>
|
||||
#include <linux/efi.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/traps.h>
|
||||
@ -41,6 +42,7 @@
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/reboot.h>
|
||||
#include <asm/svm.h>
|
||||
#include <asm/e820/api.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
|
||||
|
||||
@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
|
||||
kvm_disable_steal_time();
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
|
||||
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
|
||||
if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
|
||||
wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
|
||||
kvm_pv_disable_apf();
|
||||
if (!shutdown)
|
||||
apf_task_wake_all();
|
||||
@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
|
||||
__send_ipi_mask(local_mask, vector);
|
||||
}
|
||||
|
||||
/*
 * setup_efi_kvm_sev_migration() - enable live migration if EFI allows it.
 *
 * Applies only to guests with memory encryption that also advertise
 * KVM_FEATURE_MIGRATION_CONTROL and were booted through EFI with runtime
 * services available. Reads the "SevLiveMigrationEnabled" EFI variable
 * (AMD_SEV_MEM_ENCRYPT_GUID). If it is present and non-zero, writes
 * KVM_MIGRATION_READY to MSR_KVM_MIGRATION_CONTROL.
 *
 * Return: 1 when the MSR was written, 0 in every other case.
 */
static int __init setup_efi_kvm_sev_migration(void)
{
	efi_char16_t var_name[] = L"SevLiveMigrationEnabled";
	efi_guid_t var_guid = AMD_SEV_MEM_ENCRYPT_GUID;
	bool migration_on;
	unsigned long var_size = sizeof(migration_on);
	efi_status_t rc;

	/* Only relevant for encrypted guests that support migration control. */
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return 0;
	if (!kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
		return 0;

	if (!efi_enabled(EFI_BOOT))
		return 0;

	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
		pr_info("%s : EFI runtime services are not enabled\n", __func__);
		return 0;
	}

	/* Fetch the variable contents into migration_on. */
	rc = efi.get_variable(var_name, &var_guid, NULL, &var_size,
			      &migration_on);

	if (rc == EFI_NOT_FOUND) {
		pr_info("%s : EFI live migration variable not found\n", __func__);
		return 0;
	}

	if (rc != EFI_SUCCESS) {
		pr_info("%s : EFI variable retrieval failed\n", __func__);
		return 0;
	}

	if (!migration_on) {
		pr_info("%s: live migration disabled in EFI\n", __func__);
		return 0;
	}

	pr_info("%s : live migration enabled in EFI\n", __func__);
	wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);

	return 1;
}
|
||||
|
||||
late_initcall(setup_efi_kvm_sev_migration);
|
||||
|
||||
/*
|
||||
* Set the IPI entry points
|
||||
*/
|
||||
@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
|
||||
return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
|
||||
}
|
||||
|
||||
static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
|
||||
{
|
||||
kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
|
||||
KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
|
||||
}
|
||||
|
||||
/*
 * kvm_init_platform() - KVM guest platform initialisation.
 *
 * For memory-encrypted guests that advertise KVM_FEATURE_MIGRATION_CONTROL
 * this installs the page-encryption-status paravirt hook, tells the host
 * that all E820 RAM is encrypted, re-marks the .bss..decrypted range as
 * decrypted and, when not booted via EFI, signals migration readiness.
 * kvmclock and the APIC post-init hook are set up in all cases.
 */
static void __init kvm_init_platform(void)
{
	const bool sev_migration =
		cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
		kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL);

	if (sev_migration) {
		unsigned long ram_pages, bss_pages;
		int idx;

		pv_ops.mmu.notify_page_enc_status_changed =
			kvm_sev_hc_page_enc_status;

		/*
		 * Reset the host's shared pages list related to kernel
		 * specific page encryption status settings before we load a
		 * new kernel by kexec. Reset the page encryption status
		 * during early boot instead of just before kexec to avoid SMP
		 * races during kvm_pv_guest_cpu_reboot().
		 * NOTE: We cannot reset the complete shared pages list
		 * here as we need to retain the UEFI/OVMF firmware
		 * specific settings.
		 */
		for (idx = 0; idx < e820_table->nr_entries; idx++) {
			struct e820_entry *region = &e820_table->entries[idx];

			/* Only RAM regions are reported to the host. */
			if (region->type == E820_TYPE_RAM) {
				ram_pages = DIV_ROUND_UP(region->size, PAGE_SIZE);

				kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE,
						   region->addr, ram_pages,
						   KVM_MAP_GPA_RANGE_ENCRYPTED |
						   KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
			}
		}

		/*
		 * Ensure that _bss_decrypted section is marked as decrypted
		 * in the shared pages list.
		 */
		bss_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
					 PAGE_SIZE);
		early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
						bss_pages, false);

		/*
		 * If not booted using EFI, enable Live migration support.
		 */
		if (!efi_enabled(EFI_BOOT))
			wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
	}

	kvmclock_init();
	x86_platform.apic_post_init = kvm_apic_init;
}
|
||||
|
@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
|
||||
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
|
||||
|
||||
.mmu.exit_mmap = paravirt_nop,
|
||||
.mmu.notify_page_enc_status_changed = paravirt_nop,
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
|
||||
|
@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
|
||||
swiotlb_adjust_size(size);
|
||||
}
|
||||
|
||||
/*
 * pg_level_to_pfn() - extract the PFN (and optionally protection bits)
 * from a kernel page-table entry at a given mapping level.
 * @level:    PG_LEVEL_4K, PG_LEVEL_2M or PG_LEVEL_1G.
 * @kpte:     entry to decode; reinterpreted as pmd_t / pud_t for the
 *            2M and 1G levels.
 * @ret_prot: if non-NULL, receives the entry's protection bits.
 *
 * Return: the page frame number, or 0 (after a one-time warning) when
 * @level is not one of the supported levels. @ret_prot is left untouched
 * in that case.
 */
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
	unsigned long frame;
	pgprot_t attrs;

	if (level == PG_LEVEL_4K) {
		frame = pte_pfn(*kpte);
		attrs = pte_pgprot(*kpte);
	} else if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)kpte;

		frame = pmd_pfn(*pmd);
		attrs = pmd_pgprot(*pmd);
	} else if (level == PG_LEVEL_1G) {
		pud_t *pud = (pud_t *)kpte;

		frame = pud_pfn(*pud);
		attrs = pud_pgprot(*pud);
	} else {
		WARN_ONCE(1, "Invalid level for kpte\n");
		return 0;
	}

	if (ret_prot)
		*ret_prot = attrs;

	return frame;
}
|
||||
|
||||
/*
 * notify_range_enc_status_changed() - tell the hypervisor that a kernel
 * virtual range changed its encryption status.
 * @vaddr:  start of the virtual address range.
 * @npages: length of the range in PAGE_SIZE pages.
 * @enc:    true if the range is now encrypted, false if decrypted.
 *
 * Walks the range one mapping at a time. Each mapping is looked up, its
 * PFN is resolved, and notify_page_enc_status_changed() is called for the
 * whole mapping, which may be a large page. The walk stops with a
 * one-time warning if an address has no valid mapping.
 *
 * Compiles to an empty function without CONFIG_PARAVIRT.
 */
void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
{
#ifdef CONFIG_PARAVIRT
	/*
	 * Widen before shifting: "npages << PAGE_SHIFT" evaluated in int
	 * overflows once the range reaches 2 GiB.
	 */
	unsigned long vaddr_end = vaddr + ((unsigned long)npages << PAGE_SHIFT);

	while (vaddr < vaddr_end) {
		/*
		 * Keep size/mask in unsigned long, matching what
		 * page_level_size()/page_level_mask() return, so that large
		 * mapping sizes are never truncated.
		 */
		unsigned long psize, pmask, pfn;
		pte_t *kpte;
		int level;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/*
		 * A zero PFN means there is nothing to report for this
		 * mapping. Still advance to the next mapping; skipping
		 * without moving vaddr would spin forever.
		 */
		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (pfn)
			notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT,
						       enc);

		vaddr = (vaddr & pmask) + psize;
	}
#endif
}
|
||||
|
||||
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
|
||||
{
|
||||
pgprot_t old_prot, new_prot;
|
||||
unsigned long pfn, pa, size;
|
||||
pte_t new_pte;
|
||||
|
||||
switch (level) {
|
||||
case PG_LEVEL_4K:
|
||||
pfn = pte_pfn(*kpte);
|
||||
old_prot = pte_pgprot(*kpte);
|
||||
break;
|
||||
case PG_LEVEL_2M:
|
||||
pfn = pmd_pfn(*(pmd_t *)kpte);
|
||||
old_prot = pmd_pgprot(*(pmd_t *)kpte);
|
||||
break;
|
||||
case PG_LEVEL_1G:
|
||||
pfn = pud_pfn(*(pud_t *)kpte);
|
||||
old_prot = pud_pgprot(*(pud_t *)kpte);
|
||||
break;
|
||||
default:
|
||||
pfn = pg_level_to_pfn(level, kpte, &old_prot);
|
||||
if (!pfn)
|
||||
return;
|
||||
}
|
||||
|
||||
new_prot = old_prot;
|
||||
if (enc)
|
||||
@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
|
||||
static int __init early_set_memory_enc_dec(unsigned long vaddr,
|
||||
unsigned long size, bool enc)
|
||||
{
|
||||
unsigned long vaddr_end, vaddr_next;
|
||||
unsigned long vaddr_end, vaddr_next, start;
|
||||
unsigned long psize, pmask;
|
||||
int split_page_size_mask;
|
||||
int level, ret;
|
||||
pte_t *kpte;
|
||||
|
||||
start = vaddr;
|
||||
vaddr_next = vaddr;
|
||||
vaddr_end = vaddr + size;
|
||||
|
||||
@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
|
||||
|
||||
ret = 0;
|
||||
|
||||
notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
|
||||
out:
|
||||
__flush_tlb_all();
|
||||
return ret;
|
||||
@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
|
||||
return early_set_memory_enc_dec(vaddr, size, true);
|
||||
}
|
||||
|
||||
/*
 * early_set_mem_enc_dec_hypercall() - early-boot entry point for reporting
 * an encryption status change to the hypervisor.
 * @vaddr:  start of the virtual address range.
 * @npages: number of pages in the range.
 * @enc:    true if encrypted, false if decrypted.
 *
 * Forwards its arguments unchanged to notify_range_enc_status_changed().
 */
void __init
early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
{
	notify_range_enc_status_changed(vaddr, npages, enc);
}
|
||||
|
||||
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
|
||||
bool force_dma_unencrypted(struct device *dev)
|
||||
{
|
||||
|
@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
|
||||
*/
|
||||
cpa_flush(&cpa, 0);
|
||||
|
||||
/*
|
||||
* Notify hypervisor that a given memory range is mapped encrypted
|
||||
* or decrypted.
|
||||
*/
|
||||
notify_range_enc_status_changed(addr, numpages, enc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
|
||||
|
||||
/* OEM GUIDs */
|
||||
#define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
|
||||
#define AMD_SEV_MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
|
||||
|
||||
typedef struct {
|
||||
efi_guid_t guid;
|
||||
|
Loading…
Reference in New Issue
Block a user