x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page

The commit

  0a9fe8ca84 ("x86/mm: Validate kernel_physical_mapping_init() PTE population")

triggers this warning in SEV guests:

  WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgalloc.h:87 phys_pmd_init+0x30d/0x386
  Call Trace:
   kernel_physical_mapping_init+0xce/0x259
   early_set_memory_enc_dec+0x10f/0x160
   kvm_smp_prepare_boot_cpu+0x71/0x9d
   start_kernel+0x1c9/0x50b
   secondary_startup_64+0xa4/0xb0

An SEV guest calls kernel_physical_mapping_init() to clear the encryption
mask from an existing mapping. While doing so, it also splits large
pages into smaller ones.

To split a page, kernel_physical_mapping_init() allocates a new page table
page and points the existing entry at it. The set_{pud,pmd}_safe() helpers
trigger a warning when they are used to update an entry that is already
present.
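
For reference, the *_safe() helpers added by the Fixes: commit live in
arch/x86/include/asm/pgalloc.h and have roughly this shape (paraphrased from
memory, not quoted from this diff; the pud/p4d variants are analogous):

  /* Paraphrase of the check added by 0a9fe8ca84: warn if an already-present,
   * different entry is about to be overwritten. */
  #define set_pmd_safe(pmdp, pmd) \
  ({ \
          WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
          set_pmd(pmdp, pmd); \
  })

Splitting a large page necessarily overwrites a present PMD/PUD entry with a
pointer to the newly allocated lower-level page table, so the check fires even
though the update is intentional.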

Add a new kernel_physical_mapping_change() helper which uses the
non-safe variants of set_{pmd,pud,p4d}() and {pmd,pud,p4d}_populate()
routines when updating the entry.
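
As the init_64.c hunks below implement it, the existing entry point and the
new one become thin wrappers around a shared __kernel_physical_mapping_init()
which threads a bool init flag down into the new set_*_init() and
*_populate_init() helpers:

  unsigned long __meminit
  kernel_physical_mapping_init(unsigned long paddr_start,
                               unsigned long paddr_end,
                               unsigned long page_size_mask)
  {
          return __kernel_physical_mapping_init(paddr_start, paddr_end,
                                                page_size_mask, true);
  }

  unsigned long __meminit
  kernel_physical_mapping_change(unsigned long paddr_start,
                                 unsigned long paddr_end,
                                 unsigned long page_size_mask)
  {
          return __kernel_physical_mapping_init(paddr_start, paddr_end,
                                                page_size_mask, false);
  }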

Since kernel_physical_mapping_change() may replace an existing
entry with a new one, the caller is responsible for flushing
the TLB at the end. Change early_set_memory_enc_dec() to use
kernel_physical_mapping_change() when it wants to clear the memory
encryption mask from the page table entry.
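
A minimal sketch of the resulting caller pattern (the call is taken from the
mem_encrypt.c hunk in this diff; the trailing __flush_tlb_all() is the flush
early_set_memory_enc_dec() already performs on its exit path, recalled from
the kernel source rather than part of this patch):

  /*
   * kernel_physical_mapping_change() does not flush the TLBs, so
   * a TLB flush is required after we exit from the for loop.
   */
  kernel_physical_mapping_change(__pa(vaddr & pmask),
                                 __pa((vaddr_end & pmask) + psize),
                                 split_page_size_mask);
  ...
  __flush_tlb_all();      /* caller-side flush; the helper does not do it */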

 [ bp:
   - massage commit message.
   - flesh out comment according to dhansen's request.
   - align function arguments at opening brace. ]

Fixes: 0a9fe8ca84 ("x86/mm: Validate kernel_physical_mapping_init() PTE population")
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Lendacky <Thomas.Lendacky@amd.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190417154102.22613-1-brijesh.singh@amd.com
commit eccd906484 (parent 0e72499c3c)
Author: Brijesh Singh, 2019-04-17 15:41:17 +00:00
Committer: Borislav Petkov
3 changed files with 114 additions and 43 deletions

--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,6 +58,37 @@
 #include "ident_map.c"

+#define DEFINE_POPULATE(fname, type1, type2, init) \
+static inline void fname##_init(struct mm_struct *mm, \
+                type1##_t *arg1, type2##_t *arg2, bool init) \
+{ \
+        if (init) \
+                fname##_safe(mm, arg1, arg2); \
+        else \
+                fname(mm, arg1, arg2); \
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init) \
+static inline void set_##type1##_init(type1##_t *arg1, \
+                        type2##_t arg2, bool init) \
+{ \
+        if (init) \
+                set_##type1##_safe(arg1, arg2); \
+        else \
+                set_##type1(arg1, arg2); \
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
  */
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
-              pgprot_t prot)
+              pgprot_t prot, bool init)
 {
         unsigned long pages = 0, paddr_next;
         unsigned long paddr_last = paddr_end;
@@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
                                               E820_TYPE_RAM) &&
                             !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
                                               E820_TYPE_RESERVED_KERN))
-                                set_pte_safe(pte, __pte(0));
+                                set_pte_init(pte, __pte(0), init);
                         continue;
                 }

@@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
                         pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
                                 pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
                 pages++;
-                set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+                set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
                 paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
         }

@@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
-              unsigned long page_size_mask, pgprot_t prot)
+              unsigned long page_size_mask, pgprot_t prot, bool init)
 {
         unsigned long pages = 0, paddr_next;
         unsigned long paddr_last = paddr_end;
@@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
                                               E820_TYPE_RAM) &&
                             !e820__mapped_any(paddr & PMD_MASK, paddr_next,
                                               E820_TYPE_RESERVED_KERN))
-                                set_pmd_safe(pmd, __pmd(0));
+                                set_pmd_init(pmd, __pmd(0), init);
                         continue;
                 }

@@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
                                 spin_lock(&init_mm.page_table_lock);
                                 pte = (pte_t *)pmd_page_vaddr(*pmd);
                                 paddr_last = phys_pte_init(pte, paddr,
-                                                           paddr_end, prot);
+                                                           paddr_end, prot,
+                                                           init);
                                 spin_unlock(&init_mm.page_table_lock);
                                 continue;
                         }
@@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
                 if (page_size_mask & (1<<PG_LEVEL_2M)) {
                         pages++;
                         spin_lock(&init_mm.page_table_lock);
-                        set_pte_safe((pte_t *)pmd,
-                                pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-                                        __pgprot(pgprot_val(prot) | _PAGE_PSE)));
+                        set_pte_init((pte_t *)pmd,
+                                     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+                                             __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+                                     init);
                         spin_unlock(&init_mm.page_table_lock);
                         paddr_last = paddr_next;
                         continue;
                 }

                 pte = alloc_low_page();
-                paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+                paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);

                 spin_lock(&init_mm.page_table_lock);
-                pmd_populate_kernel_safe(&init_mm, pmd, pte);
+                pmd_populate_kernel_init(&init_mm, pmd, pte, init);
                 spin_unlock(&init_mm.page_table_lock);
         }
         update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
-              unsigned long page_size_mask)
+              unsigned long page_size_mask, bool init)
 {
         unsigned long pages = 0, paddr_next;
         unsigned long paddr_last = paddr_end;
@@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
                                               E820_TYPE_RAM) &&
                             !e820__mapped_any(paddr & PUD_MASK, paddr_next,
                                               E820_TYPE_RESERVED_KERN))
-                                set_pud_safe(pud, __pud(0));
+                                set_pud_init(pud, __pud(0), init);
                         continue;
                 }

@@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
                                 paddr_last = phys_pmd_init(pmd, paddr,
                                                            paddr_end,
                                                            page_size_mask,
-                                                           prot);
+                                                           prot, init);
                                 continue;
                         }
                         /*
@@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
                 if (page_size_mask & (1<<PG_LEVEL_1G)) {
                         pages++;
                         spin_lock(&init_mm.page_table_lock);
-                        set_pte_safe((pte_t *)pud,
-                                pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-                                        PAGE_KERNEL_LARGE));
+                        set_pte_init((pte_t *)pud,
+                                     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+                                             PAGE_KERNEL_LARGE),
+                                     init);
                         spin_unlock(&init_mm.page_table_lock);
                         paddr_last = paddr_next;
                         continue;
@@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,

                 pmd = alloc_low_page();
                 paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
-                                           page_size_mask, prot);
+                                           page_size_mask, prot, init);

                 spin_lock(&init_mm.page_table_lock);
-                pud_populate_safe(&init_mm, pud, pmd);
+                pud_populate_init(&init_mm, pud, pmd, init);
                 spin_unlock(&init_mm.page_table_lock);
         }

@@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,

 static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
-              unsigned long page_size_mask)
+              unsigned long page_size_mask, bool init)
 {
         unsigned long paddr_next, paddr_last = paddr_end;
         unsigned long vaddr = (unsigned long)__va(paddr);
         int i = p4d_index(vaddr);

         if (!pgtable_l5_enabled())
-                return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+                return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+                                     page_size_mask, init);

         for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
                 p4d_t *p4d;
@@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
                                               E820_TYPE_RAM) &&
                             !e820__mapped_any(paddr & P4D_MASK, paddr_next,
                                               E820_TYPE_RESERVED_KERN))
-                                set_p4d_safe(p4d, __p4d(0));
+                                set_p4d_init(p4d, __p4d(0), init);
                         continue;
                 }

                 if (!p4d_none(*p4d)) {
                         pud = pud_offset(p4d, 0);
-                        paddr_last = phys_pud_init(pud, paddr,
-                                        paddr_end,
-                                        page_size_mask);
+                        paddr_last = phys_pud_init(pud, paddr, paddr_end,
+                                                   page_size_mask, init);
                         continue;
                 }

                 pud = alloc_low_page();
                 paddr_last = phys_pud_init(pud, paddr, paddr_end,
-                                           page_size_mask);
+                                           page_size_mask, init);

                 spin_lock(&init_mm.page_table_lock);
-                p4d_populate_safe(&init_mm, p4d, pud);
+                p4d_populate_init(&init_mm, p4d, pud, init);
                 spin_unlock(&init_mm.page_table_lock);
         }

         return paddr_last;
 }

-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
-                             unsigned long paddr_end,
-                             unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+                               unsigned long paddr_end,
+                               unsigned long page_size_mask,
+                               bool init)
 {
         bool pgd_changed = false;
         unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
                         p4d = (p4d_t *)pgd_page_vaddr(*pgd);
                         paddr_last = phys_p4d_init(p4d, __pa(vaddr),
                                                    __pa(vaddr_end),
-                                                   page_size_mask);
+                                                   page_size_mask,
+                                                   init);
                         continue;
                 }

                 p4d = alloc_low_page();
                 paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
-                                           page_size_mask);
+                                           page_size_mask, init);

                 spin_lock(&init_mm.page_table_lock);
                 if (pgtable_l5_enabled())
-                        pgd_populate_safe(&init_mm, pgd, p4d);
+                        pgd_populate_init(&init_mm, pgd, p4d, init);
                 else
-                        p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+                        p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+                                          (pud_t *) p4d, init);
                 spin_unlock(&init_mm.page_table_lock);
                 pgd_changed = true;
         }
@@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
         return paddr_last;
 }

+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+                             unsigned long paddr_end,
+                             unsigned long page_size_mask)
+{
+        return __kernel_physical_mapping_init(paddr_start, paddr_end,
+                                              page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+                               unsigned long paddr_end,
+                               unsigned long page_size_mask)
+{
+        return __kernel_physical_mapping_init(paddr_start, paddr_end,
+                                              page_size_mask, false);
+}
+
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {

--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,13 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
                 else
                         split_page_size_mask = 1 << PG_LEVEL_2M;

-                kernel_physical_mapping_init(__pa(vaddr & pmask),
-                                             __pa((vaddr_end & pmask) + psize),
-                                             split_page_size_mask);
+                /*
+                 * kernel_physical_mapping_change() does not flush the TLBs, so
+                 * a TLB flush is required after we exit from the for loop.
+                 */
+                kernel_physical_mapping_change(__pa(vaddr & pmask),
+                                               __pa((vaddr_end & pmask) + psize),
+                                               split_page_size_mask);
         }

         ret = 0;

--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
 unsigned long kernel_physical_mapping_init(unsigned long start,
                                            unsigned long end,
                                            unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+                                             unsigned long end,
+                                             unsigned long page_size_mask);
 void zone_sizes_init(void);

 extern int after_bootmem;