asm-generic: pgalloc: provide generic __pgd_{alloc,free}

We already have a generic implementation of alloc/free up to P4D level, as
well as pgd_free().  Let's finish the work and add a generic PGD-level
alloc helper as well.

Unlike at lower levels, almost all architectures need some specific magic
at PGD level (typically initialising PGD entries), so introducing a
generic pgd_alloc() isn't worth it.  Instead we introduce two new helpers,
__pgd_alloc() and __pgd_free(), and make use of them in the arch-specific
pgd_alloc() and pgd_free() wherever possible.  To accommodate as many
architectures as possible, __pgd_alloc() takes a page allocation order.

Because pagetable_alloc() allocates zeroed pages, explicit zeroing in
pgd_alloc() becomes redundant and we can get rid of it.  Some trivial
implementations of pgd_free() also become unnecessary once __pgd_alloc()
is used; remove them.

Another small improvement is consistent accounting of PGD pages by using
GFP_PGTABLE_{USER,KERNEL} as appropriate.

Not all PGD allocations can be handled by the generic helpers.  In
particular, multiple architectures allocate PGDs from a kmem_cache, and
those PGDs may not be page-sized.

Link: https://lkml.kernel.org/r/20250103184415.2744423-6-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Kevin Brodsky 2025-01-03 18:44:14 +00:00 committed by Andrew Morton
parent ca50b70a5f
commit 3a8c294d0c
19 changed files with 65 additions and 80 deletions

View File

@ -42,7 +42,7 @@ pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *ret, *init; pgd_t *ret, *init;
ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); ret = __pgd_alloc(mm, 0);
init = pgd_offset(&init_mm, 0UL); init = pgd_offset(&init_mm, 0UL);
if (ret) { if (ret) {
#ifdef CONFIG_ALPHA_LARGE_VMALLOC #ifdef CONFIG_ALPHA_LARGE_VMALLOC

View File

@ -53,19 +53,14 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_
static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL); pgd_t *ret = __pgd_alloc(mm, 0);
if (ret) { if (ret) {
int num, num2; int num, num2;
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
memzero(ret, num * sizeof(pgd_t));
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
num2 = VMALLOC_SIZE / PGDIR_SIZE; num2 = VMALLOC_SIZE / PGDIR_SIZE;
memcpy(ret + num, swapper_pg_dir + num, num2 * sizeof(pgd_t)); memcpy(ret + num, swapper_pg_dir + num, num2 * sizeof(pgd_t));
memzero(ret + num + num2,
(PTRS_PER_PGD - num - num2) * sizeof(pgd_t));
} }
return ret; return ret;
} }

View File

@ -17,11 +17,11 @@
#include "mm.h" #include "mm.h"
#ifdef CONFIG_ARM_LPAE #ifdef CONFIG_ARM_LPAE
#define _pgd_alloc(mm) kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL) #define _pgd_alloc(mm) kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL | __GFP_ZERO)
#define _pgd_free(mm, pgd) kfree(pgd) #define _pgd_free(mm, pgd) kfree(pgd)
#else #else
#define _pgd_alloc(mm) (pgd_t *)__get_free_pages(GFP_KERNEL, 2) #define _pgd_alloc(mm) __pgd_alloc(mm, 2)
#define _pgd_free(mm, pgd) free_pages((unsigned long)pgd, 2) #define _pgd_free(mm, pgd) __pgd_free(mm, pgd)
#endif #endif
/* /*
@ -39,8 +39,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (!new_pgd) if (!new_pgd)
goto no_pgd; goto no_pgd;
memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
/* /*
* Copy over the kernel and IO PGD entries * Copy over the kernel and IO PGD entries
*/ */

View File

@ -33,7 +33,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
gfp_t gfp = GFP_PGTABLE_USER; gfp_t gfp = GFP_PGTABLE_USER;
if (pgdir_is_page_size()) if (pgdir_is_page_size())
return (pgd_t *)__get_free_page(gfp); return __pgd_alloc(mm, 0);
else else
return kmem_cache_alloc(pgd_cache, gfp); return kmem_cache_alloc(pgd_cache, gfp);
} }
@ -41,7 +41,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(struct mm_struct *mm, pgd_t *pgd) void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{ {
if (pgdir_is_page_size()) if (pgdir_is_page_size())
free_page((unsigned long)pgd); __pgd_free(mm, pgd);
else else
kmem_cache_free(pgd_cache, pgd); kmem_cache_free(pgd_cache, pgd);
} }

View File

@ -44,7 +44,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd_t *ret; pgd_t *ret;
pgd_t *init; pgd_t *init;
ret = (pgd_t *) __get_free_page(GFP_KERNEL); ret = __pgd_alloc(mm, 0);
if (ret) { if (ret) {
init = pgd_offset(&init_mm, 0UL); init = pgd_offset(&init_mm, 0UL);
pgd_init((unsigned long *)ret); pgd_init((unsigned long *)ret);

View File

@ -22,7 +22,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *pgd; pgd_t *pgd;
pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); pgd = __pgd_alloc(mm, 0);
/* /*
* There may be better ways to do this, but to ensure * There may be better ways to do this, but to ensure

View File

@ -23,11 +23,10 @@ EXPORT_SYMBOL(tlb_virt_to_page);
pgd_t *pgd_alloc(struct mm_struct *mm) pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *init, *ret = NULL; pgd_t *init, *ret;
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
if (ptdesc) { ret = __pgd_alloc(mm, 0);
ret = (pgd_t *)ptdesc_address(ptdesc); if (ret) {
init = pgd_offset(&init_mm, 0UL); init = pgd_offset(&init_mm, 0UL);
pgd_init(ret); pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,

View File

@ -43,7 +43,7 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *new_pgd; pgd_t *new_pgd;
new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); new_pgd = __pgd_alloc(mm, 0);
memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE); memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE);
memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT)); memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT));
return new_pgd; return new_pgd;

View File

@ -21,12 +21,7 @@
extern void __bad_pte(pmd_t *pmd); extern void __bad_pte(pmd_t *pmd);
static inline pgd_t *get_pgd(void) #define pgd_alloc(mm) __pgd_alloc(mm, 0)
{
return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
}
#define pgd_alloc(mm) get_pgd()
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);

View File

@ -15,7 +15,6 @@
#define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PMD_ALLOC_ONE
#define __HAVE_ARCH_PUD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE
#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h> #include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@ -49,11 +48,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
extern void pgd_init(void *addr); extern void pgd_init(void *addr);
extern pgd_t *pgd_alloc(struct mm_struct *mm); extern pgd_t *pgd_alloc(struct mm_struct *mm);
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
pagetable_free(virt_to_ptdesc(pgd));
}
#define __pte_free_tlb(tlb, pte, address) \ #define __pte_free_tlb(tlb, pte, address) \
do { \ do { \
pagetable_dtor(page_ptdesc(pte)); \ pagetable_dtor(page_ptdesc(pte)); \

View File

@ -10,12 +10,10 @@
pgd_t *pgd_alloc(struct mm_struct *mm) pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *init, *ret = NULL; pgd_t *init, *ret;
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM,
PGD_TABLE_ORDER);
if (ptdesc) { ret = __pgd_alloc(mm, PGD_TABLE_ORDER);
ret = ptdesc_address(ptdesc); if (ret) {
init = pgd_offset(&init_mm, 0UL); init = pgd_offset(&init_mm, 0UL);
pgd_init(ret); pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,

View File

@ -11,6 +11,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <asm/cpuinfo.h> #include <asm/cpuinfo.h>
#include <asm/pgalloc.h>
/* pteaddr: /* pteaddr:
* ptbase | vpn* | zero * ptbase | vpn* | zero
@ -54,7 +55,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *ret, *init; pgd_t *ret, *init;
ret = (pgd_t *) __get_free_page(GFP_KERNEL); ret = __pgd_alloc(mm, 0);
if (ret) { if (ret) {
init = pgd_offset(&init_mm, 0UL); init = pgd_offset(&init_mm, 0UL);
pgd_init(ret); pgd_init(ret);

View File

@ -41,15 +41,13 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
*/ */
static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL); pgd_t *ret = __pgd_alloc(mm, 0);
if (ret) { if (ret)
memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
memcpy(ret + USER_PTRS_PER_PGD, memcpy(ret + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
return ret; return ret;
} }

View File

@ -11,26 +11,12 @@
#include <asm/cache.h> #include <asm/cache.h>
#define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PMD_ALLOC_ONE
#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h> #include <asm-generic/pgalloc.h>
/* Allocate the top level pgd (page directory) */ /* Allocate the top level pgd (page directory) */
static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *pgd; return __pgd_alloc(mm, PGD_TABLE_ORDER);
pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
if (unlikely(pgd == NULL))
return NULL;
memset(pgd, 0, PAGE_SIZE << PGD_TABLE_ORDER);
return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
free_pages((unsigned long)pgd, PGD_TABLE_ORDER);
} }
#if CONFIG_PGTABLE_LEVELS == 3 #if CONFIG_PGTABLE_LEVELS == 3

View File

@ -130,9 +130,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *pgd; pgd_t *pgd;
pgd = (pgd_t *)__get_free_page(GFP_KERNEL); pgd = __pgd_alloc(mm, 0);
if (likely(pgd != NULL)) { if (likely(pgd != NULL)) {
memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
/* Copy kernel mappings */ /* Copy kernel mappings */
sync_kernel_mappings(pgd); sync_kernel_mappings(pgd);
} }

View File

@ -214,14 +214,13 @@ void free_initmem(void)
pgd_t *pgd_alloc(struct mm_struct *mm) pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); pgd_t *pgd = __pgd_alloc(mm, 0);
if (pgd) { if (pgd)
memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
memcpy(pgd + USER_PTRS_PER_PGD, memcpy(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
return pgd; return pgd;
} }

View File

@ -397,15 +397,14 @@ void __init pgtable_cache_init(void)
SLAB_PANIC, NULL); SLAB_PANIC, NULL);
} }
static inline pgd_t *_pgd_alloc(void) static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{ {
/* /*
* If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
* We allocate one page for pgd. * We allocate one page for pgd.
*/ */
if (!SHARED_KERNEL_PMD) if (!SHARED_KERNEL_PMD)
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
PGD_ALLOCATION_ORDER);
/* /*
* Now PAE kernel is not running as a Xen domain. We can allocate * Now PAE kernel is not running as a Xen domain. We can allocate
@ -414,24 +413,23 @@ static inline pgd_t *_pgd_alloc(void)
return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
} }
static inline void _pgd_free(pgd_t *pgd) static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{ {
if (!SHARED_KERNEL_PMD) if (!SHARED_KERNEL_PMD)
free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); __pgd_free(mm, pgd);
else else
kmem_cache_free(pgd_cache, pgd); kmem_cache_free(pgd_cache, pgd);
} }
#else #else
static inline pgd_t *_pgd_alloc(void) static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{ {
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
PGD_ALLOCATION_ORDER);
} }
static inline void _pgd_free(pgd_t *pgd) static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{ {
free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); __pgd_free(mm, pgd);
} }
#endif /* CONFIG_X86_PAE */ #endif /* CONFIG_X86_PAE */
@ -441,7 +439,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pmd_t *pmds[MAX_PREALLOCATED_PMDS];
pgd = _pgd_alloc(); pgd = _pgd_alloc(mm);
if (pgd == NULL) if (pgd == NULL)
goto out; goto out;
@ -484,7 +482,7 @@ out_free_pmds:
if (sizeof(pmds) != 0) if (sizeof(pmds) != 0)
free_pmds(mm, pmds, PREALLOCATED_PMDS); free_pmds(mm, pmds, PREALLOCATED_PMDS);
out_free_pgd: out_free_pgd:
_pgd_free(pgd); _pgd_free(mm, pgd);
out: out:
return NULL; return NULL;
} }
@ -494,7 +492,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
pgd_mop_up_pmds(mm, pgd); pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd); pgd_dtor(pgd);
paravirt_pgd_free(mm, pgd); paravirt_pgd_free(mm, pgd);
_pgd_free(pgd); _pgd_free(mm, pgd);
} }
/* /*

View File

@ -29,7 +29,7 @@
static inline pgd_t* static inline pgd_t*
pgd_alloc(struct mm_struct *mm) pgd_alloc(struct mm_struct *mm)
{ {
return (pgd_t*) __get_free_page(GFP_KERNEL | __GFP_ZERO); return __pgd_alloc(mm, 0);
} }
static inline void ptes_clear(pte_t *ptep) static inline void ptes_clear(pte_t *ptep)

View File

@ -258,10 +258,35 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
#endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 4 */
/*
 * __pgd_alloc_noprof - allocate the page(s) backing a PGD
 * @mm: address space the PGD is for; here it is only compared with &init_mm
 *      to choose the GFP flags
 * @order: allocation order passed through to pagetable_alloc_noprof()
 *
 * Returns the address given by ptdesc_address() for the newly allocated
 * ptdesc, or NULL if the allocation failed.  No PGD entries are initialised
 * here; that is left to the caller.
 *
 * NOTE(review): callers appear to rely on the memory being zeroed - confirm
 * that pagetable_alloc_noprof() guarantees this.
 */
static inline pgd_t *__pgd_alloc_noprof(struct mm_struct *mm, unsigned int order)
{
/* Default to the flags used for user page tables. */
gfp_t gfp = GFP_PGTABLE_USER;
struct ptdesc *ptdesc;
/* init_mm gets the kernel page-table flags instead. */
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
/*
 * Never request highmem; presumably because the table is used through the
 * address returned below - TODO confirm.
 */
gfp &= ~__GFP_HIGHMEM;
ptdesc = pagetable_alloc_noprof(gfp, order);
if (!ptdesc)
return NULL;
return ptdesc_address(ptdesc);
}
/*
 * Entry point for callers: forwards its arguments to __pgd_alloc_noprof()
 * through alloc_hooks() (presumably the allocation-profiling wrapper -
 * verify against its definition).
 */
#define __pgd_alloc(...) alloc_hooks(__pgd_alloc_noprof(__VA_ARGS__))
/*
 * __pgd_free - release a PGD through the page-table descriptor API
 * @mm: not used by this helper
 * @pgd: address of the PGD to free; must be page-aligned
 *
 * Looks up the ptdesc for @pgd with virt_to_ptdesc() and hands it to
 * pagetable_free().  No allocation order is passed in.
 *
 * NOTE(review): presumably pagetable_free() recovers the order from the
 * ptdesc itself - confirm.
 */
static inline void __pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
/* A PGD address that is not page-aligned is treated as a fatal bug. */
BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
pagetable_free(ptdesc);
}
#ifndef __HAVE_ARCH_PGD_FREE #ifndef __HAVE_ARCH_PGD_FREE
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{ {
pagetable_free(virt_to_ptdesc(pgd)); __pgd_free(mm, pgd);
} }
#endif #endif