linux-next/arch/riscv/mm/hugetlbpage.c

336 lines
6.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/hugetlb.h>
#include <linux/err.h>
#ifdef CONFIG_RISCV_ISA_SVNAPOT
pte_t *huge_pte_alloc(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long addr,
unsigned long sz)
{
unsigned long order;
pte_t *pte = NULL;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return NULL;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
if (sz == PUD_SIZE) {
pte = (pte_t *)pud;
goto out;
}
if (sz == PMD_SIZE) {
if (want_pmd_share(vma, addr) && pud_none(*pud))
pte = huge_pmd_share(mm, vma, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
goto out;
}
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return NULL;
for_each_napot_order(order) {
if (napot_cont_size(order) == sz) {
pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order));
break;
}
}
out:
WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte));
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr,
unsigned long sz)
{
unsigned long order;
pte_t *pte = NULL;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
if (!pgd_present(*pgd))
return NULL;
p4d = p4d_offset(pgd, addr);
if (!p4d_present(*p4d))
return NULL;
pud = pud_offset(p4d, addr);
if (sz == PUD_SIZE)
/* must be pud huge, non-present or none */
return (pte_t *)pud;
if (!pud_present(*pud))
return NULL;
pmd = pmd_offset(pud, addr);
if (sz == PMD_SIZE)
/* must be pmd huge, non-present or none */
return (pte_t *)pmd;
if (!pmd_present(*pmd))
return NULL;
for_each_napot_order(order) {
if (napot_cont_size(order) == sz) {
pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order));
break;
}
}
return pte;
}
static pte_t get_clear_contig(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
unsigned long pte_num)
{
pte_t orig_pte = ptep_get(ptep);
unsigned long i;
for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) {
pte_t pte = ptep_get_and_clear(mm, addr, ptep);
if (pte_dirty(pte))
orig_pte = pte_mkdirty(orig_pte);
if (pte_young(pte))
orig_pte = pte_mkyoung(orig_pte);
}
return orig_pte;
}
static pte_t get_clear_contig_flush(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
unsigned long pte_num)
{
pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num);
struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
bool valid = !pte_none(orig_pte);
if (valid)
flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num));
return orig_pte;
}
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
unsigned long order;
for_each_napot_order(order) {
if (shift == napot_cont_shift(order)) {
entry = pte_mknapot(entry, order);
break;
}
}
if (order == NAPOT_ORDER_MAX)
entry = pte_mkhuge(entry);
return entry;
}
void set_huge_pte_at(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
pte_t pte)
{
int i, pte_num;
if (!pte_napot(pte)) {
set_pte_at(mm, addr, ptep, pte);
return;
}
pte_num = napot_pte_num(napot_cont_order(pte));
for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE)
set_pte_at(mm, addr, ptep, pte);
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr,
pte_t *ptep,
pte_t pte,
int dirty)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long order;
pte_t orig_pte;
int i, pte_num;
if (!pte_napot(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
order = napot_cont_order(pte);
pte_num = napot_pte_num(order);
ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
if (pte_dirty(orig_pte))
pte = pte_mkdirty(pte);
if (pte_young(orig_pte))
pte = pte_mkyoung(pte);
for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
set_pte_at(mm, addr, ptep, pte);
return true;
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep)
{
pte_t orig_pte = ptep_get(ptep);
int pte_num;
if (!pte_napot(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
pte_num = napot_pte_num(napot_cont_order(orig_pte));
return get_clear_contig(mm, addr, ptep, pte_num);
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep)
{
pte_t pte = ptep_get(ptep);
unsigned long order;
int i, pte_num;
if (!pte_napot(pte)) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
order = napot_cont_order(pte);
pte_num = napot_pte_num(order);
ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
ptep_set_wrprotect(mm, addr, ptep);
}
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr,
pte_t *ptep)
{
pte_t pte = ptep_get(ptep);
int pte_num;
if (!pte_napot(pte))
return ptep_clear_flush(vma, addr, ptep);
pte_num = napot_pte_num(napot_cont_order(pte));
return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}
void huge_pte_clear(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep,
unsigned long sz)
{
pte_t pte = READ_ONCE(*ptep);
int i, pte_num;
if (!pte_napot(pte)) {
pte_clear(mm, addr, ptep);
return;
}
pte_num = napot_pte_num(napot_cont_order(pte));
for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
pte_clear(mm, addr, ptep);
}
static __init bool is_napot_size(unsigned long size)
{
unsigned long order;
if (!has_svnapot())
return false;
for_each_napot_order(order) {
if (size == napot_cont_size(order))
return true;
}
return false;
}
static __init int napot_hugetlbpages_init(void)
{
if (has_svnapot()) {
unsigned long order;
for_each_napot_order(order)
hugetlb_add_hstate(order);
}
return 0;
}
arch_initcall(napot_hugetlbpages_init);
#else
static __init bool is_napot_size(unsigned long size)
{
return false;
}
#endif /*CONFIG_RISCV_ISA_SVNAPOT*/
int pud_huge(pud_t pud)
{
return pud_leaf(pud);
}
int pmd_huge(pmd_t pmd)
{
return pmd_leaf(pmd);
}
hugetlbfs: add arch_hugetlb_valid_size Patch series "Clean up hugetlb boot command line processing", v4. Longpeng(Mike) reported a weird message from hugetlb command line processing and proposed a solution [1]. While the proposed patch does address the specific issue, there are other related issues in command line processing. As hugetlbfs evolved, updates to command line processing have been made to meet immediate needs and not necessarily in a coordinated manner. The result is that some processing is done in arch specific code, some is done in arch independent code and coordination is problematic. Semantics can vary between architectures. The patch series does the following: - Define arch specific arch_hugetlb_valid_size routine used to validate passed huge page sizes. - Move hugepagesz= command line parsing out of arch specific code and into an arch independent routine. - Clean up command line processing to follow desired semantics and document those semantics. [1] https://lore.kernel.org/linux-mm/20200305033014.1152-1-longpeng2@huawei.com This patch (of 3): The architecture independent routine hugetlb_default_setup sets up the default huge pages size. It has no way to verify if the passed value is valid, so it accepts it and attempts to validate at a later time. This requires undocumented cooperation between the arch specific and arch independent code. For architectures that support more than one huge page size, provide a routine arch_hugetlb_valid_size to validate a huge page size. hugetlb_default_setup can use this to validate passed values. arch_hugetlb_valid_size will also be used in a subsequent patch to move processing of the "hugepagesz=" in arch specific code to a common routine in arch independent code. Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> [s390] Acked-by: Will Deacon <will@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: David S. Miller <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Longpeng <longpeng2@huawei.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Mina Almasry <almasrymina@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Nitesh Narayan Lal <nitesh@redhat.com> Cc: Anders Roxell <anders.roxell@linaro.org> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> Cc: Qian Cai <cai@lca.pw> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Link: http://lkml.kernel.org/r/20200428205614.246260-1-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-2-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200417185049.275845-1-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200417185049.275845-2-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-03 16:00:34 -07:00
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (size == HPAGE_SIZE)
return true;
else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
return true;
else if (is_napot_size(size))
return true;
hugetlbfs: add arch_hugetlb_valid_size Patch series "Clean up hugetlb boot command line processing", v4. Longpeng(Mike) reported a weird message from hugetlb command line processing and proposed a solution [1]. While the proposed patch does address the specific issue, there are other related issues in command line processing. As hugetlbfs evolved, updates to command line processing have been made to meet immediate needs and not necessarily in a coordinated manner. The result is that some processing is done in arch specific code, some is done in arch independent code and coordination is problematic. Semantics can vary between architectures. The patch series does the following: - Define arch specific arch_hugetlb_valid_size routine used to validate passed huge page sizes. - Move hugepagesz= command line parsing out of arch specific code and into an arch independent routine. - Clean up command line processing to follow desired semantics and document those semantics. [1] https://lore.kernel.org/linux-mm/20200305033014.1152-1-longpeng2@huawei.com This patch (of 3): The architecture independent routine hugetlb_default_setup sets up the default huge pages size. It has no way to verify if the passed value is valid, so it accepts it and attempts to validate at a later time. This requires undocumented cooperation between the arch specific and arch independent code. For architectures that support more than one huge page size, provide a routine arch_hugetlb_valid_size to validate a huge page size. hugetlb_default_setup can use this to validate passed values. arch_hugetlb_valid_size will also be used in a subsequent patch to move processing of the "hugepagesz=" in arch specific code to a common routine in arch independent code. Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> [s390] Acked-by: Will Deacon <will@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: David S. Miller <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Longpeng <longpeng2@huawei.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Mina Almasry <almasrymina@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Nitesh Narayan Lal <nitesh@redhat.com> Cc: Anders Roxell <anders.roxell@linaro.org> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> Cc: Qian Cai <cai@lca.pw> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Link: http://lkml.kernel.org/r/20200428205614.246260-1-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-2-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200417185049.275845-1-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200417185049.275845-2-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-03 16:00:34 -07:00
else
return false;
}
#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
/* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
hugetlbfs: remove hugetlb_add_hstate() warning for existing hstate hugetlb_add_hstate() prints a warning if the hstate already exists. This was originally done as part of kernel command line parsing. If 'hugepagesz=' was specified more than once, the warning pr_warn("hugepagesz= specified twice, ignoring\n"); would be printed. Some architectures want to enable all huge page sizes. They would call hugetlb_add_hstate for all supported sizes. However, this was done after command line processing and as a result hstates could have already been created for some sizes. To make sure no warning were printed, there would often be code like: if (!size_to_hstate(size) hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT) The only time we want to print the warning is as the result of command line processing. So, remove the warning from hugetlb_add_hstate and add it to the single arch independent routine processing "hugepagesz=". After this, calls to size_to_hstate() in arch specific code can be removed and hugetlb_add_hstate can be called without worrying about warning messages. [mike.kravetz@oracle.com: fix hugetlb initialization] Link: http://lkml.kernel.org/r/4c36c6ce-3774-78fa-abc4-b7346bf24348@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-5-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Tested-by: Anders Roxell <anders.roxell@linaro.org> Acked-by: Mina Almasry <almasrymina@google.com> Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> [s390] Acked-by: Will Deacon <will@kernel.org> Cc: Albert Ou <aou@eecs.berkeley.edu> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David S. Miller <davem@davemloft.net> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Longpeng <longpeng2@huawei.com> Cc: Nitesh Narayan Lal <nitesh@redhat.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Peter Xu <peterx@redhat.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> Cc: Qian Cai <cai@lca.pw> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Link: http://lkml.kernel.org/r/20200417185049.275845-4-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20200428205614.246260-4-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-03 16:00:42 -07:00
if (IS_ENABLED(CONFIG_64BIT))
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
return 0;
}
arch_initcall(gigantic_pages_init);
#endif