Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Synced 2025-01-17 18:56:24 +00:00
mm: make folio_pte_batch available outside of mm/memory.c
madvise, mprotect and some others might need folio_pte_batch to check if
a range of PTEs are completely mapped to a large folio with contiguous
physical addresses.  Let's make it available in mm/internal.h.

While at it, add proper kernel doc and sanity-check more input parameters
using two additional VM_WARN_ON_FOLIO().

[21cnbao@gmail.com: build fix]
  Link: https://lkml.kernel.org/r/CAGsJ_4wWzG-37D82vqP_zt+Fcbz+URVe5oXLBc4M5wbN8A_gpQ@mail.gmail.com
[david@redhat.com: improve the doc for the exported func]
Link: https://lkml.kernel.org/r/20240227104201.337988-1-21cnbao@gmail.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent 9164448d31
commit ac96cc4d1c
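For context, here is a minimal, hypothetical sketch of how a caller along the lines of madvise or mprotect might use the helper once it is exported from mm/internal.h; it is not part of this commit. The function name and the assumption that the surrounding page-table walk has already resolved folio, addr, ptep, pte and a same-page-table end address are illustrative only. Passing NULL for any_writable is allowed, since the kernel-doc marks that pointer as optional.

/*
 * Illustrative sketch only -- not part of this patch. "end" is assumed
 * to lie within the same page table, so max_nr cannot cross it.
 */
static int hypothetical_skip_folio_batch(struct folio *folio,
		unsigned long addr, pte_t *ptep, pte_t pte, unsigned long end)
{
	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
	int max_nr = (end - addr) >> PAGE_SHIFT;
	int nr = 1;

	/* Only a present PTE mapping a large folio can start a batch. */
	if (folio_test_large(folio) && pte_present(pte))
		nr = folio_pte_batch(folio, addr, ptep, pte, max_nr,
				     fpb_flags, NULL);

	/* The caller would then advance addr and ptep by "nr" entries. */
	return nr;
}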
mm/internal.h

@@ -83,6 +83,99 @@ static inline void *folio_raw_mapping(struct folio *folio)
	return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
}

#ifdef CONFIG_MMU

/* Flags for folio_pte_batch(). */
typedef int __bitwise fpb_t;

/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
#define FPB_IGNORE_DIRTY		((__force fpb_t)BIT(0))

/* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */
#define FPB_IGNORE_SOFT_DIRTY		((__force fpb_t)BIT(1))

static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
{
	if (flags & FPB_IGNORE_DIRTY)
		pte = pte_mkclean(pte);
	if (likely(flags & FPB_IGNORE_SOFT_DIRTY))
		pte = pte_clear_soft_dirty(pte);
	return pte_wrprotect(pte_mkold(pte));
}

/**
 * folio_pte_batch - detect a PTE batch for a large folio
 * @folio: The large folio to detect a PTE batch for.
 * @addr: The user virtual address the first page is mapped at.
 * @start_ptep: Page table pointer for the first entry.
 * @pte: Page table entry for the first page.
 * @max_nr: The maximum number of table entries to consider.
 * @flags: Flags to modify the PTE batch semantics.
 * @any_writable: Optional pointer to indicate whether any entry except the
 *		  first one is writable.
 *
 * Detect a PTE batch: consecutive (present) PTEs that map consecutive
 * pages of the same large folio.
 *
 * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
 * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and
 * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY).
 *
 * start_ptep must map any page of the folio. max_nr must be at least one and
 * must be limited by the caller so scanning cannot exceed a single page table.
 *
 * Return: the number of table entries in the batch.
 */
static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
		pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
		bool *any_writable)
{
	unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
	const pte_t *end_ptep = start_ptep + max_nr;
	pte_t expected_pte, *ptep;
	bool writable;
	int nr;

	if (any_writable)
		*any_writable = false;

	VM_WARN_ON_FOLIO(!pte_present(pte), folio);
	VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
	VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);

	nr = pte_batch_hint(start_ptep, pte);
	expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
	ptep = start_ptep + nr;

	while (ptep < end_ptep) {
		pte = ptep_get(ptep);
		if (any_writable)
			writable = !!pte_write(pte);
		pte = __pte_batch_clear_ignored(pte, flags);

		if (!pte_same(pte, expected_pte))
			break;

		/*
		 * Stop immediately once we reached the end of the folio. In
		 * corner cases the next PFN might fall into a different
		 * folio.
		 */
		if (pte_pfn(pte) >= folio_end_pfn)
			break;

		if (any_writable)
			*any_writable |= writable;

		nr = pte_batch_hint(ptep, pte);
		expected_pte = pte_advance_pfn(expected_pte, nr);
		ptep += nr;
	}

	return min(ptep - start_ptep, max_nr);
}
#endif /* CONFIG_MMU */

void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
		int nr_throttled);
static inline void acct_reclaim_writeback(struct folio *folio)
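To make the batch definition in the kernel-doc above concrete: two adjacent entries belong to the same batch exactly when, after the always-ignored accessed and writable bits (plus the dirty and soft-dirty bits, depending on the FPB_IGNORE_* flags) are masked off, the second entry equals the first one advanced by one PFN. A purely illustrative helper expressing that pairwise relation, not part of the patch, using only functions from the hunk above:

/*
 * Illustrative only: a simplified view of the comparison that
 * folio_pte_batch() performs between adjacent entries.
 */
static inline bool hypothetical_ptes_batchable(pte_t first, pte_t second,
		fpb_t flags)
{
	first = __pte_batch_clear_ignored(pte_advance_pfn(first, 1), flags);
	second = __pte_batch_clear_ignored(second, flags);
	return pte_same(first, second);
}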
mm/memory.c (76 lines removed -- folio_pte_batch and its helpers moved to mm/internal.h)
@@ -953,82 +953,6 @@ static __always_inline void __copy_present_ptes(struct vm_area_struct *dst_vma,
	set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr);
}

/* Flags for folio_pte_batch(). */
typedef int __bitwise fpb_t;

/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
#define FPB_IGNORE_DIRTY		((__force fpb_t)BIT(0))

/* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */
#define FPB_IGNORE_SOFT_DIRTY		((__force fpb_t)BIT(1))

static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
{
	if (flags & FPB_IGNORE_DIRTY)
		pte = pte_mkclean(pte);
	if (likely(flags & FPB_IGNORE_SOFT_DIRTY))
		pte = pte_clear_soft_dirty(pte);
	return pte_wrprotect(pte_mkold(pte));
}

/*
 * Detect a PTE batch: consecutive (present) PTEs that map consecutive
 * pages of the same folio.
 *
 * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
 * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and
 * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY).
 *
 * If "any_writable" is set, it will indicate if any other PTE besides the
 * first (given) PTE is writable.
 */
static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
		pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
		bool *any_writable)
{
	unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
	const pte_t *end_ptep = start_ptep + max_nr;
	pte_t expected_pte, *ptep;
	bool writable;
	int nr;

	if (any_writable)
		*any_writable = false;

	VM_WARN_ON_FOLIO(!pte_present(pte), folio);

	nr = pte_batch_hint(start_ptep, pte);
	expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
	ptep = start_ptep + nr;

	while (ptep < end_ptep) {
		pte = ptep_get(ptep);
		if (any_writable)
			writable = !!pte_write(pte);
		pte = __pte_batch_clear_ignored(pte, flags);

		if (!pte_same(pte, expected_pte))
			break;

		/*
		 * Stop immediately once we reached the end of the folio. In
		 * corner cases the next PFN might fall into a different
		 * folio.
		 */
		if (pte_pfn(pte) >= folio_end_pfn)
			break;

		if (any_writable)
			*any_writable |= writable;

		nr = pte_batch_hint(ptep, pte);
		expected_pte = pte_advance_pfn(expected_pte, nr);
		ptep += nr;
	}

	return min(ptep - start_ptep, max_nr);
}

/*
 * Copy one present PTE, trying to batch-process subsequent PTEs that map
 * consecutive pages of the same folio by copying them as well.
 */