16 hotfixes. All except one are for MM. 10 of these are cc:stable and
the others pertain to post-6.10 issues.
 
 As usual with these merges, singletons and doubletons all over the place,
 no identifiable-by-me theme.  Please see the lovingly curated changelogs
 to get the skinny.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZsFf8wAKCRDdBJ7gKXxA
 jvEUAP97y/sqKD8rQNc0R8fRGSPNPamwyok8RHwohb0JEHovlAD9HsQ9Ad57EpqR
 wBexMxJRFc7Dt73Tu6IkLQ1iNGqABAc=
 =8KNp
 -----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "16 hotfixes. All except one are for MM. 10 of these are cc:stable and
  the others pertain to post-6.10 issues.

  As usual with these merges, singletons and doubletons all over the
  place, no identifiable-by-me theme. Please see the lovingly curated
  changelogs to get the skinny"

* tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/migrate: fix deadlock in migrate_pages_batch() on large folios
  alloc_tag: mark pages reserved during CMA activation as not tagged
  alloc_tag: introduce clear_page_tag_ref() helper function
  crash: fix riscv64 crash memory reserve dead loop
  selftests: memfd_secret: don't build memfd_secret test on unsupported arches
  mm: fix endless reclaim on machines with unaccepted memory
  selftests/mm: compaction_test: fix off by one in check_compaction()
  mm/numa: no task_numa_fault() call if PMD is changed
  mm/numa: no task_numa_fault() call if PTE is changed
  mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0
  mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
  mm: don't account memmap per-node
  mm: add system wide stats items category
  mm: don't account memmap on failure
  mm/hugetlb: fix hugetlb vs. core-mm PT locking
  mseal: fix is_madv_discard()
This commit is contained in:
Linus Torvalds 2024-08-17 19:50:16 -07:00
commit c3f2d783a4
22 changed files with 201 additions and 182 deletions

View File

@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason)
static inline spinlock_t *huge_pte_lockptr(struct hstate *h, static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte) struct mm_struct *mm, pte_t *pte)
{ {
if (huge_page_size(h) == PMD_SIZE) const unsigned long size = huge_page_size(h);
VM_WARN_ON(size == PAGE_SIZE);
/*
* hugetlb must use the exact same PT locks as core-mm page table
* walkers would. When modifying a PTE table, hugetlb must take the
* PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
* PT lock etc.
*
* The expectation is that any hugetlb folio smaller than a PMD is
* always mapped into a single PTE table and that any hugetlb folio
* smaller than a PUD (but at least as big as a PMD) is always mapped
* into a single PMD table.
*
* If that does not hold for an architecture, then that architecture
* must disable split PT locks such that all *_lockptr() functions
* will give us the same result: the per-MM PT lock.
*
* Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
* PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
* and core-mm would use pmd_lockptr(). However, in such configurations
* split PMD locks are disabled -- they don't make sense on a single
* PGDIR page table -- and the end result is the same.
*/
if (size >= PUD_SIZE)
return pud_lockptr(mm, (pud_t *) pte);
else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
return pmd_lockptr(mm, (pmd_t *) pte); return pmd_lockptr(mm, (pmd_t *) pte);
VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
return &mm->page_table_lock; return ptep_lockptr(mm, pte);
} }
#ifndef hugepages_supported #ifndef hugepages_supported

View File

@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
} }
/*
 * Return the page-table lock for the PTE table that contains @pte.
 *
 * The lock is looked up through the ptdesc of the page holding @pte, which
 * is found from the entry's kernel virtual address via virt_to_ptdesc().
 * @mm is not used by this variant.
 */
static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
{
/*
 * Refuse to build with CONFIG_HIGHPTE.
 * NOTE(review): presumably because a highmem PTE table has no permanent
 * kernel virtual address to feed virt_to_ptdesc() -- confirm.
 */
BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
/* Refuse to build if a maximally sized PTE table would not fit in one page. */
BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
return ptlock_ptr(virt_to_ptdesc(pte));
}
static inline bool ptlock_init(struct ptdesc *ptdesc) static inline bool ptlock_init(struct ptdesc *ptdesc)
{ {
/* /*
@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{ {
return &mm->page_table_lock; return &mm->page_table_lock;
} }
/*
 * Variant of ptep_lockptr() that ignores @pte and always returns the per-MM
 * page_table_lock, matching the pte_lockptr() defined just before it.
 */
static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
{
return &mm->page_table_lock;
}
static inline void ptlock_cache_init(void) {} static inline void ptlock_cache_init(void) {}
static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
static inline void ptlock_free(struct ptdesc *ptdesc) {} static inline void ptlock_free(struct ptdesc *ptdesc) {}

View File

@ -220,8 +220,6 @@ enum node_stat_item {
PGDEMOTE_KSWAPD, PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT, PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED, PGDEMOTE_KHUGEPAGED,
NR_MEMMAP, /* page metadata allocated through buddy allocator */
NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */
NR_VM_NODE_STAT_ITEMS NR_VM_NODE_STAT_ITEMS
}; };

View File

@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref)
page_ext_put(page_ext_from_codetag_ref(ref)); page_ext_put(page_ext_from_codetag_ref(ref));
} }
/*
 * Mark the allocation-tag reference of @page as empty.
 *
 * Does nothing unless memory allocation profiling is enabled and
 * get_page_tag_ref() yields a reference for @page. The reference obtained
 * here is released again with put_page_tag_ref() before returning.
 */
static inline void clear_page_tag_ref(struct page *page)
{
if (mem_alloc_profiling_enabled()) {
union codetag_ref *ref = get_page_tag_ref(page);
/* A NULL ref means there is nothing to clear (and nothing to put). */
if (ref) {
set_codetag_empty(ref);
put_page_tag_ref(ref);
}
}
}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr) unsigned int nr)
{ {
@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
static inline void put_page_tag_ref(union codetag_ref *ref) {} static inline void put_page_tag_ref(union codetag_ref *ref) {}
/*
 * Empty stub of clear_page_tag_ref(), defined alongside the other no-op
 * page tag helpers.
 * NOTE(review): presumably the build without allocation profiling -- the
 * enclosing #if is outside this hunk, confirm.
 */
static inline void clear_page_tag_ref(struct page *page) {}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr) {} unsigned int nr) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}

View File

@ -34,10 +34,13 @@ struct reclaim_stat {
unsigned nr_lazyfree_fail; unsigned nr_lazyfree_fail;
}; };
enum writeback_stat_item { /* Stat data for system wide items */
enum vm_stat_item {
NR_DIRTY_THRESHOLD, NR_DIRTY_THRESHOLD,
NR_DIRTY_BG_THRESHOLD, NR_DIRTY_BG_THRESHOLD,
NR_VM_WRITEBACK_STAT_ITEMS, NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */
NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */
NR_VM_STAT_ITEMS,
}; };
#ifdef CONFIG_VM_EVENT_COUNTERS #ifdef CONFIG_VM_EVENT_COUNTERS
@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru)
return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
} }
static inline const char *writeback_stat_name(enum writeback_stat_item item)
{
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
NR_VM_NUMA_EVENT_ITEMS +
NR_VM_NODE_STAT_ITEMS +
item];
}
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
static inline const char *vm_event_name(enum vm_event_item item) static inline const char *vm_event_name(enum vm_event_item item)
{ {
return vmstat_text[NR_VM_ZONE_STAT_ITEMS + return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
NR_VM_NUMA_EVENT_ITEMS + NR_VM_NUMA_EVENT_ITEMS +
NR_VM_NODE_STAT_ITEMS + NR_VM_NODE_STAT_ITEMS +
NR_VM_WRITEBACK_STAT_ITEMS + NR_VM_STAT_ITEMS +
item]; item];
} }
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
} }
void __meminit mod_node_early_perpage_metadata(int nid, long delta); void memmap_boot_pages_add(long delta);
void __meminit store_early_perpage_metadata(void); void memmap_pages_add(long delta);
#endif /* _LINUX_VMSTAT_H */ #endif /* _LINUX_VMSTAT_H */

View File

@ -423,7 +423,8 @@ retry:
if (high && search_end == CRASH_ADDR_HIGH_MAX) { if (high && search_end == CRASH_ADDR_HIGH_MAX) {
search_end = CRASH_ADDR_LOW_MAX; search_end = CRASH_ADDR_LOW_MAX;
search_base = 0; search_base = 0;
goto retry; if (search_end != CRASH_ADDR_HIGH_MAX)
goto retry;
} }
pr_warn("cannot allocate crashkernel (size:0x%llx)\n", pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
crash_size); crash_size);

View File

@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl); spin_unlock(vmf->ptl);
goto out; return 0;
} }
pmd = pmd_modify(oldpmd, vma->vm_page_prot); pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
if (!migrate_misplaced_folio(folio, vma, target_nid)) { if (!migrate_misplaced_folio(folio, vma, target_nid)) {
flags |= TNF_MIGRATED; flags |= TNF_MIGRATED;
nid = target_nid; nid = target_nid;
} else { task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
flags |= TNF_MIGRATE_FAIL; return 0;
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
goto out;
}
goto out_map;
} }
out: flags |= TNF_MIGRATE_FAIL;
if (nid != NUMA_NO_NODE) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
return 0; return 0;
}
out_map: out_map:
/* Restore the PMD */ /* Restore the PMD */
pmd = pmd_modify(oldpmd, vma->vm_page_prot); pmd = pmd_modify(oldpmd, vma->vm_page_prot);
@ -1753,7 +1747,10 @@ out_map:
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl); spin_unlock(vmf->ptl);
goto out;
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
return 0;
} }
/* /*

View File

@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
static inline void free_vmemmap_page(struct page *page) static inline void free_vmemmap_page(struct page *page)
{ {
if (PageReserved(page)) { if (PageReserved(page)) {
memmap_boot_pages_add(-1);
free_bootmem_page(page); free_bootmem_page(page);
mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1);
} else { } else {
memmap_pages_add(-1);
__free_page(page); __free_page(page);
mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1);
} }
} }
@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
copy_page(page_to_virt(walk.reuse_page), copy_page(page_to_virt(walk.reuse_page),
(void *)walk.reuse_addr); (void *)walk.reuse_addr);
list_add(&walk.reuse_page->lru, vmemmap_pages); list_add(&walk.reuse_page->lru, vmemmap_pages);
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); memmap_pages_add(1);
} }
/* /*
@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
page = alloc_pages_node(nid, gfp_mask, 0); page = alloc_pages_node(nid, gfp_mask, 0);
if (!page) { if (!page)
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i);
goto out; goto out;
}
list_add(&page->lru, list); list_add(&page->lru, list);
} }
memmap_pages_add(nr_pages);
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages);
return 0; return 0;
out: out:

View File

@ -2417,7 +2417,7 @@ struct memory_failure_entry {
struct memory_failure_cpu { struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry, DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE); MEMORY_FAILURE_FIFO_SIZE);
spinlock_t lock; raw_spinlock_t lock;
struct work_struct work; struct work_struct work;
}; };
@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags)
{ {
struct memory_failure_cpu *mf_cpu; struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags; unsigned long proc_flags;
bool buffer_overflow;
struct memory_failure_entry entry = { struct memory_failure_entry entry = {
.pfn = pfn, .pfn = pfn,
.flags = flags, .flags = flags,
}; };
mf_cpu = &get_cpu_var(memory_failure_cpu); mf_cpu = &get_cpu_var(memory_failure_cpu);
spin_lock_irqsave(&mf_cpu->lock, proc_flags); raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
if (kfifo_put(&mf_cpu->fifo, entry)) buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
if (!buffer_overflow)
schedule_work_on(smp_processor_id(), &mf_cpu->work); schedule_work_on(smp_processor_id(), &mf_cpu->work);
else raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
if (buffer_overflow)
pr_err("buffer overflow when queuing memory failure at %#lx\n", pr_err("buffer overflow when queuing memory failure at %#lx\n",
pfn); pfn);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
} }
EXPORT_SYMBOL_GPL(memory_failure_queue); EXPORT_SYMBOL_GPL(memory_failure_queue);
@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work)
mf_cpu = container_of(work, struct memory_failure_cpu, work); mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) { for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags); raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry); gotten = kfifo_get(&mf_cpu->fifo, &entry);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten) if (!gotten)
break; break;
if (entry.flags & MF_SOFT_OFFLINE) if (entry.flags & MF_SOFT_OFFLINE)
@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void)
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
mf_cpu = &per_cpu(memory_failure_cpu, cpu); mf_cpu = &per_cpu(memory_failure_cpu, cpu);
spin_lock_init(&mf_cpu->lock); raw_spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo); INIT_KFIFO(mf_cpu->fifo);
INIT_WORK(&mf_cpu->work, memory_failure_work_func); INIT_WORK(&mf_cpu->work, memory_failure_work_func);
} }

View File

@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out; return 0;
} }
pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_modify(old_pte, vma->vm_page_prot);
@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (!migrate_misplaced_folio(folio, vma, target_nid)) { if (!migrate_misplaced_folio(folio, vma, target_nid)) {
nid = target_nid; nid = target_nid;
flags |= TNF_MIGRATED; flags |= TNF_MIGRATED;
} else { task_numa_fault(last_cpupid, nid, nr_pages, flags);
flags |= TNF_MIGRATE_FAIL; return 0;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
if (unlikely(!vmf->pte))
goto out;
if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
goto out_map;
} }
out: flags |= TNF_MIGRATE_FAIL;
if (nid != NUMA_NO_NODE) vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
task_numa_fault(last_cpupid, nid, nr_pages, flags); vmf->address, &vmf->ptl);
return 0; if (unlikely(!vmf->pte))
return 0;
if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0;
}
out_map: out_map:
/* /*
* Make it present again, depending on how arch implements * Make it present again, depending on how arch implements
@ -5387,7 +5383,10 @@ out_map:
numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
writable); writable);
pte_unmap_unlock(vmf->pte, vmf->ptl); pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, nr_pages, flags);
return 0;
} }
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)

View File

@ -1479,11 +1479,17 @@ out:
return rc; return rc;
} }
static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) static inline int try_split_folio(struct folio *folio, struct list_head *split_folios,
enum migrate_mode mode)
{ {
int rc; int rc;
folio_lock(folio); if (mode == MIGRATE_ASYNC) {
if (!folio_trylock(folio))
return -EAGAIN;
} else {
folio_lock(folio);
}
rc = split_folio_to_list(folio, split_folios); rc = split_folio_to_list(folio, split_folios);
folio_unlock(folio); folio_unlock(folio);
if (!rc) if (!rc)
@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from,
*/ */
if (nr_pages > 2 && if (nr_pages > 2 &&
!list_empty(&folio->_deferred_list)) { !list_empty(&folio->_deferred_list)) {
if (try_split_folio(folio, split_folios) == 0) { if (!try_split_folio(folio, split_folios, mode)) {
nr_failed++; nr_failed++;
stats->nr_thp_failed += is_thp; stats->nr_thp_failed += is_thp;
stats->nr_thp_split += is_thp; stats->nr_thp_split += is_thp;
@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from,
if (!thp_migration_supported() && is_thp) { if (!thp_migration_supported() && is_thp) {
nr_failed++; nr_failed++;
stats->nr_thp_failed++; stats->nr_thp_failed++;
if (!try_split_folio(folio, split_folios)) { if (!try_split_folio(folio, split_folios, mode)) {
stats->nr_thp_split++; stats->nr_thp_split++;
stats->nr_split++; stats->nr_split++;
continue; continue;
@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from,
stats->nr_thp_failed += is_thp; stats->nr_thp_failed += is_thp;
/* Large folio NUMA faulting doesn't split to retry. */ /* Large folio NUMA faulting doesn't split to retry. */
if (is_large && !nosplit) { if (is_large && !nosplit) {
int ret = try_split_folio(folio, split_folios); int ret = try_split_folio(folio, split_folios, mode);
if (!ret) { if (!ret) {
stats->nr_thp_split += is_thp; stats->nr_thp_split += is_thp;

View File

@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
panic("Failed to allocate %ld bytes for node %d memory map\n", panic("Failed to allocate %ld bytes for node %d memory map\n",
size, pgdat->node_id); size, pgdat->node_id);
pgdat->node_mem_map = map + offset; pgdat->node_mem_map = map + offset;
mod_node_early_perpage_metadata(pgdat->node_id, memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
DIV_ROUND_UP(size, PAGE_SIZE));
pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
__func__, pgdat->node_id, (unsigned long)pgdat, __func__, pgdat->node_id, (unsigned long)pgdat,
(unsigned long)pgdat->node_mem_map); (unsigned long)pgdat->node_mem_map);
@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page)
set_pageblock_migratetype(page, MIGRATE_CMA); set_pageblock_migratetype(page, MIGRATE_CMA);
set_page_refcounted(page); set_page_refcounted(page);
/* pages were reserved and not allocated */
clear_page_tag_ref(page);
__free_pages(page, pageblock_order); __free_pages(page, pageblock_order);
adjust_managed_page_count(page, pageblock_nr_pages); adjust_managed_page_count(page, pageblock_nr_pages);
@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
} }
/* pages were reserved and not allocated */ /* pages were reserved and not allocated */
if (mem_alloc_profiling_enabled()) { clear_page_tag_ref(page);
union codetag_ref *ref = get_page_tag_ref(page);
if (ref) {
set_codetag_empty(ref);
put_page_tag_ref(ref);
}
}
__free_pages_core(page, order, MEMINIT_EARLY); __free_pages_core(page, order, MEMINIT_EARLY);
} }

View File

@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma)
static bool is_madv_discard(int behavior) static bool is_madv_discard(int behavior)
{ {
return behavior & switch (behavior) {
(MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | case MADV_FREE:
MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); case MADV_DONTNEED:
case MADV_DONTNEED_LOCKED:
case MADV_REMOVE:
case MADV_DONTFORK:
case MADV_WIPEONFORK:
return true;
}
return false;
} }
static bool is_ro_anon(struct vm_area_struct *vma) static bool is_ro_anon(struct vm_area_struct *vma)

View File

@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
static bool page_contains_unaccepted(struct page *page, unsigned int order); static bool page_contains_unaccepted(struct page *page, unsigned int order);
static void accept_page(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order);
static bool try_to_accept_memory(struct zone *zone, unsigned int order); static bool cond_accept_memory(struct zone *zone, unsigned int order);
static inline bool has_unaccepted_memory(void); static inline bool has_unaccepted_memory(void);
static bool __free_unaccepted(struct page *page); static bool __free_unaccepted(struct page *page);
@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
if (!(alloc_flags & ALLOC_CMA)) if (!(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif #endif
#ifdef CONFIG_UNACCEPTED_MEMORY
unusable_free += zone_page_state(z, NR_UNACCEPTED);
#endif
return unusable_free; return unusable_free;
} }
@ -3368,6 +3365,8 @@ retry:
} }
} }
cond_accept_memory(zone, order);
/* /*
* Detect whether the number of free pages is below high * Detect whether the number of free pages is below high
* watermark. If so, we will decrease pcp->high and free * watermark. If so, we will decrease pcp->high and free
@ -3393,10 +3392,8 @@ check_alloc_wmark:
gfp_mask)) { gfp_mask)) {
int ret; int ret;
if (has_unaccepted_memory()) { if (cond_accept_memory(zone, order))
if (try_to_accept_memory(zone, order)) goto try_this_zone;
goto try_this_zone;
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* /*
@ -3450,10 +3447,8 @@ try_this_zone:
return page; return page;
} else { } else {
if (has_unaccepted_memory()) { if (cond_accept_memory(zone, order))
if (try_to_accept_memory(zone, order)) goto try_this_zone;
goto try_this_zone;
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */ /* Try again if zone has deferred pages */
@ -5755,7 +5750,6 @@ void __init setup_per_cpu_pageset(void)
for_each_online_pgdat(pgdat) for_each_online_pgdat(pgdat)
pgdat->per_cpu_nodestats = pgdat->per_cpu_nodestats =
alloc_percpu(struct per_cpu_nodestat); alloc_percpu(struct per_cpu_nodestat);
store_early_perpage_metadata();
} }
__meminit void zone_pcp_init(struct zone *zone) __meminit void zone_pcp_init(struct zone *zone)
@ -5821,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
void free_reserved_page(struct page *page) void free_reserved_page(struct page *page)
{ {
if (mem_alloc_profiling_enabled()) { clear_page_tag_ref(page);
union codetag_ref *ref = get_page_tag_ref(page);
if (ref) {
set_codetag_empty(ref);
put_page_tag_ref(ref);
}
}
ClearPageReserved(page); ClearPageReserved(page);
init_page_count(page); init_page_count(page);
__free_page(page); __free_page(page);
@ -6951,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
struct page *page; struct page *page;
bool last; bool last;
if (list_empty(&zone->unaccepted_pages))
return false;
spin_lock_irqsave(&zone->lock, flags); spin_lock_irqsave(&zone->lock, flags);
page = list_first_entry_or_null(&zone->unaccepted_pages, page = list_first_entry_or_null(&zone->unaccepted_pages,
struct page, lru); struct page, lru);
@ -6979,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true; return true;
} }
static bool try_to_accept_memory(struct zone *zone, unsigned int order) static bool cond_accept_memory(struct zone *zone, unsigned int order)
{ {
long to_accept; long to_accept;
int ret = false; bool ret = false;
if (!has_unaccepted_memory())
return false;
if (list_empty(&zone->unaccepted_pages))
return false;
/* How much to accept to get to high watermark? */ /* How much to accept to get to high watermark? */
to_accept = high_wmark_pages(zone) - to_accept = high_wmark_pages(zone) -
(zone_page_state(zone, NR_FREE_PAGES) - (zone_page_state(zone, NR_FREE_PAGES) -
__zone_watermark_unusable_free(zone, order, 0)); __zone_watermark_unusable_free(zone, order, 0) -
zone_page_state(zone, NR_UNACCEPTED));
/* Accept at least one page */ while (to_accept > 0) {
do {
if (!try_to_accept_memory_one(zone)) if (!try_to_accept_memory_one(zone))
break; break;
ret = true; ret = true;
to_accept -= MAX_ORDER_NR_PAGES; to_accept -= MAX_ORDER_NR_PAGES;
} while (to_accept > 0); }
return ret; return ret;
} }
@ -7038,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order)
{ {
} }
static bool try_to_accept_memory(struct zone *zone, unsigned int order) static bool cond_accept_memory(struct zone *zone, unsigned int order)
{ {
return false; return false;
} }

View File

@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid)
return -ENOMEM; return -ENOMEM;
NODE_DATA(nid)->node_page_ext = base; NODE_DATA(nid)->node_page_ext = base;
total_usage += table_size; total_usage += table_size;
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE));
DIV_ROUND_UP(table_size, PAGE_SIZE));
return 0; return 0;
} }
@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid)
else else
addr = vzalloc_node(size, nid); addr = vzalloc_node(size, nid);
if (addr) { if (addr)
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
DIV_ROUND_UP(size, PAGE_SIZE));
}
return addr; return addr;
} }
@ -323,25 +320,18 @@ static void free_page_ext(void *addr)
{ {
size_t table_size; size_t table_size;
struct page *page; struct page *page;
struct pglist_data *pgdat;
table_size = page_ext_size * PAGES_PER_SECTION; table_size = page_ext_size * PAGES_PER_SECTION;
memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));
if (is_vmalloc_addr(addr)) { if (is_vmalloc_addr(addr)) {
page = vmalloc_to_page(addr);
pgdat = page_pgdat(page);
vfree(addr); vfree(addr);
} else { } else {
page = virt_to_page(addr); page = virt_to_page(addr);
pgdat = page_pgdat(page);
BUG_ON(PageReserved(page)); BUG_ON(PageReserved(page));
kmemleak_free(addr); kmemleak_free(addr);
free_pages_exact(addr, table_size); free_pages_exact(addr, table_size);
} }
mod_node_page_state(pgdat, NR_MEMMAP,
-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));
} }
static void __free_page_ext(unsigned long pfn) static void __free_page_ext(unsigned long pfn)

View File

@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
if (r < 0) if (r < 0)
return NULL; return NULL;
if (system_state == SYSTEM_BOOTING) { if (system_state == SYSTEM_BOOTING)
mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
PAGE_SIZE)); else
} else { memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP,
DIV_ROUND_UP(end - start, PAGE_SIZE));
}
return pfn_to_page(pfn); return pfn_to_page(pfn);
} }

View File

@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
sparsemap_buf_end = sparsemap_buf + size; sparsemap_buf_end = sparsemap_buf + size;
#ifndef CONFIG_SPARSEMEM_VMEMMAP #ifndef CONFIG_SPARSEMEM_VMEMMAP
mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
#endif #endif
} }
@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long start = (unsigned long) pfn_to_page(pfn);
unsigned long end = start + nr_pages * sizeof(struct page); unsigned long end = start + nr_pages * sizeof(struct page);
mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
vmemmap_free(start, end, altmap); vmemmap_free(start, end, altmap);
} }
static void free_map_bootmem(struct page *memmap) static void free_map_bootmem(struct page *memmap)

View File

@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
page = alloc_pages_noprof(alloc_gfp, order); page = alloc_pages_noprof(alloc_gfp, order);
else else
page = alloc_pages_node_noprof(nid, alloc_gfp, order); page = alloc_pages_node_noprof(nid, alloc_gfp, order);
if (unlikely(!page)) { if (unlikely(!page))
if (!nofail) break;
break;
/* fall back to the zero order allocations */
alloc_gfp |= __GFP_NOFAIL;
order = 0;
continue;
}
/* /*
* Higher order allocations must be able to be treated as * Higher order allocations must be able to be treated as

View File

@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat,
} }
#endif #endif
/*
* Count number of pages "struct page" and "struct page_ext" consume.
* nr_memmap_boot_pages: # of pages allocated by boot allocator
* nr_memmap_pages: # of pages that were allocated by buddy allocator
*/
static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0);
static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0);
/*
 * Atomically add @delta to nr_memmap_boot_pages, the system-wide count of
 * memmap pages obtained from the boot allocator. @delta is signed: callers
 * pass a negative value when such pages are freed.
 */
void memmap_boot_pages_add(long delta)
{
atomic_long_add(delta, &nr_memmap_boot_pages);
}
/*
 * Atomically add @delta to nr_memmap_pages, the system-wide count of
 * memmap pages obtained from the buddy allocator. @delta is signed: callers
 * pass a negative value when such pages are freed.
 */
void memmap_pages_add(long delta)
{
atomic_long_add(delta, &nr_memmap_pages);
}
#ifdef CONFIG_COMPACTION #ifdef CONFIG_COMPACTION
struct contig_page_info { struct contig_page_info {
@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = {
"pgdemote_kswapd", "pgdemote_kswapd",
"pgdemote_direct", "pgdemote_direct",
"pgdemote_khugepaged", "pgdemote_khugepaged",
"nr_memmap", /* system-wide enum vm_stat_item counters */
"nr_memmap_boot",
/* enum writeback_stat_item counters */
"nr_dirty_threshold", "nr_dirty_threshold",
"nr_dirty_background_threshold", "nr_dirty_background_threshold",
"nr_memmap_pages",
"nr_memmap_boot_pages",
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
/* enum vm_event_item counters */ /* enum vm_event_item counters */
@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = {
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \
NR_VM_NODE_STAT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \
NR_VM_WRITEBACK_STAT_ITEMS + \ NR_VM_STAT_ITEMS + \
(IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
NR_VM_EVENT_ITEMS : 0)) NR_VM_EVENT_ITEMS : 0))
@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
v + NR_DIRTY_THRESHOLD); v + NR_DIRTY_THRESHOLD);
v += NR_VM_WRITEBACK_STAT_ITEMS; v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages);
v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages);
v += NR_VM_STAT_ITEMS;
#ifdef CONFIG_VM_EVENT_COUNTERS #ifdef CONFIG_VM_EVENT_COUNTERS
all_vm_events(v); all_vm_events(v);
@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void)
module_init(extfrag_debug_init); module_init(extfrag_debug_init);
#endif #endif
/*
* Page metadata size (struct page and page_ext) in pages
*/
static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata;
void __meminit mod_node_early_perpage_metadata(int nid, long delta)
{
early_perpage_metadata[nid] += delta;
}
void __meminit store_early_perpage_metadata(void)
{
int nid;
struct pglist_data *pgdat;
for_each_online_pgdat(pgdat) {
nid = pgdat->node_id;
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT,
early_perpage_metadata[nid]);
}
}

View File

@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate TEST_GEN_FILES += map_populate
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
TEST_GEN_FILES += memfd_secret TEST_GEN_FILES += memfd_secret
endif
TEST_GEN_FILES += migration TEST_GEN_FILES += migration
TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mkdirty
TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock-random-test

View File

@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int fd, ret = -1; int fd, ret = -1;
int compaction_index = 0; int compaction_index = 0;
char nr_hugepages[20] = {0}; char nr_hugepages[20] = {0};
char init_nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0};
sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
/* We want to test with 80% of available memory. Else, OOM killer comes /* We want to test with 80% of available memory. Else, OOM killer comes
in to play */ in to play */

View File

@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate CATEGORY="madv_populate" run_test ./madv_populate
if [ -x ./memfd_secret ]
then
(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
CATEGORY="memfd_secret" run_test ./memfd_secret CATEGORY="memfd_secret" run_test ./memfd_secret
fi
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
CATEGORY="ksm" run_test ./ksm_tests -H -s 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100