mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 21:53:44 +00:00
d4af56c5c7
Start tracking the VMAs with the new maple tree structure in parallel with the rb_tree. Add debug and trace events for maple tree operations and duplicate the rb_tree that is created on forks into the maple tree. The maple tree is added to the mm_struct including the mm_init struct, added support in required mm/mmap functions, added tracking in kernel/fork for process forking, and used to find the unmapped_area and checked against what the rbtree finds. This also moves the mmap_lock() in exit_mmap() since the oom reaper call does walk the VMAs. Otherwise lockdep will be unhappy if oom happens. When splitting a vma fails due to allocations of the maple tree nodes, the error path in __split_vma() calls new->vm_ops->close(new). The page accounting for hugetlb is actually in the close() operation, so it accounts for the removal of 1/2 of the VMA which was not adjusted. This results in a negative exit value. To avoid the negative charge, set vm_start = vm_end and vm_pgoff = 0. There is also a potential accounting issue in special mappings from insert_vm_struct() failing to allocate, so reverse the charge there in the failure scenario. Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Tested-by: Yu Zhao <yuzhao@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: SeongJae Park <sj@kernel.org> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
58 lines
1.7 KiB
C
58 lines
1.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/mm_types.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/maple_tree.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/list.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/pgtable.h>
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/user_namespace.h>
|
|
#include <linux/ioasid.h>
|
|
#include <asm/mmu.h>
|
|
|
|
/*
 * Fallback definition: if nothing included above has already defined
 * INIT_MM_CONTEXT, make it expand to nothing so that its use inside the
 * init_mm initializer below contributes no extra members.
 */
#ifndef INIT_MM_CONTEXT
#define INIT_MM_CONTEXT(name)
#endif
|
|
|
|
/*
 * For dynamically allocated mm_structs, there is a dynamically sized cpumask
 * at the end of the structure, the size of which depends on the maximum CPU
 * number the system can see. That way we allocate only as much memory for
 * mm_cpumask() as needed for the hundreds, or thousands of processes that
 * a system typically runs.
 *
 * Since there is only one init_mm in the entire system, keep it simple
 * and size this cpu_bitmask to NR_CPUS.
 */
|
|
struct mm_struct init_mm = {
|
|
.mm_rb = RB_ROOT,
|
|
.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock),
|
|
.pgd = swapper_pg_dir,
|
|
.mm_users = ATOMIC_INIT(2),
|
|
.mm_count = ATOMIC_INIT(1),
|
|
.write_protect_seq = SEQCNT_ZERO(init_mm.write_protect_seq),
|
|
MMAP_LOCK_INITIALIZER(init_mm)
|
|
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
|
|
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
|
|
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
|
.user_ns = &init_user_ns,
|
|
.cpu_bitmap = CPU_BITS_NONE,
|
|
#ifdef CONFIG_IOMMU_SVA
|
|
.pasid = INVALID_IOASID,
|
|
#endif
|
|
INIT_MM_CONTEXT(init_mm)
|
|
};
|
|
|
|
void setup_initial_init_mm(void *start_code, void *end_code,
|
|
void *end_data, void *brk)
|
|
{
|
|
init_mm.start_code = (unsigned long)start_code;
|
|
init_mm.end_code = (unsigned long)end_code;
|
|
init_mm.end_data = (unsigned long)end_data;
|
|
init_mm.brk = (unsigned long)brk;
|
|
}
|