mm/vma: move brk() internals to mm/vma.c

Patch series "mm/vma: make more mmap logic userland testable".

This series carries on the work started in a previous series and
continued in commit 52956b0d7f ("mm: isolate mmap internal logic to
mm/vma.c"), moving the remainder of the memory mapping implementation
details into mm/vma.c, allowing the bulk of the mapping logic to
be unit tested.

It is highly useful to do so, as this means we can both fundamentally test
this core logic, and introduce regression tests to ensure any issues
previously resolved do not recur.

Vitally, this includes the do_brk_flags() function, meaning that both
core means by which userland maps memory are now testable.

Performance testing was performed after this change given the brk() system
call's sensitivity to change, and no performance regression was observed.

The stack expansion logic is also moved into mm/vma.c, which necessitates
a change in the API exposed to the exec code, removing the invocation of
the expand_downwards() function used in get_arg_page() and instead adding
mmap_read_lock_maybe_expand() to wrap this.


This patch (of 5):

Now we have moved mmap_region() internals to mm/vma.c, making it available
to userland testing, it makes sense to do the same with brk().

This continues the pattern of VMA heavy lifting being done in mm/vma.c in
an environment where it can be subject to straightforward unit and
regression testing, with other VMA-adjacent files becoming wrappers around
this functionality.

Link: https://lkml.kernel.org/r/cover.1733248985.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/3d24b9e67bb0261539ca921d1188a10a1b4d4357.1733248985.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Lorenzo Stoakes 2024-12-03 18:05:08 +00:00 committed by Andrew Morton
parent 3c898d9f25
commit 90c0a05220
4 changed files with 108 additions and 84 deletions

View File

@ -111,8 +111,7 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
return mlock_future_ok(current->mm, current->mm->def_flags, len) return mlock_future_ok(current->mm, current->mm->def_flags, len)
? 0 : -EAGAIN; ? 0 : -EAGAIN;
} }
/*
* Forward declaration of do_brk_flags(). Note the parameter names here
* (@brkvma, @request) differ from those used by the definition
* (@vma, @len); the types and order are the same.
*/
static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
unsigned long addr, unsigned long request, unsigned long flags);
SYSCALL_DEFINE1(brk, unsigned long, brk) SYSCALL_DEFINE1(brk, unsigned long, brk)
{ {
unsigned long newbrk, oldbrk, origbrk; unsigned long newbrk, oldbrk, origbrk;
@ -1512,88 +1511,6 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
return ret; return ret;
} }
/*
* do_brk_flags() - Increase the brk vma if the flags match.
* @vmi: The vma iterator
* @vma: The existing VMA to try to extend, may be NULL
* @addr: The start address
* @len: The length of the increase, in bytes
* @flags: The VMA flags; VM_DATA_DEFAULT_FLAGS, VM_ACCOUNT and mm->def_flags
*         are always OR'ed in on top of what the caller passes
*
* Extend the brk VMA from addr to addr + len. If the VMA is NULL or the flags
* do not match then create a new anonymous VMA. Eventually we may be able to
* do some brk-specific accounting here.
*
* Operates on current->mm.
*
* Return: 0 on success, -ENOMEM if a limit check fails or if the VMA could not
* be allocated or stored.
*/
static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
/*
* Check against address space limits by the changed size
* Note: This happens *after* clearing old mappings in some code paths.
*/
flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
return -ENOMEM;
/*
* NOTE(review): the comparison is '>', so this passes when map_count is
* equal to the limit and map_count may then be incremented below -
* confirm this is the intended limit semantics.
*/
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
/*
* From here on, failures exit via unacct_fail, which calls
* vm_unacct_memory() for the same number of pages - presumably undoing
* the charge made by this check.
*/
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
/*
* Expand the existing vma if possible; Note that singular lists do not
* occur after forking, so the expand will only happen on new VMAs.
*/
if (vma && vma->vm_end == addr) {
VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
vmg.prev = vma;
/* vmi is positioned at prev, which this mode expects. */
vmg.merge_flags = VMG_FLAG_JUST_EXPAND;
/*
* A successful merge skips VMA allocation entirely; a merge that
* failed for a reason other than out-of-memory falls through to
* the new-VMA path below.
*/
if (vma_merge_new_range(&vmg))
goto out;
else if (vmg_nomem(&vmg))
goto unacct_fail;
}
/*
* Presumably steps the iterator past the existing VMA so that the new
* VMA is stored after it - TODO confirm against vma_iter_next_range().
*/
if (vma)
vma_iter_next_range(vmi);
/* create a vma struct for an anonymous mapping */
vma = vm_area_alloc(mm);
if (!vma)
goto unacct_fail;
vma_set_anonymous(vma);
vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
vm_flags_init(vma, flags);
vma->vm_page_prot = vm_get_page_prot(flags);
vma_start_write(vma);
if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
goto mas_store_fail;
mm->map_count++;
validate_mm(mm);
ksm_add_vma(vma);
/*
* Common success path: reached both after an in-place expansion (vma is
* still the caller-supplied VMA) and after inserting a new VMA.
*/
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
mm->data_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vm_flags_set(vma, VM_SOFTDIRTY);
return 0;
/* The newly allocated VMA could not be stored: free it, then unaccount. */
mas_store_fail:
vm_area_free(vma);
unacct_fail:
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
{ {
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;

View File

@ -2481,3 +2481,85 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
vms_abort_munmap_vmas(&map.vms, &map.mas_detach); vms_abort_munmap_vmas(&map.vms, &map.mas_detach);
return error; return error;
} }
/*
* do_brk_flags() - Increase the brk vma if the flags match.
* @vmi: The vma iterator
* @vma: The existing VMA to try to extend, may be NULL
* @addr: The start address
* @len: The length of the increase, in bytes
* @flags: The VMA flags; VM_DATA_DEFAULT_FLAGS, VM_ACCOUNT and mm->def_flags
*         are always OR'ed in on top of what the caller passes
*
* Extend the brk VMA from addr to addr + len. If the VMA is NULL or the flags
* do not match then create a new anonymous VMA. Eventually we may be able to
* do some brk-specific accounting here.
*
* Operates on current->mm.
*
* Return: 0 on success, -ENOMEM if a limit check fails or if the VMA could not
* be allocated or stored.
*/
int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
/*
* Check against address space limits by the changed size
* Note: This happens *after* clearing old mappings in some code paths.
*/
flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
return -ENOMEM;
/*
* NOTE(review): the comparison is '>', so this passes when map_count is
* equal to the limit and map_count may then be incremented below -
* confirm this is the intended limit semantics.
*/
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
/*
* From here on, failures exit via unacct_fail, which calls
* vm_unacct_memory() for the same number of pages - presumably undoing
* the charge made by this check.
*/
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
/*
* Expand the existing vma if possible; Note that singular lists do not
* occur after forking, so the expand will only happen on new VMAs.
*/
if (vma && vma->vm_end == addr) {
VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
vmg.prev = vma;
/* vmi is positioned at prev, which this mode expects. */
vmg.merge_flags = VMG_FLAG_JUST_EXPAND;
/*
* A successful merge skips VMA allocation entirely; a merge that
* failed for a reason other than out-of-memory falls through to
* the new-VMA path below.
*/
if (vma_merge_new_range(&vmg))
goto out;
else if (vmg_nomem(&vmg))
goto unacct_fail;
}
/*
* Presumably steps the iterator past the existing VMA so that the new
* VMA is stored after it - TODO confirm against vma_iter_next_range().
*/
if (vma)
vma_iter_next_range(vmi);
/* create a vma struct for an anonymous mapping */
vma = vm_area_alloc(mm);
if (!vma)
goto unacct_fail;
vma_set_anonymous(vma);
vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
vm_flags_init(vma, flags);
vma->vm_page_prot = vm_get_page_prot(flags);
vma_start_write(vma);
if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
goto mas_store_fail;
mm->map_count++;
validate_mm(mm);
ksm_add_vma(vma);
/*
* Common success path: reached both after an in-place expansion (vma is
* still the caller-supplied VMA) and after inserting a new VMA.
*/
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
mm->data_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vm_flags_set(vma, VM_SOFTDIRTY);
return 0;
/* The newly allocated VMA could not be stored: free it, then unaccount. */
mas_store_fail:
vm_area_free(vma);
unacct_fail:
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}

View File

@ -247,6 +247,9 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf); struct list_head *uf);
/*
* Extend the brk VMA @brkvma by @request bytes starting at @addr, or create a
* new anonymous VMA for that range. Returns 0 on success or -ENOMEM.
*/
int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
unsigned long addr, unsigned long request, unsigned long flags);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{ {
/* /*

View File

@ -39,6 +39,7 @@
#define VM_SHARED 0x00000008 #define VM_SHARED 0x00000008
#define VM_MAYREAD 0x00000010 #define VM_MAYREAD 0x00000010
#define VM_MAYWRITE 0x00000020 #define VM_MAYWRITE 0x00000020
#define VM_MAYEXEC 0x00000040
#define VM_GROWSDOWN 0x00000100 #define VM_GROWSDOWN 0x00000100
#define VM_PFNMAP 0x00000400 #define VM_PFNMAP 0x00000400
#define VM_LOCKED 0x00002000 #define VM_LOCKED 0x00002000
@ -58,6 +59,13 @@
/* This mask represents all the VMA flag bits used by mlock */ /* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
/* VM_EXEC if the current task's personality has READ_IMPLIES_EXEC set, else 0. */
#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
/*
* Data mapping flags: readable and writable, executable only when TASK_EXEC
* says so, with read/write/exec all permitted as "may" flags.
*/
#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
/* Default flags for data mappings. */
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
/* VM is sealed, in vm_flags */ /* VM is sealed, in vm_flags */
#define VM_SEALED _BITUL(63) #define VM_SEALED _BITUL(63)
@ -122,10 +130,22 @@ enum {
TASK_COMM_LEN = 16, TASK_COMM_LEN = 16,
}; };
/*
* Flags for bug emulation.
*
* These occupy the top three bytes.
*/
enum {
/* Tested against current->personality by the TASK_EXEC macro. */
READ_IMPLIES_EXEC = 0x0400000,
};
struct task_struct { struct task_struct {
char comm[TASK_COMM_LEN]; char comm[TASK_COMM_LEN];
pid_t pid; pid_t pid;
struct mm_struct *mm; struct mm_struct *mm;
/* Used for emulating ABI behavior of previous Linux versions: */
unsigned int personality;
}; };
struct task_struct *get_current(void); struct task_struct *get_current(void);
@ -186,6 +206,8 @@ struct mm_struct {
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */ unsigned long stack_vm; /* VM_STACK */
unsigned long def_flags;
}; };
struct vma_lock { struct vma_lock {