mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git (synced 2024-12-29 17:25:38 +00:00)
mm: isolate mmap internal logic to mm/vma.c
In previous commits we effected improvements to the mmap() logic in mmap_region() and its newly introduced internal implementation function __mmap_region(). However, as these changes are intended to be backported, we kept the delta as small as possible and made as few changes as possible to the newly introduced mm/vma.* files.

Take the opportunity to move this logic to mm/vma.c, which not only isolates it but also makes it available for later userland testing, which can help us catch such logic errors far earlier.

Link: https://lkml.kernel.org/r/93fc2c3aa37dd30590b7e4ee067dfd832007bf7e.1729858176.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
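The resulting layering, sketched below, keeps the public entry point in mm/mmap.c while the implementation and its now-static vms_* helpers move to mm/vma.c, with the prototype exposed through mm/vma.h. This is a simplified sketch only; the real mmap_region() wrapper retains the validation performed before calling into the implementation rather than being a bare pass-through.

/* mm/vma.h -- implementation now visible to mm/ internals (and the VMA tests) */
unsigned long __mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf);

/* mm/mmap.c -- public entry point stays put; body simplified in this sketch */
unsigned long mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf)
{
        /* ... flag and arch validation remains in mm/mmap.c ... */
        return __mmap_region(file, addr, len, vm_flags, pgoff, uf);
}

/* mm/vma.c -- __mmap_region() plus the now-static vms_* helpers live here */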
parent c14f8046cd
commit 52956b0d7f
mm/mmap.c (234 changed lines)

@@ -577,22 +577,6 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */

/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
static inline bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
        /*
         * hugetlb has its own accounting separate from the core VM
         * VM_HUGETLB may not be set yet so we cannot check for that flag.
         */
        if (file && is_file_hugepages(file))
                return false;

        return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}

/**
 * unmapped_area() - Find an area between the low_limit and the high_limit with
 * the correct alignment and offset, all from @info. Note: current->mm is used
@@ -1362,224 +1346,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
        return do_vmi_munmap(&vmi, mm, start, len, uf, false);
}

static unsigned long __mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = NULL;
        pgoff_t pglen = PHYS_PFN(len);
        unsigned long charged = 0;
        struct vma_munmap_struct vms;
        struct ma_state mas_detach;
        struct maple_tree mt_detach;
        unsigned long end = addr + len;
        int error;
        VMA_ITERATOR(vmi, mm, addr);
        VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);

        vmg.file = file;
        /* Find the first overlapping VMA */
        vma = vma_find(&vmi, end);
        init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
        if (vma) {
                mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
                mt_on_stack(mt_detach);
                mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
                /* Prepare to unmap any existing mapping in the area */
                error = vms_gather_munmap_vmas(&vms, &mas_detach);
                if (error)
                        goto gather_failed;

                vmg.next = vms.next;
                vmg.prev = vms.prev;
                vma = NULL;
        } else {
                vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
        }

        /* Check against address space limit. */
        if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) {
                error = -ENOMEM;
                goto abort_munmap;
        }

        /*
         * Private writable mapping: check memory availability
         */
        if (accountable_mapping(file, vm_flags)) {
                charged = pglen;
                charged -= vms.nr_accounted;
                if (charged) {
                        error = security_vm_enough_memory_mm(mm, charged);
                        if (error)
                                goto abort_munmap;
                }

                vms.nr_accounted = 0;
                vm_flags |= VM_ACCOUNT;
                vmg.flags = vm_flags;
        }

        /*
         * clear PTEs while the vma is still in the tree so that rmap
         * cannot race with the freeing later in the truncate scenario.
         * This is also needed for mmap_file(), which is why vm_ops
         * close function is called.
         */
        vms_clean_up_area(&vms, &mas_detach);
        vma = vma_merge_new_range(&vmg);
        if (vma)
                goto expanded;
        /*
         * Determine the object being mapped and call the appropriate
         * specific mapper. the address has already been validated, but
         * not unmapped, but the maps are removed from the list.
         */
        vma = vm_area_alloc(mm);
        if (!vma) {
                error = -ENOMEM;
                goto unacct_error;
        }

        vma_iter_config(&vmi, addr, end);
        vma_set_range(vma, addr, end, pgoff);
        vm_flags_init(vma, vm_flags);
        vma->vm_page_prot = vm_get_page_prot(vm_flags);

        if (vma_iter_prealloc(&vmi, vma)) {
                error = -ENOMEM;
                goto free_vma;
        }

        if (file) {
                vma->vm_file = get_file(file);
                error = mmap_file(file, vma);
                if (error)
                        goto unmap_and_free_file_vma;

                /* Drivers cannot alter the address of the VMA. */
                WARN_ON_ONCE(addr != vma->vm_start);
                /*
                 * Drivers should not permit writability when previously it was
                 * disallowed.
                 */
                VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
                                !(vm_flags & VM_MAYWRITE) &&
                                (vma->vm_flags & VM_MAYWRITE));

                vma_iter_config(&vmi, addr, end);
                /*
                 * If vm_flags changed after mmap_file(), we should try merge
                 * vma again as we may succeed this time.
                 */
                if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
                        struct vm_area_struct *merge;

                        vmg.flags = vma->vm_flags;
                        /* If this fails, state is reset ready for a reattempt. */
                        merge = vma_merge_new_range(&vmg);

                        if (merge) {
                                /*
                                 * ->mmap() can change vma->vm_file and fput
                                 * the original file. So fput the vma->vm_file
                                 * here or we would add an extra fput for file
                                 * and cause general protection fault
                                 * ultimately.
                                 */
                                fput(vma->vm_file);
                                vm_area_free(vma);
                                vma = merge;
                                /* Update vm_flags to pick up the change. */
                                vm_flags = vma->vm_flags;
                                goto file_expanded;
                        }
                        vma_iter_config(&vmi, addr, end);
                }

                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_iter_vma;
        } else {
                vma_set_anonymous(vma);
        }

#ifdef CONFIG_SPARC64
        /* TODO: Fix SPARC ADI! */
        WARN_ON_ONCE(!arch_validate_flags(vm_flags));
#endif

        /* Lock the VMA since it is modified after insertion into VMA tree */
        vma_start_write(vma);
        vma_iter_store(&vmi, vma);
        mm->map_count++;
        vma_link_file(vma);

        /*
         * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
         * call covers the non-merge case.
         */
        khugepaged_enter_vma(vma, vma->vm_flags);

file_expanded:
        file = vma->vm_file;
        ksm_add_vma(vma);
expanded:
        perf_event_mmap(vma);

        /* Unmap any existing mapping in the area */
        vms_complete_munmap_vmas(&vms, &mas_detach);

        vm_stat_account(mm, vm_flags, pglen);
        if (vm_flags & VM_LOCKED) {
                if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
                                is_vm_hugetlb_page(vma) ||
                                vma == get_gate_vma(current->mm))
                        vm_flags_clear(vma, VM_LOCKED_MASK);
                else
                        mm->locked_vm += pglen;
        }

        if (file)
                uprobe_mmap(vma);

        /*
         * New (or expanded) vma always get soft dirty status.
         * Otherwise user-space soft-dirty page tracker won't
         * be able to distinguish situation when vma area unmapped,
         * then new mapped in-place (which must be aimed as
         * a completely new data area).
         */
        vm_flags_set(vma, VM_SOFTDIRTY);

        vma_set_page_prot(vma);

        return addr;

unmap_and_free_file_vma:
        fput(vma->vm_file);
        vma->vm_file = NULL;

        vma_iter_set(&vmi, vma->vm_end);
        /* Undo any partial mapping done by a device driver. */
        unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
free_iter_vma:
        vma_iter_free(&vmi);
free_vma:
        vm_area_free(vma);
unacct_error:
        if (charged)
                vm_unacct_memory(charged);

abort_munmap:
        vms_abort_munmap_vmas(&vms, &mas_detach);
gather_failed:
        return error;
}

unsigned long mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf)
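The accountable_mapping() helper removed above is reintroduced in mm/vma.c below; it charges a mapping against the commit limit only when it is a private, writable, non-hugetlb mapping without VM_NORESERVE. A hypothetical illustration of the predicate, not part of the patch ('hugetlb_file' stands in for any open hugetlbfs file):

static void accountable_mapping_examples(struct file *hugetlb_file)
{
        /* Private writable anonymous mapping: charged. */
        WARN_ON(!accountable_mapping(NULL, VM_READ | VM_WRITE));
        /* Shared mapping: never charged here. */
        WARN_ON(accountable_mapping(NULL, VM_READ | VM_WRITE | VM_SHARED));
        /* VM_NORESERVE requested: not charged. */
        WARN_ON(accountable_mapping(NULL, VM_READ | VM_WRITE | VM_NORESERVE));
        /* hugetlbfs backing: hugetlb does its own accounting. */
        WARN_ON(accountable_mapping(hugetlb_file, VM_READ | VM_WRITE));
}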
mm/vma.c (323 changed lines)

@@ -1103,7 +1103,7 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
        vms->clear_ptes = false;
}

void vms_clean_up_area(struct vma_munmap_struct *vms,
static void vms_clean_up_area(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach)
{
        struct vm_area_struct *vma;
@@ -1126,7 +1126,7 @@ void vms_clean_up_area(struct vma_munmap_struct *vms,
 * used for the munmap() and may downgrade the lock - if requested. Everything
 * needed to be done once the vma maple tree is updated.
 */
void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach)
{
        struct vm_area_struct *vma;
@@ -1167,6 +1167,23 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
        __mt_destroy(mas_detach->tree);
}

/*
 * reattach_vmas() - Undo any munmap work and free resources
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the vmas.
 */
static void reattach_vmas(struct ma_state *mas_detach)
{
        struct vm_area_struct *vma;

        mas_set(mas_detach, 0);
        mas_for_each(mas_detach, vma, ULONG_MAX)
                vma_mark_detached(vma, false);

        __mt_destroy(mas_detach->tree);
}

/*
 * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
 * for removal at a later date. Handles splitting first and last if necessary
@@ -1177,7 +1194,7 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
 *
 * Return: 0 on success, error otherwise
 */
int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach)
{
        struct vm_area_struct *next = NULL;
@@ -1315,6 +1332,39 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
        return error;
}

/*
 * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
 * @vms: The vma munmap struct
 * @vmi: The vma iterator
 * @vma: The first vm_area_struct to munmap
 * @start: The aligned start address to munmap
 * @end: The aligned end address to munmap
 * @uf: The userfaultfd list_head
 * @unlock: Unlock after the operation. Only unlocked on success
 */
static void init_vma_munmap(struct vma_munmap_struct *vms,
                struct vma_iterator *vmi, struct vm_area_struct *vma,
                unsigned long start, unsigned long end, struct list_head *uf,
                bool unlock)
{
        vms->vmi = vmi;
        vms->vma = vma;
        if (vma) {
                vms->start = start;
                vms->end = end;
        } else {
                vms->start = vms->end = 0;
        }
        vms->unlock = unlock;
        vms->uf = uf;
        vms->vma_count = 0;
        vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
        vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
        vms->unmap_start = FIRST_USER_ADDRESS;
        vms->unmap_end = USER_PGTABLES_CEILING;
        vms->clear_ptes = false;
}

/*
 * do_vmi_align_munmap() - munmap the aligned region from @start to @end.
 * @vmi: The vma iterator
@@ -2069,3 +2119,270 @@ void mm_drop_all_locks(struct mm_struct *mm)

        mutex_unlock(&mm_all_locks_mutex);
}

/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
static bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
        /*
         * hugetlb has its own accounting separate from the core VM
         * VM_HUGETLB may not be set yet so we cannot check for that flag.
         */
        if (file && is_file_hugepages(file))
                return false;

        return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}

/*
 * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
 * operation.
 * @vms: The vma unmap structure
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas, free up the maple tree used to track the vmas.
 * If that's not possible because the ptes are cleared (and vm_ops->closed() may
 * have been called), then a NULL is written over the vmas and the vmas are
 * removed (munmap() completed).
 */
static void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach)
{
        struct ma_state *mas = &vms->vmi->mas;

        if (!vms->nr_pages)
                return;

        if (vms->clear_ptes)
                return reattach_vmas(mas_detach);

        /*
         * Aborting cannot just call the vm_ops open() because they are often
         * not symmetrical and state data has been lost. Resort to the old
         * failure method of leaving a gap where the MAP_FIXED mapping failed.
         */
        mas_set_range(mas, vms->start, vms->end - 1);
        mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
        /* Clean up the insertion of the unfortunate gap */
        vms_complete_munmap_vmas(vms, mas_detach);
}

unsigned long __mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = NULL;
        pgoff_t pglen = PHYS_PFN(len);
        unsigned long charged = 0;
        struct vma_munmap_struct vms;
        struct ma_state mas_detach;
        struct maple_tree mt_detach;
        unsigned long end = addr + len;
        int error;
        VMA_ITERATOR(vmi, mm, addr);
        VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);

        vmg.file = file;
        /* Find the first overlapping VMA */
        vma = vma_find(&vmi, end);
        init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
        if (vma) {
                mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
                mt_on_stack(mt_detach);
                mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
                /* Prepare to unmap any existing mapping in the area */
                error = vms_gather_munmap_vmas(&vms, &mas_detach);
                if (error)
                        goto gather_failed;

                vmg.next = vms.next;
                vmg.prev = vms.prev;
                vma = NULL;
        } else {
                vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
        }

        /* Check against address space limit. */
        if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) {
                error = -ENOMEM;
                goto abort_munmap;
        }

        /*
         * Private writable mapping: check memory availability
         */
        if (accountable_mapping(file, vm_flags)) {
                charged = pglen;
                charged -= vms.nr_accounted;
                if (charged) {
                        error = security_vm_enough_memory_mm(mm, charged);
                        if (error)
                                goto abort_munmap;
                }

                vms.nr_accounted = 0;
                vm_flags |= VM_ACCOUNT;
                vmg.flags = vm_flags;
        }

        /*
         * clear PTEs while the vma is still in the tree so that rmap
         * cannot race with the freeing later in the truncate scenario.
         * This is also needed for mmap_file(), which is why vm_ops
         * close function is called.
         */
        vms_clean_up_area(&vms, &mas_detach);
        vma = vma_merge_new_range(&vmg);
        if (vma)
                goto expanded;
        /*
         * Determine the object being mapped and call the appropriate
         * specific mapper. the address has already been validated, but
         * not unmapped, but the maps are removed from the list.
         */
        vma = vm_area_alloc(mm);
        if (!vma) {
                error = -ENOMEM;
                goto unacct_error;
        }

        vma_iter_config(&vmi, addr, end);
        vma_set_range(vma, addr, end, pgoff);
        vm_flags_init(vma, vm_flags);
        vma->vm_page_prot = vm_get_page_prot(vm_flags);

        if (vma_iter_prealloc(&vmi, vma)) {
                error = -ENOMEM;
                goto free_vma;
        }

        if (file) {
                vma->vm_file = get_file(file);
                error = mmap_file(file, vma);
                if (error)
                        goto unmap_and_free_file_vma;

                /* Drivers cannot alter the address of the VMA. */
                WARN_ON_ONCE(addr != vma->vm_start);
                /*
                 * Drivers should not permit writability when previously it was
                 * disallowed.
                 */
                VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
                                !(vm_flags & VM_MAYWRITE) &&
                                (vma->vm_flags & VM_MAYWRITE));

                vma_iter_config(&vmi, addr, end);
                /*
                 * If vm_flags changed after mmap_file(), we should try merge
                 * vma again as we may succeed this time.
                 */
                if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
                        struct vm_area_struct *merge;

                        vmg.flags = vma->vm_flags;
                        /* If this fails, state is reset ready for a reattempt. */
                        merge = vma_merge_new_range(&vmg);

                        if (merge) {
                                /*
                                 * ->mmap() can change vma->vm_file and fput
                                 * the original file. So fput the vma->vm_file
                                 * here or we would add an extra fput for file
                                 * and cause general protection fault
                                 * ultimately.
                                 */
                                fput(vma->vm_file);
                                vm_area_free(vma);
                                vma = merge;
                                /* Update vm_flags to pick up the change. */
                                vm_flags = vma->vm_flags;
                                goto file_expanded;
                        }
                        vma_iter_config(&vmi, addr, end);
                }

                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_iter_vma;
        } else {
                vma_set_anonymous(vma);
        }

#ifdef CONFIG_SPARC64
        /* TODO: Fix SPARC ADI! */
        WARN_ON_ONCE(!arch_validate_flags(vm_flags));
#endif

        /* Lock the VMA since it is modified after insertion into VMA tree */
        vma_start_write(vma);
        vma_iter_store(&vmi, vma);
        mm->map_count++;
        vma_link_file(vma);

        /*
         * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
         * call covers the non-merge case.
         */
        khugepaged_enter_vma(vma, vma->vm_flags);

file_expanded:
        file = vma->vm_file;
        ksm_add_vma(vma);
expanded:
        perf_event_mmap(vma);

        /* Unmap any existing mapping in the area */
        vms_complete_munmap_vmas(&vms, &mas_detach);

        vm_stat_account(mm, vm_flags, pglen);
        if (vm_flags & VM_LOCKED) {
                if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
                                is_vm_hugetlb_page(vma) ||
                                vma == get_gate_vma(current->mm))
                        vm_flags_clear(vma, VM_LOCKED_MASK);
                else
                        mm->locked_vm += pglen;
        }

        if (file)
                uprobe_mmap(vma);

        /*
         * New (or expanded) vma always get soft dirty status.
         * Otherwise user-space soft-dirty page tracker won't
         * be able to distinguish situation when vma area unmapped,
         * then new mapped in-place (which must be aimed as
         * a completely new data area).
         */
        vm_flags_set(vma, VM_SOFTDIRTY);

        vma_set_page_prot(vma);

        return addr;

unmap_and_free_file_vma:
        fput(vma->vm_file);
        vma->vm_file = NULL;

        vma_iter_set(&vmi, vma->vm_end);
        /* Undo any partial mapping done by a device driver. */
        unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
free_iter_vma:
        vma_iter_free(&vmi);
free_vma:
        vm_area_free(vma);
unacct_error:
        if (charged)
                vm_unacct_memory(charged);

abort_munmap:
        vms_abort_munmap_vmas(&vms, &mas_detach);
gather_failed:
        return error;
}
mm/vma.h (97 changed lines)

@@ -165,99 +165,6 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
        return 0;
}

#ifdef CONFIG_MMU
/*
 * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
 * @vms: The vma munmap struct
 * @vmi: The vma iterator
 * @vma: The first vm_area_struct to munmap
 * @start: The aligned start address to munmap
 * @end: The aligned end address to munmap
 * @uf: The userfaultfd list_head
 * @unlock: Unlock after the operation. Only unlocked on success
 */
static inline void init_vma_munmap(struct vma_munmap_struct *vms,
                struct vma_iterator *vmi, struct vm_area_struct *vma,
                unsigned long start, unsigned long end, struct list_head *uf,
                bool unlock)
{
        vms->vmi = vmi;
        vms->vma = vma;
        if (vma) {
                vms->start = start;
                vms->end = end;
        } else {
                vms->start = vms->end = 0;
        }
        vms->unlock = unlock;
        vms->uf = uf;
        vms->vma_count = 0;
        vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
        vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
        vms->unmap_start = FIRST_USER_ADDRESS;
        vms->unmap_end = USER_PGTABLES_CEILING;
        vms->clear_ptes = false;
}
#endif

int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach);

void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach);

void vms_clean_up_area(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach);

/*
 * reattach_vmas() - Undo any munmap work and free resources
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the vmas.
 */
static inline void reattach_vmas(struct ma_state *mas_detach)
{
        struct vm_area_struct *vma;

        mas_set(mas_detach, 0);
        mas_for_each(mas_detach, vma, ULONG_MAX)
                vma_mark_detached(vma, false);

        __mt_destroy(mas_detach->tree);
}

/*
 * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
 * operation.
 * @vms: The vma unmap structure
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas, free up the maple tree used to track the vmas.
 * If that's not possible because the ptes are cleared (and vm_ops->closed() may
 * have been called), then a NULL is written over the vmas and the vmas are
 * removed (munmap() completed).
 */
static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
                struct ma_state *mas_detach)
{
        struct ma_state *mas = &vms->vmi->mas;
        if (!vms->nr_pages)
                return;

        if (vms->clear_ptes)
                return reattach_vmas(mas_detach);

        /*
         * Aborting cannot just call the vm_ops open() because they are often
         * not symmetrical and state data has been lost. Resort to the old
         * failure method of leaving a gap where the MAP_FIXED mapping failed.
         */
        mas_set_range(mas, vms->start, vms->end - 1);
        mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
        /* Clean up the insertion of the unfortunate gap */
        vms_complete_munmap_vmas(vms, mas_detach);
}

int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
                struct mm_struct *mm, unsigned long start,
@@ -336,6 +243,10 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
int mm_take_all_locks(struct mm_struct *mm);
void mm_drop_all_locks(struct mm_struct *mm);

unsigned long __mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
                struct list_head *uf);

static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
        /*
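With __mmap_region() declared in mm/vma.h, a userland VMA test harness that builds mm/vma.c against stubbed kernel headers can in principle drive the mapping path directly, which is the "later userland testing" the commit message refers to. A hypothetical sketch only; the test name, addresses and setup are illustrative and not the actual tools/testing/vma code:

static bool test_mmap_region_anon(void)
{
        LIST_HEAD(uf);
        unsigned long addr;

        /* One anonymous, private, read/write page; __mmap_region() works on current->mm. */
        addr = __mmap_region(/* file = */ NULL, 0x1000, PAGE_SIZE,
                        VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
                        /* pgoff = */ 0x1000 >> PAGE_SHIFT, &uf);

        /* On success the mapped address is returned. */
        return addr == 0x1000;
}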
(additional changed file; its name is not shown in this view)

@@ -17,8 +17,10 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/huge_mm.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_inline.h>
#include <linux/kernel.h>
#include <linux/ksm.h>
#include <linux/khugepaged.h>
#include <linux/list.h>
#include <linux/maple_tree.h>
@@ -32,11 +34,14 @@
#include <linux/mmu_context.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
#include <linux/pfn.h>
#include <linux/rcupdate.h>
#include <linux/rmap.h>
#include <linux/rwsem.h>
#include <linux/sched/signal.h>
#include <linux/security.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>
#include <linux/uprobes.h>
#include <linux/userfaultfd_k.h>