mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-09 06:33:34 +00:00
Merge branch 'mm-hotfixes-stable' into mm-stable.
Pick these into mm-stable:
5de195060b mm: resolve faulty mmap_region() error path behaviour
5baf8b037d mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling
0fb4a7ad27 mm: refactor map_deny_write_exec()
4080ef1579 mm: unconditionally close VMAs on error
3dd6ed34ce mm: avoid unsafe VMA hook invocation when error arises on mmap hook
f8f931bba0 mm/thp: fix deferred split unqueue naming and locking
e66f3185fa mm/thp: fix deferred split queue not partially_mapped
to get a clean merge of these from mm-unstable into mm-stable:
Subject: memcg-v1: fully deprecate move_charge_at_immigrate
Subject: memcg-v1: remove charge move code
Subject: memcg-v1: no need for memcg locking for dirty tracking
Subject: memcg-v1: no need for memcg locking for writeback tracking
Subject: memcg-v1: no need for memcg locking for MGLRU
Subject: memcg-v1: remove memcg move locking code
Subject: tools: testing: add additional vma_internal.h stubs
Subject: mm: isolate mmap internal logic to mm/vma.c
Subject: mm: refactor __mmap_region()
Subject: mm: remove unnecessary reset state logic on merge new VMA
Subject: mm: defer second attempt at merge on mmap()
Subject: mm/vma: the pgoff is correct if can_merge_right
Subject: memcg: workingset: remove folio_memcg_rcu usage
This commit is contained in:
commit
48901e9d62
@ -6,6 +6,8 @@
|
||||
|
||||
#ifndef BUILD_VDSO
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
|
||||
@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
|
||||
}
|
||||
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
|
||||
|
||||
static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
|
||||
static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
|
||||
unsigned long flags)
|
||||
{
|
||||
/*
|
||||
* Only allow MTE on anonymous mappings as these are guaranteed to be
|
||||
* backed by tags-capable memory. The vm_flags may be overridden by a
|
||||
* filesystem supporting MTE (RAM-based).
|
||||
*/
|
||||
if (system_supports_mte() && (flags & MAP_ANONYMOUS))
|
||||
if (system_supports_mte() &&
|
||||
((flags & MAP_ANONYMOUS) || shmem_file(file)))
|
||||
return VM_MTE_ALLOWED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
|
||||
#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
|
||||
|
||||
static inline bool arch_validate_prot(unsigned long prot,
|
||||
unsigned long addr __always_unused)
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef __ASM_MMAN_H__
|
||||
#define __ASM_MMAN_H__
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <uapi/asm/mman.h>
|
||||
|
||||
/* PARISC cannot allow mdwe as it needs writable stacks */
|
||||
@ -11,7 +12,7 @@ static inline bool arch_memory_deny_write_exec_supported(void)
|
||||
}
|
||||
#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
|
||||
|
||||
static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
|
||||
static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags)
|
||||
{
|
||||
/*
|
||||
* The stack on parisc grows upwards, so if userspace requests memory
|
||||
@ -23,6 +24,6 @@ static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
|
||||
#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
|
||||
|
||||
#endif /* __ASM_MMAN_H__ */
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef _LINUX_MMAN_H
|
||||
#define _LINUX_MMAN_H
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/percpu_counter.h>
|
||||
|
||||
@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages)
|
||||
#endif
|
||||
|
||||
#ifndef arch_calc_vm_flag_bits
|
||||
#define arch_calc_vm_flag_bits(flags) 0
|
||||
#define arch_calc_vm_flag_bits(file, flags) 0
|
||||
#endif
|
||||
|
||||
#ifndef arch_validate_prot
|
||||
@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
|
||||
* Combine the mmap "flags" argument into "vm_flags" used internally.
|
||||
*/
|
||||
static inline unsigned long
|
||||
calc_vm_flag_bits(unsigned long flags)
|
||||
calc_vm_flag_bits(struct file *file, unsigned long flags)
|
||||
{
|
||||
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
|
||||
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
|
||||
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
|
||||
_calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
|
||||
arch_calc_vm_flag_bits(flags);
|
||||
arch_calc_vm_flag_bits(file, flags);
|
||||
}
|
||||
|
||||
unsigned long vm_commit_limit(void);
|
||||
@ -188,16 +189,31 @@ static inline bool arch_memory_deny_write_exec_supported(void)
|
||||
*
|
||||
* d) mmap(PROT_READ | PROT_EXEC)
|
||||
* mmap(PROT_READ | PROT_EXEC | PROT_BTI)
|
||||
*
|
||||
* This is only applicable if the user has set the Memory-Deny-Write-Execute
|
||||
* (MDWE) protection mask for the current process.
|
||||
*
|
||||
* @old specifies the VMA flags the VMA originally possessed, and @new the ones
|
||||
* we propose to set.
|
||||
*
|
||||
* Return: false if proposed change is OK, true if not ok and should be denied.
|
||||
*/
|
||||
static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
|
||||
static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
|
||||
{
|
||||
/* If MDWE is disabled, we have nothing to deny. */
|
||||
if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
|
||||
return false;
|
||||
|
||||
if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
|
||||
/* If the new VMA is not executable, we have nothing to deny. */
|
||||
if (!(new & VM_EXEC))
|
||||
return false;
|
||||
|
||||
/* Under MDWE we do not accept newly writably executable VMAs... */
|
||||
if (new & VM_WRITE)
|
||||
return true;
|
||||
|
||||
if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC))
|
||||
/* ...nor previously non-executable VMAs becoming executable. */
|
||||
if (!(old & VM_EXEC))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -3588,10 +3588,27 @@ int split_folio_to_list(struct folio *folio, struct list_head *list)
|
||||
return split_huge_page_to_list_to_order(&folio->page, list, ret);
|
||||
}
|
||||
|
||||
void __folio_undo_large_rmappable(struct folio *folio)
|
||||
/*
|
||||
* __folio_unqueue_deferred_split() is not to be called directly:
|
||||
* the folio_unqueue_deferred_split() inline wrapper in mm/internal.h
|
||||
* limits its calls to those folios which may have a _deferred_list for
|
||||
* queueing THP splits, and that list is (racily observed to be) non-empty.
|
||||
*
|
||||
* It is unsafe to call folio_unqueue_deferred_split() until folio refcount is
|
||||
* zero: because even when split_queue_lock is held, a non-empty _deferred_list
|
||||
* might be in use on deferred_split_scan()'s unlocked on-stack list.
|
||||
*
|
||||
* If memory cgroups are enabled, split_queue_lock is in the mem_cgroup: it is
|
||||
* therefore important to unqueue deferred split before changing folio memcg.
|
||||
*/
|
||||
bool __folio_unqueue_deferred_split(struct folio *folio)
|
||||
{
|
||||
struct deferred_split *ds_queue;
|
||||
unsigned long flags;
|
||||
bool unqueued = false;
|
||||
|
||||
WARN_ON_ONCE(folio_ref_count(folio));
|
||||
WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg(folio));
|
||||
|
||||
ds_queue = get_deferred_split_queue(folio);
|
||||
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
|
||||
@ -3603,8 +3620,11 @@ void __folio_undo_large_rmappable(struct folio *folio)
|
||||
MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
|
||||
}
|
||||
list_del_init(&folio->_deferred_list);
|
||||
unqueued = true;
|
||||
}
|
||||
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
|
||||
|
||||
return unqueued; /* useful for debug warnings */
|
||||
}
|
||||
|
||||
/* partially_mapped=false won't clear PG_partially_mapped folio flag */
|
||||
@ -3627,14 +3647,11 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The try_to_unmap() in page reclaim path might reach here too,
|
||||
* this may cause a race condition to corrupt deferred split queue.
|
||||
* And, if page reclaim is already handling the same folio, it is
|
||||
* unnecessary to handle it again in shrinker.
|
||||
*
|
||||
* Check the swapcache flag to determine if the folio is being
|
||||
* handled by page reclaim since THP swap would add the folio into
|
||||
* swap cache before calling try_to_unmap().
|
||||
* Exclude swapcache: originally to avoid a corrupt deferred split
|
||||
* queue. Nowadays that is fully prevented by mem_cgroup_swapout();
|
||||
* but if page reclaim is already handling the same folio, it is
|
||||
* unnecessary to handle it again in the shrinker, so excluding
|
||||
* swapcache here may still be a useful optimization.
|
||||
*/
|
||||
if (folio_test_swapcache(folio))
|
||||
return;
|
||||
@ -3718,8 +3735,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
|
||||
struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
|
||||
unsigned long flags;
|
||||
LIST_HEAD(list);
|
||||
struct folio *folio, *next;
|
||||
int split = 0;
|
||||
struct folio *folio, *next, *prev = NULL;
|
||||
int split = 0, removed = 0;
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
if (sc->memcg)
|
||||
@ -3775,15 +3792,28 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
|
||||
*/
|
||||
if (!did_split && !folio_test_partially_mapped(folio)) {
|
||||
list_del_init(&folio->_deferred_list);
|
||||
ds_queue->split_queue_len--;
|
||||
removed++;
|
||||
} else {
|
||||
/*
|
||||
* That unlocked list_del_init() above would be unsafe,
|
||||
* unless its folio is separated from any earlier folios
|
||||
* left on the list (which may be concurrently unqueued)
|
||||
* by one safe folio with refcount still raised.
|
||||
*/
|
||||
swap(folio, prev);
|
||||
}
|
||||
folio_put(folio);
|
||||
if (folio)
|
||||
folio_put(folio);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
|
||||
list_splice_tail(&list, &ds_queue->split_queue);
|
||||
ds_queue->split_queue_len -= removed;
|
||||
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
|
||||
|
||||
if (prev)
|
||||
folio_put(prev);
|
||||
|
||||
/*
|
||||
* Stop shrinker if we didn't split any page, but the queue is empty.
|
||||
* This can happen if pages were freed under us.
|
||||
|
@ -108,6 +108,51 @@ static inline void *folio_raw_mapping(const struct folio *folio)
|
||||
return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a file-backed mapping, and is about to be memory mapped - invoke its
|
||||
* mmap hook and safely handle error conditions. On error, VMA hooks will be
|
||||
* mutated.
|
||||
*
|
||||
* @file: File which backs the mapping.
|
||||
* @vma: VMA which we are mapping.
|
||||
*
|
||||
* Returns: 0 if success, error otherwise.
|
||||
*/
|
||||
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
int err = call_mmap(file, vma);
|
||||
|
||||
if (likely(!err))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* OK, we tried to call the file hook for mmap(), but an error
|
||||
* arose. The mapping is in an inconsistent state and we must not invoke
|
||||
* any further hooks on it.
|
||||
*/
|
||||
vma->vm_ops = &vma_dummy_vm_ops;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the VMA has a close hook then close it, and since closing it might leave
|
||||
* it in an inconsistent state which makes the use of any hooks suspect, clear
|
||||
* them down by installing dummy empty hooks.
|
||||
*/
|
||||
static inline void vma_close(struct vm_area_struct *vma)
|
||||
{
|
||||
if (vma->vm_ops && vma->vm_ops->close) {
|
||||
vma->vm_ops->close(vma);
|
||||
|
||||
/*
|
||||
* The mapping is in an inconsistent state, and no further hooks
|
||||
* may be invoked upon it.
|
||||
*/
|
||||
vma->vm_ops = &vma_dummy_vm_ops;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
/* Flags for folio_pte_batch(). */
|
||||
@ -639,11 +684,11 @@ static inline void folio_set_order(struct folio *folio, unsigned int order)
|
||||
#endif
|
||||
}
|
||||
|
||||
void __folio_undo_large_rmappable(struct folio *folio);
|
||||
static inline void folio_undo_large_rmappable(struct folio *folio)
|
||||
bool __folio_unqueue_deferred_split(struct folio *folio);
|
||||
static inline bool folio_unqueue_deferred_split(struct folio *folio)
|
||||
{
|
||||
if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio))
|
||||
return;
|
||||
return false;
|
||||
|
||||
/*
|
||||
* At this point, there is no one trying to add the folio to
|
||||
@ -651,9 +696,9 @@ static inline void folio_undo_large_rmappable(struct folio *folio)
|
||||
* to check without acquiring the split_queue_lock.
|
||||
*/
|
||||
if (data_race(list_empty(&folio->_deferred_list)))
|
||||
return;
|
||||
return false;
|
||||
|
||||
__folio_undo_large_rmappable(folio);
|
||||
return __folio_unqueue_deferred_split(folio);
|
||||
}
|
||||
|
||||
static inline struct folio *page_rmappable_folio(struct page *page)
|
||||
|
@ -848,6 +848,8 @@ static int mem_cgroup_move_account(struct folio *folio,
|
||||
css_get(&to->css);
|
||||
css_put(&from->css);
|
||||
|
||||
/* Warning should never happen, so don't worry about refcount non-0 */
|
||||
WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
|
||||
folio->memcg_data = (unsigned long)to;
|
||||
|
||||
__folio_memcg_unlock(from);
|
||||
@ -1217,7 +1219,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
||||
enum mc_target_type target_type;
|
||||
union mc_target target;
|
||||
struct folio *folio;
|
||||
bool tried_split_before = false;
|
||||
|
||||
retry_pmd:
|
||||
ptl = pmd_trans_huge_lock(pmd, vma);
|
||||
if (ptl) {
|
||||
if (mc.precharge < HPAGE_PMD_NR) {
|
||||
@ -1227,6 +1231,27 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
||||
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
|
||||
if (target_type == MC_TARGET_PAGE) {
|
||||
folio = target.folio;
|
||||
/*
|
||||
* Deferred split queue locking depends on memcg,
|
||||
* and unqueue is unsafe unless folio refcount is 0:
|
||||
* split or skip if on the queue? first try to split.
|
||||
*/
|
||||
if (!list_empty(&folio->_deferred_list)) {
|
||||
spin_unlock(ptl);
|
||||
if (!tried_split_before)
|
||||
split_folio(folio);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
if (tried_split_before)
|
||||
return 0;
|
||||
tried_split_before = true;
|
||||
goto retry_pmd;
|
||||
}
|
||||
/*
|
||||
* So long as that pmd lock is held, the folio cannot
|
||||
* be racily added to the _deferred_list, because
|
||||
* __folio_remove_rmap() will find !partially_mapped.
|
||||
*/
|
||||
if (folio_isolate_lru(folio)) {
|
||||
if (!mem_cgroup_move_account(folio, true,
|
||||
mc.from, mc.to)) {
|
||||
|
@ -4629,10 +4629,6 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
|
||||
struct obj_cgroup *objcg;
|
||||
|
||||
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
|
||||
VM_BUG_ON_FOLIO(folio_order(folio) > 1 &&
|
||||
!folio_test_hugetlb(folio) &&
|
||||
!list_empty(&folio->_deferred_list) &&
|
||||
folio_test_partially_mapped(folio), folio);
|
||||
|
||||
/*
|
||||
* Nobody should be changing or seriously looking at
|
||||
@ -4679,6 +4675,7 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
|
||||
ug->nr_memory += nr_pages;
|
||||
ug->pgpgout++;
|
||||
|
||||
WARN_ON_ONCE(folio_unqueue_deferred_split(folio));
|
||||
folio->memcg_data = 0;
|
||||
}
|
||||
|
||||
@ -4790,6 +4787,9 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
|
||||
|
||||
/* Transfer the charge and the css ref */
|
||||
commit_charge(new, memcg);
|
||||
|
||||
/* Warning should never happen, so don't worry about refcount non-0 */
|
||||
WARN_ON_ONCE(folio_unqueue_deferred_split(old));
|
||||
old->memcg_data = 0;
|
||||
}
|
||||
|
||||
@ -4976,6 +4976,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
|
||||
VM_BUG_ON_FOLIO(oldid, folio);
|
||||
mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
|
||||
|
||||
folio_unqueue_deferred_split(folio);
|
||||
folio->memcg_data = 0;
|
||||
|
||||
if (!mem_cgroup_is_root(memcg))
|
||||
|
@ -490,7 +490,7 @@ static int __folio_migrate_mapping(struct address_space *mapping,
|
||||
folio_test_large_rmappable(folio)) {
|
||||
if (!folio_ref_freeze(folio, expected_count))
|
||||
return -EAGAIN;
|
||||
folio_undo_large_rmappable(folio);
|
||||
folio_unqueue_deferred_split(folio);
|
||||
folio_ref_unfreeze(folio, expected_count);
|
||||
}
|
||||
|
||||
@ -515,7 +515,7 @@ static int __folio_migrate_mapping(struct address_space *mapping,
|
||||
}
|
||||
|
||||
/* Take off deferred split queue while frozen and memcg set */
|
||||
folio_undo_large_rmappable(folio);
|
||||
folio_unqueue_deferred_split(folio);
|
||||
|
||||
/*
|
||||
* Now we know that no one else is looking at the folio:
|
||||
|
130
mm/mmap.c
130
mm/mmap.c
@ -344,7 +344,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
|
||||
* to. we assume access permissions have been handled by the open
|
||||
* of the memory object, so we don't do any here.
|
||||
*/
|
||||
vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
|
||||
vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) |
|
||||
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
|
||||
|
||||
/* Obtain the address to map to. we verify (or select) it and ensure
|
||||
@ -1358,20 +1358,18 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
|
||||
return do_vmi_munmap(&vmi, mm, start, len, uf, false);
|
||||
}
|
||||
|
||||
unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
static unsigned long __mmap_region(struct file *file, unsigned long addr,
|
||||
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
|
||||
struct list_head *uf)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
pgoff_t pglen = PHYS_PFN(len);
|
||||
struct vm_area_struct *merge;
|
||||
unsigned long charged = 0;
|
||||
struct vma_munmap_struct vms;
|
||||
struct ma_state mas_detach;
|
||||
struct maple_tree mt_detach;
|
||||
unsigned long end = addr + len;
|
||||
bool writable_file_mapping = false;
|
||||
int error;
|
||||
VMA_ITERATOR(vmi, mm, addr);
|
||||
VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);
|
||||
@ -1422,7 +1420,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
/*
|
||||
* clear PTEs while the vma is still in the tree so that rmap
|
||||
* cannot race with the freeing later in the truncate scenario.
|
||||
* This is also needed for call_mmap(), which is why vm_ops
|
||||
* This is also needed for mmap_file(), which is why vm_ops
|
||||
* close function is called.
|
||||
*/
|
||||
vms_clean_up_area(&vms, &mas_detach);
|
||||
@ -1445,35 +1443,35 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
vm_flags_init(vma, vm_flags);
|
||||
vma->vm_page_prot = vm_get_page_prot(vm_flags);
|
||||
|
||||
if (vma_iter_prealloc(&vmi, vma)) {
|
||||
error = -ENOMEM;
|
||||
goto free_vma;
|
||||
}
|
||||
|
||||
if (file) {
|
||||
vma->vm_file = get_file(file);
|
||||
error = call_mmap(file, vma);
|
||||
error = mmap_file(file, vma);
|
||||
if (error)
|
||||
goto unmap_and_free_vma;
|
||||
|
||||
if (vma_is_shared_maywrite(vma)) {
|
||||
error = mapping_map_writable(file->f_mapping);
|
||||
if (error)
|
||||
goto close_and_free_vma;
|
||||
|
||||
writable_file_mapping = true;
|
||||
}
|
||||
goto unmap_and_free_file_vma;
|
||||
|
||||
/* Drivers cannot alter the address of the VMA. */
|
||||
WARN_ON_ONCE(addr != vma->vm_start);
|
||||
/*
|
||||
* Expansion is handled above, merging is handled below.
|
||||
* Drivers should not alter the address of the VMA.
|
||||
* Drivers should not permit writability when previously it was
|
||||
* disallowed.
|
||||
*/
|
||||
if (WARN_ON((addr != vma->vm_start))) {
|
||||
error = -EINVAL;
|
||||
goto close_and_free_vma;
|
||||
}
|
||||
VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
|
||||
!(vm_flags & VM_MAYWRITE) &&
|
||||
(vma->vm_flags & VM_MAYWRITE));
|
||||
|
||||
vma_iter_config(&vmi, addr, end);
|
||||
/*
|
||||
* If vm_flags changed after call_mmap(), we should try merge
|
||||
* If vm_flags changed after mmap_file(), we should try merge
|
||||
* vma again as we may succeed this time.
|
||||
*/
|
||||
if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
|
||||
struct vm_area_struct *merge;
|
||||
|
||||
vmg.flags = vma->vm_flags;
|
||||
/* If this fails, state is reset ready for a reattempt. */
|
||||
merge = vma_merge_new_range(&vmg);
|
||||
@ -1491,7 +1489,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
vma = merge;
|
||||
/* Update vm_flags to pick up the change. */
|
||||
vm_flags = vma->vm_flags;
|
||||
goto unmap_writable;
|
||||
goto file_expanded;
|
||||
}
|
||||
vma_iter_config(&vmi, addr, end);
|
||||
}
|
||||
@ -1500,26 +1498,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
} else if (vm_flags & VM_SHARED) {
|
||||
error = shmem_zero_setup(vma);
|
||||
if (error)
|
||||
goto free_vma;
|
||||
goto free_iter_vma;
|
||||
} else {
|
||||
vma_set_anonymous(vma);
|
||||
}
|
||||
|
||||
if (map_deny_write_exec(vma, vma->vm_flags)) {
|
||||
error = -EACCES;
|
||||
goto close_and_free_vma;
|
||||
}
|
||||
|
||||
/* Allow architectures to sanity-check the vm_flags */
|
||||
if (!arch_validate_flags(vma->vm_flags)) {
|
||||
error = -EINVAL;
|
||||
goto close_and_free_vma;
|
||||
}
|
||||
|
||||
if (vma_iter_prealloc(&vmi, vma)) {
|
||||
error = -ENOMEM;
|
||||
goto close_and_free_vma;
|
||||
}
|
||||
#ifdef CONFIG_SPARC64
|
||||
/* TODO: Fix SPARC ADI! */
|
||||
WARN_ON_ONCE(!arch_validate_flags(vm_flags));
|
||||
#endif
|
||||
|
||||
/* Lock the VMA since it is modified after insertion into VMA tree */
|
||||
vma_start_write(vma);
|
||||
@ -1533,10 +1520,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
*/
|
||||
khugepaged_enter_vma(vma, vma->vm_flags);
|
||||
|
||||
/* Once vma denies write, undo our temporary denial count */
|
||||
unmap_writable:
|
||||
if (writable_file_mapping)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
file_expanded:
|
||||
file = vma->vm_file;
|
||||
ksm_add_vma(vma);
|
||||
expanded:
|
||||
@ -1569,24 +1553,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
|
||||
vma_set_page_prot(vma);
|
||||
|
||||
validate_mm(mm);
|
||||
return addr;
|
||||
|
||||
close_and_free_vma:
|
||||
if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close)
|
||||
vma->vm_ops->close(vma);
|
||||
unmap_and_free_file_vma:
|
||||
fput(vma->vm_file);
|
||||
vma->vm_file = NULL;
|
||||
|
||||
if (file || vma->vm_file) {
|
||||
unmap_and_free_vma:
|
||||
fput(vma->vm_file);
|
||||
vma->vm_file = NULL;
|
||||
|
||||
vma_iter_set(&vmi, vma->vm_end);
|
||||
/* Undo any partial mapping done by a device driver. */
|
||||
unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
|
||||
}
|
||||
if (writable_file_mapping)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
vma_iter_set(&vmi, vma->vm_end);
|
||||
/* Undo any partial mapping done by a device driver. */
|
||||
unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
|
||||
free_iter_vma:
|
||||
vma_iter_free(&vmi);
|
||||
free_vma:
|
||||
vm_area_free(vma);
|
||||
unacct_error:
|
||||
@ -1596,10 +1573,43 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
abort_munmap:
|
||||
vms_abort_munmap_vmas(&vms, &mas_detach);
|
||||
gather_failed:
|
||||
validate_mm(mm);
|
||||
return error;
|
||||
}
|
||||
|
||||
unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
|
||||
struct list_head *uf)
|
||||
{
|
||||
unsigned long ret;
|
||||
bool writable_file_mapping = false;
|
||||
|
||||
/* Check to see if MDWE is applicable. */
|
||||
if (map_deny_write_exec(vm_flags, vm_flags))
|
||||
return -EACCES;
|
||||
|
||||
/* Allow architectures to sanity-check the vm_flags. */
|
||||
if (!arch_validate_flags(vm_flags))
|
||||
return -EINVAL;
|
||||
|
||||
/* Map writable and ensure this isn't a sealed memfd. */
|
||||
if (file && is_shared_maywrite(vm_flags)) {
|
||||
int error = mapping_map_writable(file->f_mapping);
|
||||
|
||||
if (error)
|
||||
return error;
|
||||
writable_file_mapping = true;
|
||||
}
|
||||
|
||||
ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
|
||||
|
||||
/* Clear our write mapping regardless of error. */
|
||||
if (writable_file_mapping)
|
||||
mapping_unmap_writable(file->f_mapping);
|
||||
|
||||
validate_mm(current->mm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __vm_munmap(unsigned long start, size_t len, bool unlock)
|
||||
{
|
||||
int ret;
|
||||
@ -1934,7 +1944,7 @@ void exit_mmap(struct mm_struct *mm)
|
||||
do {
|
||||
if (vma->vm_flags & VM_ACCOUNT)
|
||||
nr_accounted += vma_pages(vma);
|
||||
remove_vma(vma, /* unreachable = */ true, /* closed = */ false);
|
||||
remove_vma(vma, /* unreachable = */ true);
|
||||
count++;
|
||||
cond_resched();
|
||||
vma = vma_next(&vmi);
|
||||
|
@ -810,7 +810,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
|
||||
break;
|
||||
}
|
||||
|
||||
if (map_deny_write_exec(vma, newflags)) {
|
||||
if (map_deny_write_exec(vma->vm_flags, newflags)) {
|
||||
error = -EACCES;
|
||||
break;
|
||||
}
|
||||
|
@ -589,8 +589,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vma)
|
||||
*/
|
||||
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
|
||||
{
|
||||
if (vma->vm_ops && vma->vm_ops->close)
|
||||
vma->vm_ops->close(vma);
|
||||
vma_close(vma);
|
||||
if (vma->vm_file)
|
||||
fput(vma->vm_file);
|
||||
put_nommu_region(vma->vm_region);
|
||||
@ -843,7 +842,7 @@ static unsigned long determine_vm_flags(struct file *file,
|
||||
{
|
||||
unsigned long vm_flags;
|
||||
|
||||
vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
|
||||
vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);
|
||||
|
||||
if (!file) {
|
||||
/*
|
||||
@ -885,7 +884,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = call_mmap(vma->vm_file, vma);
|
||||
ret = mmap_file(vma->vm_file, vma);
|
||||
if (ret == 0) {
|
||||
vma->vm_region->vm_top = vma->vm_region->vm_end;
|
||||
return 0;
|
||||
@ -918,7 +917,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
|
||||
* happy.
|
||||
*/
|
||||
if (capabilities & NOMMU_MAP_DIRECT) {
|
||||
ret = call_mmap(vma->vm_file, vma);
|
||||
ret = mmap_file(vma->vm_file, vma);
|
||||
/* shouldn't return success if we're not sharing */
|
||||
if (WARN_ON_ONCE(!is_nommu_shared_mapping(vma->vm_flags)))
|
||||
ret = -ENOSYS;
|
||||
|
@ -961,9 +961,8 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
|
||||
break;
|
||||
case 2:
|
||||
/* the second tail page: deferred_list overlaps ->mapping */
|
||||
if (unlikely(!list_empty(&folio->_deferred_list) &&
|
||||
folio_test_partially_mapped(folio))) {
|
||||
bad_page(page, "partially mapped folio on deferred list");
|
||||
if (unlikely(!list_empty(&folio->_deferred_list))) {
|
||||
bad_page(page, "on deferred list");
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
@ -2682,7 +2681,6 @@ void free_unref_folios(struct folio_batch *folios)
|
||||
unsigned long pfn = folio_pfn(folio);
|
||||
unsigned int order = folio_order(folio);
|
||||
|
||||
folio_undo_large_rmappable(folio);
|
||||
if (!free_pages_prepare(&folio->page, order))
|
||||
continue;
|
||||
/*
|
||||
|
@ -2733,9 +2733,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* arm64 - allow memory tagging on RAM-based files */
|
||||
vm_flags_set(vma, VM_MTE_ALLOWED);
|
||||
|
||||
file_accessed(file);
|
||||
/* This is anonymous shared memory if it is unlinked at the time of mmap */
|
||||
if (inode->i_nlink)
|
||||
|
@ -121,7 +121,7 @@ void __folio_put(struct folio *folio)
|
||||
}
|
||||
|
||||
page_cache_release(folio);
|
||||
folio_undo_large_rmappable(folio);
|
||||
folio_unqueue_deferred_split(folio);
|
||||
mem_cgroup_uncharge(folio);
|
||||
free_unref_page(&folio->page, folio_order(folio));
|
||||
}
|
||||
@ -988,7 +988,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
|
||||
free_huge_folio(folio);
|
||||
continue;
|
||||
}
|
||||
folio_undo_large_rmappable(folio);
|
||||
folio_unqueue_deferred_split(folio);
|
||||
__page_cache_release(folio, &lruvec, &flags);
|
||||
|
||||
if (j != i)
|
||||
|
14
mm/vma.c
14
mm/vma.c
@ -323,11 +323,10 @@ static bool can_vma_merge_right(struct vma_merge_struct *vmg,
|
||||
/*
|
||||
* Close a vm structure and free it.
|
||||
*/
|
||||
void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed)
|
||||
void remove_vma(struct vm_area_struct *vma, bool unreachable)
|
||||
{
|
||||
might_sleep();
|
||||
if (!closed && vma->vm_ops && vma->vm_ops->close)
|
||||
vma->vm_ops->close(vma);
|
||||
vma_close(vma);
|
||||
if (vma->vm_file)
|
||||
fput(vma->vm_file);
|
||||
mpol_put(vma_policy(vma));
|
||||
@ -1115,9 +1114,7 @@ void vms_clean_up_area(struct vma_munmap_struct *vms,
|
||||
vms_clear_ptes(vms, mas_detach, true);
|
||||
mas_set(mas_detach, 0);
|
||||
mas_for_each(mas_detach, vma, ULONG_MAX)
|
||||
if (vma->vm_ops && vma->vm_ops->close)
|
||||
vma->vm_ops->close(vma);
|
||||
vms->closed_vm_ops = true;
|
||||
vma_close(vma);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1160,7 +1157,7 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
|
||||
/* Remove and clean up vmas */
|
||||
mas_set(mas_detach, 0);
|
||||
mas_for_each(mas_detach, vma, ULONG_MAX)
|
||||
remove_vma(vma, /* = */ false, vms->closed_vm_ops);
|
||||
remove_vma(vma, /* unreachable = */ false);
|
||||
|
||||
vm_unacct_memory(vms->nr_accounted);
|
||||
validate_mm(mm);
|
||||
@ -1684,8 +1681,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
||||
return new_vma;
|
||||
|
||||
out_vma_link:
|
||||
if (new_vma->vm_ops && new_vma->vm_ops->close)
|
||||
new_vma->vm_ops->close(new_vma);
|
||||
vma_close(new_vma);
|
||||
|
||||
if (new_vma->vm_file)
|
||||
fput(new_vma->vm_file);
|
||||
|
6
mm/vma.h
6
mm/vma.h
@ -42,8 +42,7 @@ struct vma_munmap_struct {
|
||||
int vma_count; /* Number of vmas that will be removed */
|
||||
bool unlock; /* Unlock after the munmap */
|
||||
bool clear_ptes; /* If there are outstanding PTE to be cleared */
|
||||
bool closed_vm_ops; /* call_mmap() was encountered, so vmas may be closed */
|
||||
/* 1 byte hole */
|
||||
/* 2 byte hole */
|
||||
unsigned long nr_pages; /* Number of pages being removed */
|
||||
unsigned long locked_vm; /* Number of locked pages */
|
||||
unsigned long nr_accounted; /* Number of VM_ACCOUNT pages */
|
||||
@ -198,7 +197,6 @@ static inline void init_vma_munmap(struct vma_munmap_struct *vms,
|
||||
vms->unmap_start = FIRST_USER_ADDRESS;
|
||||
vms->unmap_end = USER_PGTABLES_CEILING;
|
||||
vms->clear_ptes = false;
|
||||
vms->closed_vm_ops = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -269,7 +267,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
|
||||
unsigned long start, size_t len, struct list_head *uf,
|
||||
bool unlock);
|
||||
|
||||
void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed);
|
||||
void remove_vma(struct vm_area_struct *vma, bool unreachable);
|
||||
|
||||
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
|
||||
struct vm_area_struct *prev, struct vm_area_struct *next);
|
||||
|
@ -1476,7 +1476,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
|
||||
*/
|
||||
nr_reclaimed += nr_pages;
|
||||
|
||||
folio_undo_large_rmappable(folio);
|
||||
folio_unqueue_deferred_split(folio);
|
||||
if (folio_batch_add(&free_folios, folio) == 0) {
|
||||
mem_cgroup_uncharge_folios(&free_folios);
|
||||
try_to_unmap_flush();
|
||||
@ -1864,7 +1864,7 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
|
||||
if (unlikely(folio_put_testzero(folio))) {
|
||||
__folio_clear_lru_flags(folio);
|
||||
|
||||
folio_undo_large_rmappable(folio);
|
||||
folio_unqueue_deferred_split(folio);
|
||||
if (folio_batch_add(&free_folios, folio) == 0) {
|
||||
spin_unlock_irq(&lruvec->lru_lock);
|
||||
mem_cgroup_uncharge_folios(&free_folios);
|
||||
|
Loading…
Reference in New Issue
Block a user