Lorenzo Stoakes 985da552a9 fork: only invoke khugepaged, ksm hooks if no error
There is no reason to invoke these hooks early against an mm that is in an
incomplete state.

The change in commit d24062914837 ("fork: use __mt_dup() to duplicate
maple tree in dup_mmap()") makes this more pertinent as we may be in a
state where entries in the maple tree are not yet consistent.

Their placement early in dup_mmap() only appears to have been meaningful
for early error checking, and since functionally it'd require a very small
allocation to fail (in practice 'too small to fail') that'd only occur in
the most dire circumstances, meaning the fork would fail or be OOM'd in
any case.

Since both khugepaged and KSM tracking are there to provide optimisations
to memory performance rather than critical functionality, it doesn't
really matter all that much if, under such dire memory pressure, we fail
to register an mm with these.

As a result, we follow the example of commit d2081b2bf819 ("mm:
khugepaged: make khugepaged_enter() void function") and make ksm_fork() a
void function also.

We only expose the mm to these functions once we are done with them and
only if no error occurred in the fork operation.

Link: https://lkml.kernel.org/r/e0cb8b840c9d1d5a6e84d4f8eff5f3f2022aa10c.1729014377.git.lorenzo.stoakes@oracle.com
Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reported-by: Jann Horn <jannh@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Jann Horn <jannh@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Linus Torvalds <torvalds@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-10-28 21:40:39 -07:00

156 lines
4.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_KSM_H
#define __LINUX_KSM_H
/*
* Memory merging support.
*
* This code enables dynamic sharing of identical pages found in different
* memory areas, even if they are not shared by fork().
*/
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
void ksm_add_vma(struct vm_area_struct *vma);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
int ksm_disable(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
/*
* To identify zeropages that were mapped by KSM, we reuse the dirty bit
* in the PTE. If the PTE is dirty, the zeropage was mapped by KSM when
* deduplicating memory.
*/
#define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte))
extern atomic_long_t ksm_zero_pages;
static inline void ksm_map_zero_page(struct mm_struct *mm)
{
atomic_long_inc(&ksm_zero_pages);
atomic_long_inc(&mm->ksm_zero_pages);
}
static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
{
if (is_ksm_zero_pte(pte)) {
atomic_long_dec(&ksm_zero_pages);
atomic_long_dec(&mm->ksm_zero_pages);
}
}
static inline long mm_ksm_zero_pages(struct mm_struct *mm)
{
return atomic_long_read(&mm->ksm_zero_pages);
}
static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
/* Adding mm to ksm is best effort on fork. */
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
__ksm_enter(mm);
}
static inline int ksm_execve(struct mm_struct *mm)
{
if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return __ksm_enter(mm);
return 0;
}
static inline void ksm_exit(struct mm_struct *mm)
{
if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
__ksm_exit(mm);
}
/*
* When do_swap_page() first faults in from swap what used to be a KSM page,
* no problem, it will be assigned to this vma's anon_vma; but thereafter,
* it might be faulted into a different anon_vma (or perhaps to a different
* offset in the same anon_vma). do_swap_page() cannot do all the locking
* needed to reconstitute a cross-anon_vma KSM page: for now it has to make
* a copy, and leave remerging the pages to a later pass of ksmd.
*
* We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
* but what if the vma was unmerged while the page was swapped out?
*/
struct folio *ksm_might_need_to_copy(struct folio *folio,
struct vm_area_struct *vma, unsigned long addr);
void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
void collect_procs_ksm(struct folio *folio, struct page *page,
struct list_head *to_kill, int force_early);
long ksm_process_profit(struct mm_struct *);
#else /* !CONFIG_KSM */
static inline void ksm_add_vma(struct vm_area_struct *vma)
{
}
static inline int ksm_disable(struct mm_struct *mm)
{
return 0;
}
static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
}
static inline int ksm_execve(struct mm_struct *mm)
{
return 0;
}
static inline void ksm_exit(struct mm_struct *mm)
{
}
static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
{
}
static inline void collect_procs_ksm(struct folio *folio, struct page *page,
struct list_head *to_kill, int force_early)
{
}
#ifdef CONFIG_MMU
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags)
{
return 0;
}
static inline struct folio *ksm_might_need_to_copy(struct folio *folio,
struct vm_area_struct *vma, unsigned long addr)
{
return folio;
}
static inline void rmap_walk_ksm(struct folio *folio,
struct rmap_walk_control *rwc)
{
}
static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old)
{
}
#endif /* CONFIG_MMU */
#endif /* !CONFIG_KSM */
#endif /* __LINUX_KSM_H */