mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-28 16:52:18 +00:00
mm: userfaultfd: recheck dst_pmd entry in move_pages_pte()
In move_pages_pte(), since dst_pte needs to be none, the subsequent pte_same() check cannot prevent the dst_pte page from being freed concurrently, so we also need to obtain dst_pmdval and recheck pmd_same(). Otherwise, once we support empty PTE page reclamation for anonymous pages, it may result in moving the src_pte page into the dst_pte page that is about to be freed by RCU. Link: https://lkml.kernel.org/r/8108c262757fc492626f3a2ffc44b775f2710e16.1733305182.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Muchun Song <muchun.song@linux.dev> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: Zach O'Keefe <zokeefe@google.com> Cc: Dan Carpenter <dan.carpenter@linaro.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
1f07a84d2e
commit
bb7abbaf18
@ -1020,6 +1020,14 @@ void double_pt_unlock(spinlock_t *ptl1,
|
||||
__release(ptl2);
|
||||
}
|
||||
|
||||
/*
 * is_pte_pages_stable - recheck that previously sampled page-table entries
 * have not changed.
 *
 * Returns true only when all three comparisons hold:
 *   - the current entry at @src_pte equals @orig_src_pte,
 *   - the current entry at @dst_pte equals @orig_dst_pte,
 *   - the current entry at @dst_pmd (read with pmdp_get_lockless()) equals
 *     @dst_pmdval.
 *
 * The pmd comparison exists because dst_pte is expected to be none, so the
 * pte_same() check alone cannot detect that the dst PTE page is being freed
 * concurrently (see the comment in move_pages_pte()).
 *
 * The callers visible in this file invoke it right after double_pt_lock()
 * and fail with -EAGAIN when it returns false.
 */
static inline bool is_pte_pages_stable(pte_t *dst_pte, pte_t *src_pte,
|
||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||
pmd_t *dst_pmd, pmd_t dst_pmdval)
|
||||
{
|
||||
return pte_same(ptep_get(src_pte), orig_src_pte) &&
|
||||
pte_same(ptep_get(dst_pte), orig_dst_pte) &&
|
||||
pmd_same(dst_pmdval, pmdp_get_lockless(dst_pmd));
|
||||
}
|
||||
|
||||
static int move_present_pte(struct mm_struct *mm,
|
||||
struct vm_area_struct *dst_vma,
|
||||
@ -1027,6 +1035,7 @@ static int move_present_pte(struct mm_struct *mm,
|
||||
unsigned long dst_addr, unsigned long src_addr,
|
||||
pte_t *dst_pte, pte_t *src_pte,
|
||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||
spinlock_t *dst_ptl, spinlock_t *src_ptl,
|
||||
struct folio *src_folio)
|
||||
{
|
||||
@ -1034,8 +1043,8 @@ static int move_present_pte(struct mm_struct *mm,
|
||||
|
||||
double_pt_lock(dst_ptl, src_ptl);
|
||||
|
||||
if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
|
||||
!pte_same(ptep_get(dst_pte), orig_dst_pte)) {
|
||||
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||
dst_pmd, dst_pmdval)) {
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
@ -1071,6 +1080,7 @@ static int move_swap_pte(struct mm_struct *mm,
|
||||
unsigned long dst_addr, unsigned long src_addr,
|
||||
pte_t *dst_pte, pte_t *src_pte,
|
||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||
spinlock_t *dst_ptl, spinlock_t *src_ptl)
|
||||
{
|
||||
if (!pte_swp_exclusive(orig_src_pte))
|
||||
@ -1078,8 +1088,8 @@ static int move_swap_pte(struct mm_struct *mm,
|
||||
|
||||
double_pt_lock(dst_ptl, src_ptl);
|
||||
|
||||
if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
|
||||
!pte_same(ptep_get(dst_pte), orig_dst_pte)) {
|
||||
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||
dst_pmd, dst_pmdval)) {
|
||||
double_pt_unlock(dst_ptl, src_ptl);
|
||||
return -EAGAIN;
|
||||
}
|
||||
@ -1097,13 +1107,14 @@ static int move_zeropage_pte(struct mm_struct *mm,
|
||||
unsigned long dst_addr, unsigned long src_addr,
|
||||
pte_t *dst_pte, pte_t *src_pte,
|
||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||
spinlock_t *dst_ptl, spinlock_t *src_ptl)
|
||||
{
|
||||
pte_t zero_pte;
|
||||
|
||||
double_pt_lock(dst_ptl, src_ptl);
|
||||
if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
|
||||
!pte_same(ptep_get(dst_pte), orig_dst_pte)) {
|
||||
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||
dst_pmd, dst_pmdval)) {
|
||||
double_pt_unlock(dst_ptl, src_ptl);
|
||||
return -EAGAIN;
|
||||
}
|
||||
@ -1136,6 +1147,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
||||
pte_t *src_pte = NULL;
|
||||
pte_t *dst_pte = NULL;
|
||||
pmd_t dummy_pmdval;
|
||||
pmd_t dst_pmdval;
|
||||
struct folio *src_folio = NULL;
|
||||
struct anon_vma *src_anon_vma = NULL;
|
||||
struct mmu_notifier_range range;
|
||||
@ -1148,11 +1160,11 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
||||
retry:
|
||||
/*
|
||||
* Use the maywrite version to indicate that dst_pte will be modified,
|
||||
* but since we will use pte_same() to detect the change of the pte
|
||||
* entry, there is no need to get pmdval, so just pass a dummy variable
|
||||
* to it.
|
||||
* since dst_pte needs to be none, the subsequent pte_same() check
|
||||
* cannot prevent the dst_pte page from being freed concurrently, so we
|
||||
* also need to obtain dst_pmdval and recheck pmd_same() later.
|
||||
*/
|
||||
dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dummy_pmdval,
|
||||
dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dst_pmdval,
|
||||
&dst_ptl);
|
||||
|
||||
/* Retry if a huge pmd materialized from under us */
|
||||
@ -1161,7 +1173,11 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* same as dst_pte */
|
||||
/*
|
||||
* Unlike dst_pte, the subsequent pte_same() check can ensure the
|
||||
* stability of the src_pte page, so there is no need to get pmdval,
|
||||
* just pass a dummy variable to it.
|
||||
*/
|
||||
src_pte = pte_offset_map_rw_nolock(mm, src_pmd, src_addr, &dummy_pmdval,
|
||||
&src_ptl);
|
||||
|
||||
@ -1213,7 +1229,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
||||
err = move_zeropage_pte(mm, dst_vma, src_vma,
|
||||
dst_addr, src_addr, dst_pte, src_pte,
|
||||
orig_dst_pte, orig_src_pte,
|
||||
dst_ptl, src_ptl);
|
||||
dst_pmd, dst_pmdval, dst_ptl, src_ptl);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1303,8 +1319,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
||||
|
||||
err = move_present_pte(mm, dst_vma, src_vma,
|
||||
dst_addr, src_addr, dst_pte, src_pte,
|
||||
orig_dst_pte, orig_src_pte,
|
||||
dst_ptl, src_ptl, src_folio);
|
||||
orig_dst_pte, orig_src_pte, dst_pmd,
|
||||
dst_pmdval, dst_ptl, src_ptl, src_folio);
|
||||
} else {
|
||||
entry = pte_to_swp_entry(orig_src_pte);
|
||||
if (non_swap_entry(entry)) {
|
||||
@ -1319,10 +1335,9 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = move_swap_pte(mm, dst_addr, src_addr,
|
||||
dst_pte, src_pte,
|
||||
orig_dst_pte, orig_src_pte,
|
||||
dst_ptl, src_ptl);
|
||||
err = move_swap_pte(mm, dst_addr, src_addr, dst_pte, src_pte,
|
||||
orig_dst_pte, orig_src_pte, dst_pmd,
|
||||
dst_pmdval, dst_ptl, src_ptl);
|
||||
}
|
||||
|
||||
out:
|
||||
|
Loading…
Reference in New Issue
Block a user