mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
mm: userfaultfd: recheck dst_pmd entry in move_pages_pte()
In move_pages_pte(), since dst_pte needs to be none, the subsequent pte_same() check cannot prevent the dst_pte page from being freed concurrently, so we also need to obtain dst_pmdval and recheck pmd_same(). Otherwise, once we support empty PTE page reclamation for anonymous pages, it may result in moving the src_pte page into the dst_pte page that is about to be freed by RCU. Link: https://lkml.kernel.org/r/8108c262757fc492626f3a2ffc44b775f2710e16.1733305182.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Muchun Song <muchun.song@linux.dev> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: Zach O'Keefe <zokeefe@google.com> Cc: Dan Carpenter <dan.carpenter@linaro.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
1f07a84d2e
commit
bb7abbaf18
@ -1020,6 +1020,14 @@ void double_pt_unlock(spinlock_t *ptl1,
|
|||||||
__release(ptl2);
|
__release(ptl2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool is_pte_pages_stable(pte_t *dst_pte, pte_t *src_pte,
|
||||||
|
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||||
|
pmd_t *dst_pmd, pmd_t dst_pmdval)
|
||||||
|
{
|
||||||
|
return pte_same(ptep_get(src_pte), orig_src_pte) &&
|
||||||
|
pte_same(ptep_get(dst_pte), orig_dst_pte) &&
|
||||||
|
pmd_same(dst_pmdval, pmdp_get_lockless(dst_pmd));
|
||||||
|
}
|
||||||
|
|
||||||
static int move_present_pte(struct mm_struct *mm,
|
static int move_present_pte(struct mm_struct *mm,
|
||||||
struct vm_area_struct *dst_vma,
|
struct vm_area_struct *dst_vma,
|
||||||
@ -1027,6 +1035,7 @@ static int move_present_pte(struct mm_struct *mm,
|
|||||||
unsigned long dst_addr, unsigned long src_addr,
|
unsigned long dst_addr, unsigned long src_addr,
|
||||||
pte_t *dst_pte, pte_t *src_pte,
|
pte_t *dst_pte, pte_t *src_pte,
|
||||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||||
|
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||||
spinlock_t *dst_ptl, spinlock_t *src_ptl,
|
spinlock_t *dst_ptl, spinlock_t *src_ptl,
|
||||||
struct folio *src_folio)
|
struct folio *src_folio)
|
||||||
{
|
{
|
||||||
@ -1034,8 +1043,8 @@ static int move_present_pte(struct mm_struct *mm,
|
|||||||
|
|
||||||
double_pt_lock(dst_ptl, src_ptl);
|
double_pt_lock(dst_ptl, src_ptl);
|
||||||
|
|
||||||
if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
|
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||||
!pte_same(ptep_get(dst_pte), orig_dst_pte)) {
|
dst_pmd, dst_pmdval)) {
|
||||||
err = -EAGAIN;
|
err = -EAGAIN;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -1071,6 +1080,7 @@ static int move_swap_pte(struct mm_struct *mm,
|
|||||||
unsigned long dst_addr, unsigned long src_addr,
|
unsigned long dst_addr, unsigned long src_addr,
|
||||||
pte_t *dst_pte, pte_t *src_pte,
|
pte_t *dst_pte, pte_t *src_pte,
|
||||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||||
|
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||||
spinlock_t *dst_ptl, spinlock_t *src_ptl)
|
spinlock_t *dst_ptl, spinlock_t *src_ptl)
|
||||||
{
|
{
|
||||||
if (!pte_swp_exclusive(orig_src_pte))
|
if (!pte_swp_exclusive(orig_src_pte))
|
||||||
@ -1078,8 +1088,8 @@ static int move_swap_pte(struct mm_struct *mm,
|
|||||||
|
|
||||||
double_pt_lock(dst_ptl, src_ptl);
|
double_pt_lock(dst_ptl, src_ptl);
|
||||||
|
|
||||||
if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
|
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||||
!pte_same(ptep_get(dst_pte), orig_dst_pte)) {
|
dst_pmd, dst_pmdval)) {
|
||||||
double_pt_unlock(dst_ptl, src_ptl);
|
double_pt_unlock(dst_ptl, src_ptl);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
@ -1097,13 +1107,14 @@ static int move_zeropage_pte(struct mm_struct *mm,
|
|||||||
unsigned long dst_addr, unsigned long src_addr,
|
unsigned long dst_addr, unsigned long src_addr,
|
||||||
pte_t *dst_pte, pte_t *src_pte,
|
pte_t *dst_pte, pte_t *src_pte,
|
||||||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||||
|
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||||
spinlock_t *dst_ptl, spinlock_t *src_ptl)
|
spinlock_t *dst_ptl, spinlock_t *src_ptl)
|
||||||
{
|
{
|
||||||
pte_t zero_pte;
|
pte_t zero_pte;
|
||||||
|
|
||||||
double_pt_lock(dst_ptl, src_ptl);
|
double_pt_lock(dst_ptl, src_ptl);
|
||||||
if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
|
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||||
!pte_same(ptep_get(dst_pte), orig_dst_pte)) {
|
dst_pmd, dst_pmdval)) {
|
||||||
double_pt_unlock(dst_ptl, src_ptl);
|
double_pt_unlock(dst_ptl, src_ptl);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
@ -1136,6 +1147,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
|||||||
pte_t *src_pte = NULL;
|
pte_t *src_pte = NULL;
|
||||||
pte_t *dst_pte = NULL;
|
pte_t *dst_pte = NULL;
|
||||||
pmd_t dummy_pmdval;
|
pmd_t dummy_pmdval;
|
||||||
|
pmd_t dst_pmdval;
|
||||||
struct folio *src_folio = NULL;
|
struct folio *src_folio = NULL;
|
||||||
struct anon_vma *src_anon_vma = NULL;
|
struct anon_vma *src_anon_vma = NULL;
|
||||||
struct mmu_notifier_range range;
|
struct mmu_notifier_range range;
|
||||||
@ -1148,11 +1160,11 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
|||||||
retry:
|
retry:
|
||||||
/*
|
/*
|
||||||
* Use the maywrite version to indicate that dst_pte will be modified,
|
* Use the maywrite version to indicate that dst_pte will be modified,
|
||||||
* but since we will use pte_same() to detect the change of the pte
|
* since dst_pte needs to be none, the subsequent pte_same() check
|
||||||
* entry, there is no need to get pmdval, so just pass a dummy variable
|
* cannot prevent the dst_pte page from being freed concurrently, so we
|
||||||
* to it.
|
* also need to obtain dst_pmdval and recheck pmd_same() later.
|
||||||
*/
|
*/
|
||||||
dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dummy_pmdval,
|
dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dst_pmdval,
|
||||||
&dst_ptl);
|
&dst_ptl);
|
||||||
|
|
||||||
/* Retry if a huge pmd materialized from under us */
|
/* Retry if a huge pmd materialized from under us */
|
||||||
@ -1161,7 +1173,11 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* same as dst_pte */
|
/*
|
||||||
|
* Unlike dst_pte, the subsequent pte_same() check can ensure the
|
||||||
|
* stability of the src_pte page, so there is no need to get pmdval,
|
||||||
|
* just pass a dummy variable to it.
|
||||||
|
*/
|
||||||
src_pte = pte_offset_map_rw_nolock(mm, src_pmd, src_addr, &dummy_pmdval,
|
src_pte = pte_offset_map_rw_nolock(mm, src_pmd, src_addr, &dummy_pmdval,
|
||||||
&src_ptl);
|
&src_ptl);
|
||||||
|
|
||||||
@ -1213,7 +1229,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
|||||||
err = move_zeropage_pte(mm, dst_vma, src_vma,
|
err = move_zeropage_pte(mm, dst_vma, src_vma,
|
||||||
dst_addr, src_addr, dst_pte, src_pte,
|
dst_addr, src_addr, dst_pte, src_pte,
|
||||||
orig_dst_pte, orig_src_pte,
|
orig_dst_pte, orig_src_pte,
|
||||||
dst_ptl, src_ptl);
|
dst_pmd, dst_pmdval, dst_ptl, src_ptl);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1303,8 +1319,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
|||||||
|
|
||||||
err = move_present_pte(mm, dst_vma, src_vma,
|
err = move_present_pte(mm, dst_vma, src_vma,
|
||||||
dst_addr, src_addr, dst_pte, src_pte,
|
dst_addr, src_addr, dst_pte, src_pte,
|
||||||
orig_dst_pte, orig_src_pte,
|
orig_dst_pte, orig_src_pte, dst_pmd,
|
||||||
dst_ptl, src_ptl, src_folio);
|
dst_pmdval, dst_ptl, src_ptl, src_folio);
|
||||||
} else {
|
} else {
|
||||||
entry = pte_to_swp_entry(orig_src_pte);
|
entry = pte_to_swp_entry(orig_src_pte);
|
||||||
if (non_swap_entry(entry)) {
|
if (non_swap_entry(entry)) {
|
||||||
@ -1319,10 +1335,9 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = move_swap_pte(mm, dst_addr, src_addr,
|
err = move_swap_pte(mm, dst_addr, src_addr, dst_pte, src_pte,
|
||||||
dst_pte, src_pte,
|
orig_dst_pte, orig_src_pte, dst_pmd,
|
||||||
orig_dst_pte, orig_src_pte,
|
dst_pmdval, dst_ptl, src_ptl);
|
||||||
dst_ptl, src_ptl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
Loading…
Reference in New Issue
Block a user