arm: adjust_pte() use pte_offset_map_rw_nolock()

In do_adjust_pte(), we may modify the pte entry.  The corresponding pmd
entry may have been modified concurrently.  Therefore, in order to ensure
the stability if pmd entry, use pte_offset_map_rw_nolock() to replace
pte_offset_map_nolock(), and do pmd_same() check after holding the PTL.

All callers of update_mmu_cache_range() hold the vmf->ptl, so we can
determined whether split PTE locks is being used by doing the following,
just as we do elsewhere in the kernel.

	ptl != vmf->ptl

And then we can delete the do_pte_lock() and do_pte_unlock().

Link: https://lkml.kernel.org/r/0eaf6b69aeb2fe35092a633fed12537efe645303.1727332572.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Qi Zheng 2024-09-26 14:46:18 +08:00 committed by Andrew Morton
parent c85507857b
commit fc9c45b71f

View File

@ -61,32 +61,8 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
return ret;
}
#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
/*
* If we are using split PTE locks, then we need to take the page
* lock here. Otherwise we are using shared mm->page_table_lock
* which is already locked, thus cannot take it.
*/
static inline void do_pte_lock(spinlock_t *ptl)
{
/*
* Use nested version here to indicate that we are already
* holding one similar spinlock.
*/
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
}
static inline void do_pte_unlock(spinlock_t *ptl)
{
spin_unlock(ptl);
}
#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */
static inline void do_pte_lock(spinlock_t *ptl) {}
static inline void do_pte_unlock(spinlock_t *ptl) {}
#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */
static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
unsigned long pfn)
unsigned long pfn, struct vm_fault *vmf)
{
spinlock_t *ptl;
pgd_t *pgd;
@ -94,6 +70,7 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pmd_t pmdval;
int ret;
pgd = pgd_offset(vma->vm_mm, address);
@ -112,20 +89,33 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
if (pmd_none_or_clear_bad(pmd))
return 0;
again:
/*
* This is called while another page table is mapped, so we
* must use the nested version. This also means we need to
* open-code the spin-locking.
*/
pte = pte_offset_map_nolock(vma->vm_mm, pmd, address, &ptl);
pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl);
if (!pte)
return 0;
do_pte_lock(ptl);
/*
* If we are using split PTE locks, then we need to take the page
* lock here. Otherwise we are using shared mm->page_table_lock
* which is already locked, thus cannot take it.
*/
if (ptl != vmf->ptl) {
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
pte_unmap_unlock(pte, ptl);
goto again;
}
}
ret = do_adjust_pte(vma, address, pfn, pte);
do_pte_unlock(ptl);
if (ptl != vmf->ptl)
spin_unlock(ptl);
pte_unmap(pte);
return ret;
@ -133,7 +123,8 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
static void
make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep, unsigned long pfn)
unsigned long addr, pte_t *ptep, unsigned long pfn,
struct vm_fault *vmf)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
@ -160,7 +151,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
if (!(mpnt->vm_flags & VM_MAYSHARE))
continue;
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn);
aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf);
}
flush_dcache_mmap_unlock(mapping);
if (aliases)
@ -203,7 +194,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
__flush_dcache_folio(mapping, folio);
if (mapping) {
if (cache_is_vivt())
make_coherent(mapping, vma, addr, ptep, pfn);
make_coherent(mapping, vma, addr, ptep, pfn, vmf);
else if (vma->vm_flags & VM_EXEC)
__flush_icache_all();
}