mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-29 17:22:07 +00:00
fc9c45b71f
In do_adjust_pte(), we may modify the pte entry. The corresponding pmd entry may have been modified concurrently. Therefore, in order to ensure the stability if pmd entry, use pte_offset_map_rw_nolock() to replace pte_offset_map_nolock(), and do pmd_same() check after holding the PTL. All callers of update_mmu_cache_range() hold the vmf->ptl, so we can determined whether split PTE locks is being used by doing the following, just as we do elsewhere in the kernel. ptl != vmf->ptl And then we can delete the do_pte_lock() and do_pte_unlock(). Link: https://lkml.kernel.org/r/0eaf6b69aeb2fe35092a633fed12537efe645303.1727332572.git.zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> Acked-by: David Hildenbrand <david@redhat.com> Reviewed-by: Muchun Song <muchun.song@linux.dev> Cc: Hugh Dickins <hughd@google.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mike Rapoport (Microsoft) <rppt@kernel.org> Cc: Peter Xu <peterx@redhat.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
264 lines
6.5 KiB
C
264 lines
6.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* linux/arch/arm/mm/fault-armv.c
|
|
*
|
|
* Copyright (C) 1995 Linus Torvalds
|
|
* Modifications for ARM processor (c) 1995-2002 Russell King
|
|
*/
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/init.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/gfp.h>
|
|
|
|
#include <asm/bugs.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/cachetype.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include "mm.h"
|
|
|
|
static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE;
|
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
|
/*
|
|
* We take the easy way out of this problem - we make the
|
|
* PTE uncacheable. However, we leave the write buffer on.
|
|
*
|
|
* Note that the pte lock held when calling update_mmu_cache must also
|
|
* guard the pte (somewhere else in the same mm) that we modify here.
|
|
* Therefore those configurations which might call adjust_pte (those
|
|
* without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
|
|
*/
|
|
static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
|
|
unsigned long pfn, pte_t *ptep)
|
|
{
|
|
pte_t entry = *ptep;
|
|
int ret;
|
|
|
|
/*
|
|
* If this page is present, it's actually being shared.
|
|
*/
|
|
ret = pte_present(entry);
|
|
|
|
/*
|
|
* If this page isn't present, or is already setup to
|
|
* fault (ie, is old), we can safely ignore any issues.
|
|
*/
|
|
if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) {
|
|
flush_cache_page(vma, address, pfn);
|
|
outer_flush_range((pfn << PAGE_SHIFT),
|
|
(pfn << PAGE_SHIFT) + PAGE_SIZE);
|
|
pte_val(entry) &= ~L_PTE_MT_MASK;
|
|
pte_val(entry) |= shared_pte_mask;
|
|
set_pte_at(vma->vm_mm, address, ptep, entry);
|
|
flush_tlb_page(vma, address);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
|
|
unsigned long pfn, struct vm_fault *vmf)
|
|
{
|
|
spinlock_t *ptl;
|
|
pgd_t *pgd;
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
pmd_t pmdval;
|
|
int ret;
|
|
|
|
pgd = pgd_offset(vma->vm_mm, address);
|
|
if (pgd_none_or_clear_bad(pgd))
|
|
return 0;
|
|
|
|
p4d = p4d_offset(pgd, address);
|
|
if (p4d_none_or_clear_bad(p4d))
|
|
return 0;
|
|
|
|
pud = pud_offset(p4d, address);
|
|
if (pud_none_or_clear_bad(pud))
|
|
return 0;
|
|
|
|
pmd = pmd_offset(pud, address);
|
|
if (pmd_none_or_clear_bad(pmd))
|
|
return 0;
|
|
|
|
again:
|
|
/*
|
|
* This is called while another page table is mapped, so we
|
|
* must use the nested version. This also means we need to
|
|
* open-code the spin-locking.
|
|
*/
|
|
pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl);
|
|
if (!pte)
|
|
return 0;
|
|
|
|
/*
|
|
* If we are using split PTE locks, then we need to take the page
|
|
* lock here. Otherwise we are using shared mm->page_table_lock
|
|
* which is already locked, thus cannot take it.
|
|
*/
|
|
if (ptl != vmf->ptl) {
|
|
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
|
|
if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
|
|
pte_unmap_unlock(pte, ptl);
|
|
goto again;
|
|
}
|
|
}
|
|
|
|
ret = do_adjust_pte(vma, address, pfn, pte);
|
|
|
|
if (ptl != vmf->ptl)
|
|
spin_unlock(ptl);
|
|
pte_unmap(pte);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void
|
|
make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
|
|
unsigned long addr, pte_t *ptep, unsigned long pfn,
|
|
struct vm_fault *vmf)
|
|
{
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
struct vm_area_struct *mpnt;
|
|
unsigned long offset;
|
|
pgoff_t pgoff;
|
|
int aliases = 0;
|
|
|
|
pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
|
|
|
|
/*
|
|
* If we have any shared mappings that are in the same mm
|
|
* space, then we need to handle them specially to maintain
|
|
* cache coherency.
|
|
*/
|
|
flush_dcache_mmap_lock(mapping);
|
|
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
|
|
/*
|
|
* If this VMA is not in our MM, we can ignore it.
|
|
* Note that we intentionally mask out the VMA
|
|
* that we are fixing up.
|
|
*/
|
|
if (mpnt->vm_mm != mm || mpnt == vma)
|
|
continue;
|
|
if (!(mpnt->vm_flags & VM_MAYSHARE))
|
|
continue;
|
|
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
|
|
aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf);
|
|
}
|
|
flush_dcache_mmap_unlock(mapping);
|
|
if (aliases)
|
|
do_adjust_pte(vma, addr, pfn, ptep);
|
|
}
|
|
|
|
/*
|
|
* Take care of architecture specific things when placing a new PTE into
|
|
* a page table, or changing an existing PTE. Basically, there are two
|
|
* things that we need to take care of:
|
|
*
|
|
* 1. If PG_dcache_clean is not set for the page, we need to ensure
|
|
* that any cache entries for the kernels virtual memory
|
|
* range are written back to the page.
|
|
* 2. If we have multiple shared mappings of the same space in
|
|
* an object, we need to deal with the cache aliasing issues.
|
|
*
|
|
* Note that the pte lock will be held.
|
|
*/
|
|
void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
|
|
unsigned long addr, pte_t *ptep, unsigned int nr)
|
|
{
|
|
unsigned long pfn = pte_pfn(*ptep);
|
|
struct address_space *mapping;
|
|
struct folio *folio;
|
|
|
|
if (!pfn_valid(pfn))
|
|
return;
|
|
|
|
/*
|
|
* The zero page is never written to, so never has any dirty
|
|
* cache lines, and therefore never needs to be flushed.
|
|
*/
|
|
if (is_zero_pfn(pfn))
|
|
return;
|
|
|
|
folio = page_folio(pfn_to_page(pfn));
|
|
mapping = folio_flush_mapping(folio);
|
|
if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
|
|
__flush_dcache_folio(mapping, folio);
|
|
if (mapping) {
|
|
if (cache_is_vivt())
|
|
make_coherent(mapping, vma, addr, ptep, pfn, vmf);
|
|
else if (vma->vm_flags & VM_EXEC)
|
|
__flush_icache_all();
|
|
}
|
|
}
|
|
#endif /* __LINUX_ARM_ARCH__ < 6 */
|
|
|
|
/*
|
|
* Check whether the write buffer has physical address aliasing
|
|
* issues. If it has, we need to avoid them for the case where
|
|
* we have several shared mappings of the same object in user
|
|
* space.
|
|
*/
|
|
static int __init check_writebuffer(unsigned long *p1, unsigned long *p2)
|
|
{
|
|
register unsigned long zero = 0, one = 1, val;
|
|
|
|
local_irq_disable();
|
|
mb();
|
|
*p1 = one;
|
|
mb();
|
|
*p2 = zero;
|
|
mb();
|
|
val = *p1;
|
|
mb();
|
|
local_irq_enable();
|
|
return val != zero;
|
|
}
|
|
|
|
void __init check_writebuffer_bugs(void)
|
|
{
|
|
struct page *page;
|
|
const char *reason;
|
|
unsigned long v = 1;
|
|
|
|
pr_info("CPU: Testing write buffer coherency: ");
|
|
|
|
page = alloc_page(GFP_KERNEL);
|
|
if (page) {
|
|
unsigned long *p1, *p2;
|
|
pgprot_t prot = __pgprot_modify(PAGE_KERNEL,
|
|
L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE);
|
|
|
|
p1 = vmap(&page, 1, VM_IOREMAP, prot);
|
|
p2 = vmap(&page, 1, VM_IOREMAP, prot);
|
|
|
|
if (p1 && p2) {
|
|
v = check_writebuffer(p1, p2);
|
|
reason = "enabling work-around";
|
|
} else {
|
|
reason = "unable to map memory\n";
|
|
}
|
|
|
|
vunmap(p1);
|
|
vunmap(p2);
|
|
put_page(page);
|
|
} else {
|
|
reason = "unable to grab page\n";
|
|
}
|
|
|
|
if (v) {
|
|
pr_cont("failed, %s\n", reason);
|
|
shared_pte_mask = L_PTE_MT_UNCACHED;
|
|
} else {
|
|
pr_cont("ok\n");
|
|
}
|
|
}
|