mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-17 10:26:09 +00:00
mm: Fix pmd_read_atomic()
AFAICT there's no reason to do anything different than what we do for PTEs. Make it so (also affects SH). Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20221022114424.711181252%40infradead.org
This commit is contained in:
parent
0862ff059c
commit
024d232ae4
@ -34,62 +34,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
|
||||
ptep->pte_low = pte.pte_low;
|
||||
}
|
||||
|
||||
#define pmd_read_atomic pmd_read_atomic
|
||||
/*
|
||||
* pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
|
||||
* a "*pmdp" dereference done by GCC. Problem is, in certain places
|
||||
* where pte_offset_map_lock() is called, concurrent page faults are
|
||||
* allowed, if the mmap_lock is held for reading. An example is mincore
|
||||
* vs page faults vs MADV_DONTNEED. On the page fault side
|
||||
* pmd_populate() rightfully does a set_64bit(), but if we're reading the
|
||||
* pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
|
||||
* because GCC will not read the 64-bit value of the pmd atomically.
|
||||
*
|
||||
* To fix this all places running pte_offset_map_lock() while holding the
|
||||
* mmap_lock in read mode, shall read the pmdp pointer using this
|
||||
* function to know if the pmd is null or not, and in turn to know if
|
||||
* they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
|
||||
* operations.
|
||||
*
|
||||
* Without THP if the mmap_lock is held for reading, the pmd can only
|
||||
* transition from null to not null while pmd_read_atomic() runs. So
|
||||
* we can always return atomic pmd values with this function.
|
||||
*
|
||||
* With THP if the mmap_lock is held for reading, the pmd can become
|
||||
* trans_huge or none or point to a pte (and in turn become "stable")
|
||||
* at any time under pmd_read_atomic(). We could read it truly
|
||||
* atomically here with an atomic64_read() for the THP enabled case (and
|
||||
* it would be a whole lot simpler), but to avoid using cmpxchg8b we
|
||||
* only return an atomic pmdval if the low part of the pmdval is later
|
||||
* found to be stable (i.e. pointing to a pte). We are also returning a
|
||||
* 'none' (zero) pmdval if the low part of the pmd is zero.
|
||||
*
|
||||
* In some cases the high and low part of the pmdval returned may not be
|
||||
* consistent if THP is enabled (the low part may point to previously
|
||||
* mapped hugepage, while the high part may point to a more recently
|
||||
* mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
|
||||
* needs the low part of the pmd to be read atomically to decide if the
|
||||
* pmd is unstable or not, with the only exception when the low part
|
||||
* of the pmd is zero, in which case we return a 'none' pmd.
|
||||
*/
|
||||
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
||||
{
|
||||
pmdval_t ret;
|
||||
/* View the pmd as two 32-bit words: *tmp supplies the low half, *(tmp + 1) the high half. */
u32 *tmp = (u32 *)pmdp;
|
||||
|
||||
/* Read the low 32 bits first; a zero low half is returned as-is (a zero pmdval). */
ret = (pmdval_t) (*tmp);
|
||||
if (ret) {
|
||||
/*
|
||||
* If the low part is null, we must not read the high part
|
||||
* or we can end up with a partial pmd.
|
||||
*/
|
||||
smp_rmb();
|
||||
/* Low half was non-zero: merge the second 32-bit word into bits 63..32. */
ret |= ((pmdval_t)*(tmp + 1)) << 32;
|
||||
}
|
||||
|
||||
/* Wrap the assembled raw value in a pmd_t. */
return (pmd_t) { .pmd = ret };
|
||||
}
|
||||
|
||||
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
|
||||
|
@ -298,6 +298,13 @@ static inline pte_t ptep_get(pte_t *ptep)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PMDP_GET
|
||||
/*
 * pmdp_get - read the PMD entry that @pmdp points to.
 *
 * Default definition, compiled only when __HAVE_ARCH_PMDP_GET is not
 * defined. Returns the value obtained with READ_ONCE(*pmdp).
 */
static inline pmd_t pmdp_get(pmd_t *pmdp)
|
||||
{
|
||||
return READ_ONCE(*pmdp);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
|
||||
/*
|
||||
* For walking the pagetables without holding any locks. Some architectures
|
||||
@ -340,15 +347,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep)
|
||||
|
||||
return pte;
|
||||
}
|
||||
#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
#define ptep_get_lockless ptep_get_lockless
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
|
||||
{
|
||||
pmd_t pmd;
|
||||
|
||||
do {
|
||||
pmd.pmd_low = pmdp->pmd_low;
|
||||
smp_rmb();
|
||||
pmd.pmd_high = pmdp->pmd_high;
|
||||
smp_rmb();
|
||||
} while (unlikely(pmd.pmd_low != pmdp->pmd_low));
|
||||
|
||||
return pmd;
|
||||
}
|
||||
#define pmdp_get_lockless pmdp_get_lockless
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
|
||||
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
|
||||
/*
|
||||
* We require that the PTE can be read atomically.
|
||||
*/
|
||||
#ifndef ptep_get_lockless
|
||||
/* Fallback ptep_get_lockless(): forwards directly to ptep_get() and returns its result. */
static inline pte_t ptep_get_lockless(pte_t *ptep)
|
||||
{
|
||||
return ptep_get(ptep);
|
||||
}
|
||||
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
#endif
|
||||
|
||||
#ifndef pmdp_get_lockless
|
||||
/*
 * Fallback pmdp_get_lockless(), compiled only when no macro named
 * pmdp_get_lockless has been defined. Forwards directly to pmdp_get()
 * and returns its result.
 */
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
|
||||
{
|
||||
return pmdp_get(pmdp);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
||||
@ -1318,17 +1352,10 @@ static inline int pud_trans_unstable(pud_t *pud)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef pmd_read_atomic
|
||||
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
||||
{
|
||||
/*
|
||||
* Depend on compiler for an atomic pmd read. NOTE: this is
|
||||
* only going to work, if the pmdval_t isn't larger than
|
||||
* an unsigned long.
|
||||
*/
|
||||
return *pmdp;
|
||||
/*
 * NOTE(review): this listing looks like a rendered diff with the +/-
 * markers lost. The "return *pmdp;" above and the return below are
 * presumably the removed and the added line of the same hunk, so only
 * one of them exists in any real version of the header. As written
 * here, the statement below would be unreachable. Confirm against the
 * actual source tree.
 */
return pmdp_get_lockless(pmdp);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_needs_pgtable_deposit
|
||||
#define arch_needs_pgtable_deposit() (false)
|
||||
|
Loading…
x
Reference in New Issue
Block a user