mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 18:08:20 +00:00
fe90f3967b
Many architectures' switch_mm() (e.g. arm64) do not have an smp_mb() which the core scheduler code has depended upon since commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid"). If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear() can unset the actively used cid when it fails to observe the active task after it sets lazy_put. There *is* a memory barrier between storing to rq->curr and _return to userspace_ (as required by membarrier), but the rseq mm_cid has stricter requirements: the barrier needs to be issued between the store to rq->curr and switch_mm_cid(), which happens earlier than: - spin_unlock(), - switch_to(). So it's fine when the architecture's switch_mm() happens to have that barrier already, but less so when the architecture only provides the full barrier in switch_to() or spin_unlock(). It is a bug in the rseq switch_mm_cid() implementation. All architectures that don't have memory barriers in switch_mm(), but rather have the full barrier either in finish_lock_switch() or switch_to(), have them too late for the needs of switch_mm_cid(). Introduce a new smp_mb__after_switch_mm(), defined as smp_mb() in the generic barrier.h header, and use it in switch_mm_cid() for scheduler transitions where switch_mm() is expected to provide a memory barrier. Architectures can override smp_mb__after_switch_mm() if their switch_mm() implementation provides an implicit memory barrier. Override it with a no-op on x86, which implicitly provides this memory barrier by writing to CR3. 
Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") Reported-by: levi.yun <yeoreum.yun@arm.com> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> # for arm64 Acked-by: Dave Hansen <dave.hansen@linux.intel.com> # for x86 Cc: <stable@vger.kernel.org> # 6.4.x Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: https://lore.kernel.org/r/20240415152114.59122-2-mathieu.desnoyers@efficios.com
307 lines
7.4 KiB
C
307 lines
7.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Generic barrier definitions.
 *
 * It should be possible to use these on really simple architectures,
 * but it serves more as a starting point for new ports.
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#ifndef __ASM_GENERIC_BARRIER_H
#define __ASM_GENERIC_BARRIER_H

/* The definitions that follow are C macros; skip them for assembly sources. */
#ifndef __ASSEMBLY__

#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <asm/rwonce.h>

/* Default nop(): a single "nop" instruction, unless already provided. */
#ifndef nop
#define nop()	asm volatile ("nop")
#endif

/*
 * Architectures that want generic instrumentation can define __ prefixed
 * variants of all barriers.
 *
 * For every __ prefixed mandatory barrier that is defined at this point,
 * the un-prefixed name becomes the matching KCSAN hook followed by the
 * __ prefixed barrier.  Nothing is defined here when the __ prefixed
 * variant is absent.
 */

#ifdef __mb
#define mb()	do { kcsan_mb(); __mb(); } while (0)
#endif

#ifdef __rmb
#define rmb()	do { kcsan_rmb(); __rmb(); } while (0)
#endif

#ifdef __wmb
#define wmb()	do { kcsan_wmb(); __wmb(); } while (0)
#endif

/*
 * Instrumented DMA barriers: when a __dma_* barrier is defined, the
 * un-prefixed name runs the matching KCSAN hook and then the __dma_*
 * barrier.
 */

#ifdef __dma_mb
#define dma_mb()	do { kcsan_mb(); __dma_mb(); } while (0)
#endif

#ifdef __dma_rmb
#define dma_rmb()	do { kcsan_rmb(); __dma_rmb(); } while (0)
#endif

#ifdef __dma_wmb
#define dma_wmb()	do { kcsan_wmb(); __dma_wmb(); } while (0)
#endif

/*
 * Force strict CPU ordering. And yes, this is required on UP too when we're
 * talking to devices.
 *
 * Fall back to compiler barriers if nothing better is provided: mb()
 * defaults to barrier(), and rmb()/wmb() default to mb().
 */

#ifndef mb
#define mb()	barrier()
#endif

#ifndef rmb
#define rmb()	mb()
#endif

#ifndef wmb
#define wmb()	mb()
#endif

/* DMA barriers default to the corresponding mandatory barrier. */

#ifndef dma_mb
#define dma_mb()	mb()
#endif

#ifndef dma_rmb
#define dma_rmb()	rmb()
#endif

#ifndef dma_wmb
#define dma_wmb()	wmb()
#endif

/* __smp_* barriers default to the corresponding mandatory barrier. */

#ifndef __smp_mb
#define __smp_mb()	mb()
#endif

#ifndef __smp_rmb
#define __smp_rmb()	rmb()
#endif

#ifndef __smp_wmb
#define __smp_wmb()	wmb()
#endif

/*
 * smp_mb()/smp_rmb()/smp_wmb():
 *  - CONFIG_SMP:  KCSAN hook followed by the __smp_* barrier.
 *  - !CONFIG_SMP: a plain barrier().
 * A definition that already exists is left untouched.
 */
#ifdef CONFIG_SMP

#ifndef smp_mb
#define smp_mb()	do { kcsan_mb(); __smp_mb(); } while (0)
#endif

#ifndef smp_rmb
#define smp_rmb()	do { kcsan_rmb(); __smp_rmb(); } while (0)
#endif

#ifndef smp_wmb
#define smp_wmb()	do { kcsan_wmb(); __smp_wmb(); } while (0)
#endif

#else	/* !CONFIG_SMP */

#ifndef smp_mb
#define smp_mb()	barrier()
#endif

#ifndef smp_rmb
#define smp_rmb()	barrier()
#endif

#ifndef smp_wmb
#define smp_wmb()	barrier()
#endif

#endif	/* CONFIG_SMP */

/* Default __smp_store_mb(): WRITE_ONCE() the value, then issue __smp_mb(). */
#ifndef __smp_store_mb
#define __smp_store_mb(var, value)  do { WRITE_ONCE(var, value); __smp_mb(); } while (0)
#endif

/* By default the barriers around atomics are full __smp_mb() barriers. */

#ifndef __smp_mb__before_atomic
#define __smp_mb__before_atomic()	__smp_mb()
#endif

#ifndef __smp_mb__after_atomic
#define __smp_mb__after_atomic()	__smp_mb()
#endif

/*
 * Default __smp_store_release(): check the pointed-to type with
 * compiletime_assert_atomic_type(), issue __smp_mb(), then WRITE_ONCE()
 * @v to *@p.  @p is parenthesized so pointer expressions such as "a + 1"
 * dereference as intended.
 */
#ifndef __smp_store_release
#define __smp_store_release(p, v)					\
do {									\
	compiletime_assert_atomic_type(*(p));				\
	__smp_mb();							\
	WRITE_ONCE(*(p), v);						\
} while (0)
#endif

/*
 * Default __smp_load_acquire(): READ_ONCE() *@p into a temporary, check the
 * pointed-to type with compiletime_assert_atomic_type(), issue __smp_mb(),
 * and evaluate to the loaded value cast back to the type of *@p.  @p is
 * parenthesized so pointer expressions dereference as intended.
 */
#ifndef __smp_load_acquire
#define __smp_load_acquire(p)						\
({									\
	__unqual_scalar_typeof(*(p)) ___p1 = READ_ONCE(*(p));		\
	compiletime_assert_atomic_type(*(p));				\
	__smp_mb();							\
	(typeof(*(p)))___p1;						\
})
#endif

/*
 * Public store/atomic/acquire/release helpers.
 *  - CONFIG_SMP:  KCSAN hook followed by the __smp_* primitive
 *                 (smp_load_acquire() maps directly to __smp_load_acquire()).
 *  - !CONFIG_SMP: the same access pattern with barrier() in place of the
 *                 __smp_* barrier.
 * A definition that already exists is left untouched.
 */
#ifdef CONFIG_SMP

#ifndef smp_store_mb
#define smp_store_mb(var, value)  do { kcsan_mb(); __smp_store_mb(var, value); } while (0)
#endif

#ifndef smp_mb__before_atomic
#define smp_mb__before_atomic()	do { kcsan_mb(); __smp_mb__before_atomic(); } while (0)
#endif

#ifndef smp_mb__after_atomic
#define smp_mb__after_atomic()	do { kcsan_mb(); __smp_mb__after_atomic(); } while (0)
#endif

#ifndef smp_store_release
#define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0)
#endif

#ifndef smp_load_acquire
#define smp_load_acquire(p) __smp_load_acquire(p)
#endif

#else	/* !CONFIG_SMP */

#ifndef smp_store_mb
#define smp_store_mb(var, value)  do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif

#ifndef smp_mb__before_atomic
#define smp_mb__before_atomic()	barrier()
#endif

#ifndef smp_mb__after_atomic
#define smp_mb__after_atomic()	barrier()
#endif

/* barrier() first, then the store; @p is parenthesized before dereferencing. */
#ifndef smp_store_release
#define smp_store_release(p, v)						\
do {									\
	barrier();							\
	WRITE_ONCE(*(p), v);						\
} while (0)
#endif

/* Load first, then barrier(); evaluates to the loaded value. */
#ifndef smp_load_acquire
#define smp_load_acquire(p)						\
({									\
	__unqual_scalar_typeof(*(p)) ___p1 = READ_ONCE(*(p));		\
	barrier();							\
	(typeof(*(p)))___p1;						\
})
#endif

#endif	/* CONFIG_SMP */

/*
 * Barriers for virtual machine guests when talking to an SMP host.
 *
 * These are defined unconditionally (no CONFIG_SMP check): each one runs
 * the KCSAN hook and then the __smp_* primitive, except virt_load_acquire()
 * which maps directly to __smp_load_acquire().
 */
#define virt_mb() do { kcsan_mb(); __smp_mb(); } while (0)
#define virt_rmb() do { kcsan_rmb(); __smp_rmb(); } while (0)
#define virt_wmb() do { kcsan_wmb(); __smp_wmb(); } while (0)
#define virt_store_mb(var, value) do { kcsan_mb(); __smp_store_mb(var, value); } while (0)
#define virt_mb__before_atomic() do { kcsan_mb(); __smp_mb__before_atomic(); } while (0)
#define virt_mb__after_atomic() do { kcsan_mb(); __smp_mb__after_atomic(); } while (0)
#define virt_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0)
#define virt_load_acquire(p) __smp_load_acquire(p)

/**
 * smp_acquire__after_ctrl_dep() - Provide ACQUIRE ordering after a control dependency
 *
 * A control dependency provides a LOAD->STORE order, the additional RMB
 * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
 * aka. (load)-ACQUIRE.
 *
 * Architectures that do not do load speculation can have this be barrier().
 * The default defined here is smp_rmb().
 */
#ifndef smp_acquire__after_ctrl_dep
#define smp_acquire__after_ctrl_dep()		smp_rmb()
#endif

/**
 * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees
 * @ptr: pointer to the variable to wait on
 * @cond_expr: boolean expression to wait for
 *
 * Equivalent to using READ_ONCE() on the condition variable.
 *
 * Due to C lacking lambda expressions we load the value of *ptr into a
 * pre-named variable @VAL to be used in @cond_expr.
 *
 * @ptr is evaluated once; *ptr is re-read with READ_ONCE() on every
 * iteration, with cpu_relax() between attempts.  Evaluates to the value of
 * *ptr that satisfied @cond_expr.
 */
#ifndef smp_cond_load_relaxed
#define smp_cond_load_relaxed(ptr, cond_expr) ({		\
	typeof(ptr) __PTR = (ptr);				\
	__unqual_scalar_typeof(*(ptr)) VAL;			\
	for (;;) {						\
		VAL = READ_ONCE(*__PTR);			\
		if (cond_expr)					\
			break;					\
		cpu_relax();					\
	}							\
	(typeof(*(ptr)))VAL;					\
})
#endif

/**
 * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
 * @ptr: pointer to the variable to wait on
 * @cond_expr: boolean expression to wait for
 *
 * Equivalent to using smp_load_acquire() on the condition variable but employs
 * the control dependency of the wait to reduce the barrier on many platforms.
 *
 * Implemented as smp_cond_load_relaxed() followed by
 * smp_acquire__after_ctrl_dep(); evaluates to the value that satisfied
 * @cond_expr.
 */
#ifndef smp_cond_load_acquire
#define smp_cond_load_acquire(ptr, cond_expr) ({		\
	__unqual_scalar_typeof(*(ptr)) _val;			\
	_val = smp_cond_load_relaxed(ptr, cond_expr);		\
	smp_acquire__after_ctrl_dep();				\
	(typeof(*(ptr)))_val;					\
})
#endif

/*
 * pmem_wmb() ensures that all stores made by preceding instructions, whose
 * modifications are written to persistent storage, have updated persistent
 * storage before any data access or data transfer caused by subsequent
 * instructions is initiated.
 *
 * The default defined here is wmb().
 */
#ifndef pmem_wmb
#define pmem_wmb()	wmb()
#endif

/*
 * ioremap_wc() maps I/O memory as memory with write-combining attributes. For
 * this kind of memory access, the CPU may wait for prior accesses to be
 * merged with subsequent ones. In some situations, such a wait is bad for
 * performance. io_stop_wc() can be used to prevent the merging of
 * write-combining memory accesses before this macro with those after it.
 *
 * The default defined here is an empty statement.
 */
#ifndef io_stop_wc
#define io_stop_wc() do { } while (0)
#endif

/*
 * Architectures that guarantee an implicit smp_mb() in switch_mm()
 * can override smp_mb__after_switch_mm.
 *
 * The default defined here is a full smp_mb().
 */
#ifndef smp_mb__after_switch_mm
#define smp_mb__after_switch_mm()	smp_mb()
#endif

#endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */