linux-next/kernel/locking/spinlock_rt.c
Linus Torvalds 3f020399e4 Scheduler changes for v6.13:
- Core facilities:
 
     - Add the "Lazy preemption" model (CONFIG_PREEMPT_LAZY=y), which optimizes
       fair-class preemption by delaying preemption requests to the
       tick boundary, while working as full preemption for RR/FIFO/DEADLINE
       classes. (Peter Zijlstra)
 
         - x86: Enable Lazy preemption (Peter Zijlstra)
         - riscv: Enable Lazy preemption (Jisheng Zhang)
 
     - Initialize idle tasks only once (Thomas Gleixner)
 
     - sched/ext: Remove sched_fork() hack (Thomas Gleixner)
 
  - Fair scheduler:
     - Optimize the PLACE_LAG when se->vlag is zero (Huang Shijie)
 
  - Idle loop:
       Optimize the generic idle loop by removing unnecessary
       memory barrier (Zhongqiu Han)
 
  - RSEQ:
     - Improve cache locality of RSEQ concurrency IDs for
       intermittent workloads (Mathieu Desnoyers)
 
  - Waitqueues:
     - Make wake_up_{bit,var} less fragile (Neil Brown)
 
  - PSI:
     - Pass enqueue/dequeue flags to psi callbacks directly (Johannes Weiner)
 
  - Preparatory patches for proxy execution:
     - core: Add move_queued_task_locked helper (Connor O'Brien)
     - core: Consolidate pick_*_task to task_is_pushable helper (Connor O'Brien)
     - core: Split out __schedule() deactivate task logic into a helper (John Stultz)
     - core: Split scheduler and execution contexts (Peter Zijlstra)
     - locking/mutex: Make mutex::wait_lock irq safe (Juri Lelli)
     - locking/mutex: Expose __mutex_owner() (Juri Lelli)
     - locking/mutex: Remove wakeups from under mutex::wait_lock (Peter Zijlstra)
 
  - Misc fixes and cleanups:
     - core: Remove unused __HAVE_THREAD_FUNCTIONS hook support (David Disseldorp)
     - core: Update the comment for TIF_NEED_RESCHED_LAZY (Sebastian Andrzej Siewior)
     - wait: Remove unused bit_wait_io_timeout (Dr. David Alan Gilbert)
     - fair: remove the DOUBLE_TICK feature (Huang Shijie)
     - fair: fix the comment for PREEMPT_SHORT (Huang Shijie)
     - uclamp: Fix unnused variable warning (Christian Loehle)
     - rt: No PREEMPT_RT=y for all{yes,mod}config
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmc7fnQRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1hZTBAAozVdWA2m51aNa67HvAZta/olmrIagVbW
 inwbTgqa8b+UfeWEuKOfrZr5khjEh6pLgR3dBTib1uH6xxYj/Okds+qbPWSBPVLh
 yzavlm/zJZM1U1XtxE3eyVfqWik4GrY7DoIMDQQr+YH7rNXonJeJkll38OI2E5MC
 q3Q01qyMo8RJJX8qkf3f8ObOoP/51NsVniTw0Zb2fzEhXz8FjezLlxk6cMfgSkJG
 lg9gfIwUZ7Xg5neRo4kJcc3Ht31KYOhWSiupBJzRD1hss/N/AybvMcTX/Cm8d07w
 HIAdDDAn84o46miFo/a0V/hsJZ72idWbqxVJUCtaezrpOUiFkG+uInRvG/ynr0lF
 5dEI9f+6PUw8Nc7L72IyHkobjPqS2IefSaxYYCBKmxMX2qrenfTor/pKiWzzhBIl
 rX3MZSuUJ8NjV4rNGD/qXRM1IsMJrsDwxDyv+sRec3XdH33x286ds6aAUEPDQ6N7
 96VS0sOKcNUJN8776ErNjlIxRl8HTlpkaO3nZlQIfXgTlXUpRvOuKbEWqP+606lo
 oANgJTKgUhgJPWZnvmdRxDjSiOp93QcImjus9i1tN81FGiEDleONsJUxu2Di1E5+
 s1nCiytjq+cdvzCqFyiOZUh+g6kSZ4yXxNgLg2UvbXzX1zOeUQT3WtyKUhMPXhU8
 esh1TgbUbpE=
 =Zcqj
 -----END PGP SIGNATURE-----

Merge tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "Core facilities:

   - Add the "Lazy preemption" model (CONFIG_PREEMPT_LAZY=y), which
     optimizes fair-class preemption by delaying preemption requests to
     the tick boundary, while working as full preemption for
     RR/FIFO/DEADLINE classes. (Peter Zijlstra)
        - x86: Enable Lazy preemption (Peter Zijlstra)
        - riscv: Enable Lazy preemption (Jisheng Zhang)

   - Initialize idle tasks only once (Thomas Gleixner)

   - sched/ext: Remove sched_fork() hack (Thomas Gleixner)

  Fair scheduler:

   - Optimize the PLACE_LAG when se->vlag is zero (Huang Shijie)

  Idle loop:

   - Optimize the generic idle loop by removing unnecessary memory
     barrier (Zhongqiu Han)

  RSEQ:

   - Improve cache locality of RSEQ concurrency IDs for intermittent
     workloads (Mathieu Desnoyers)

  Waitqueues:

   - Make wake_up_{bit,var} less fragile (Neil Brown)

  PSI:

   - Pass enqueue/dequeue flags to psi callbacks directly (Johannes
     Weiner)

  Preparatory patches for proxy execution:

   - Add move_queued_task_locked helper (Connor O'Brien)

   - Consolidate pick_*_task to task_is_pushable helper (Connor O'Brien)

   - Split out __schedule() deactivate task logic into a helper (John
     Stultz)

   - Split scheduler and execution contexts (Peter Zijlstra)

   - Make mutex::wait_lock irq safe (Juri Lelli)

   - Expose __mutex_owner() (Juri Lelli)

   - Remove wakeups from under mutex::wait_lock (Peter Zijlstra)

  Misc fixes and cleanups:

   - Remove unused __HAVE_THREAD_FUNCTIONS hook support (David
     Disseldorp)

   - Update the comment for TIF_NEED_RESCHED_LAZY (Sebastian Andrzej
     Siewior)

   - Remove unused bit_wait_io_timeout (Dr. David Alan Gilbert)

   - remove the DOUBLE_TICK feature (Huang Shijie)

   - fix the comment for PREEMPT_SHORT (Huang Shijie)

   - Fix unnused variable warning (Christian Loehle)

   - No PREEMPT_RT=y for all{yes,mod}config"

* tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  sched, x86: Update the comment for TIF_NEED_RESCHED_LAZY.
  sched: No PREEMPT_RT=y for all{yes,mod}config
  riscv: add PREEMPT_LAZY support
  sched, x86: Enable Lazy preemption
  sched: Enable PREEMPT_DYNAMIC for PREEMPT_RT
  sched: Add Lazy preemption model
  sched: Add TIF_NEED_RESCHED_LAZY infrastructure
  sched/ext: Remove sched_fork() hack
  sched: Initialize idle tasks only once
  sched: psi: pass enqueue/dequeue flags to psi callbacks directly
  sched/uclamp: Fix unnused variable warning
  sched: Split scheduler and execution contexts
  sched: Split out __schedule() deactivate task logic into a helper
  sched: Consolidate pick_*_task to task_is_pushable helper
  sched: Add move_queued_task_locked helper
  locking/mutex: Expose __mutex_owner()
  locking/mutex: Make mutex::wait_lock irq safe
  locking/mutex: Remove wakeups from under mutex::wait_lock
  sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads
  sched: idle: Optimize the generic idle loop by removing needless memory barrier
  ...
2024-11-19 14:16:06 -08:00

288 lines
7.1 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* PREEMPT_RT substitution for spin/rw_locks
*
* spinlocks and rwlocks on RT are based on rtmutexes, with a few twists to
* resemble the non RT semantics:
*
* - Contrary to plain rtmutexes, spinlocks and rwlocks are state
* preserving. The task state is saved before blocking on the underlying
* rtmutex, and restored when the lock has been acquired. Regular wakeups
* during that time are redirected to the saved state so no wake up is
* missed.
*
* - Non RT spin/rwlocks disable preemption and eventually interrupts.
* Disabling preemption has the side effect of disabling migration and
* preventing RCU grace periods.
*
* The RT substitutions explicitly disable migration and take
* rcu_read_lock() across the lock held section.
*/
#include <linux/spinlock.h>
#include <linux/export.h>
#define RT_MUTEX_BUILD_SPINLOCKS
#include "rtmutex.c"
/*
* __might_resched() skips the state check as rtlocks are state
* preserving. Take RCU nesting into account as spin/read/write_lock() can
* legitimately nest into an RCU read side critical section.
*/
#define RTLOCK_RESCHED_OFFSETS \
(rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)
#define rtlock_might_resched() \
__might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)
static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
{
lockdep_assert(!current->pi_blocked_on);
if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
rtlock_slowlock(rtm);
}
static __always_inline void __rt_spin_lock(spinlock_t *lock)
{
rtlock_might_resched();
rtlock_lock(&lock->lock);
rcu_read_lock();
migrate_disable();
}
void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU)
{
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_nested);
void __sched rt_spin_lock_nest_lock(spinlock_t *lock,
struct lockdep_map *nest_lock)
{
spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_nest_lock);
#endif
void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU)
{
spin_release(&lock->dep_map, _RET_IP_);
migrate_enable();
rcu_read_unlock();
if (unlikely(!rt_mutex_cmpxchg_release(&lock->lock, current, NULL)))
rt_mutex_slowunlock(&lock->lock);
}
EXPORT_SYMBOL(rt_spin_unlock);
/*
* Wait for the lock to get unlocked: instead of polling for an unlock
* (like raw spinlocks do), lock and unlock, to force the kernel to
* schedule if there's contention:
*/
void __sched rt_spin_lock_unlock(spinlock_t *lock)
{
spin_lock(lock);
spin_unlock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_unlock);
static __always_inline int __rt_spin_trylock(spinlock_t *lock)
{
int ret = 1;
if (unlikely(!rt_mutex_cmpxchg_acquire(&lock->lock, NULL, current)))
ret = rt_mutex_slowtrylock(&lock->lock);
if (ret) {
spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
rcu_read_lock();
migrate_disable();
}
return ret;
}
int __sched rt_spin_trylock(spinlock_t *lock)
{
return __rt_spin_trylock(lock);
}
EXPORT_SYMBOL(rt_spin_trylock);
int __sched rt_spin_trylock_bh(spinlock_t *lock)
{
int ret;
local_bh_disable();
ret = __rt_spin_trylock(lock);
if (!ret)
local_bh_enable();
return ret;
}
EXPORT_SYMBOL(rt_spin_trylock_bh);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __rt_spin_lock_init(spinlock_t *lock, const char *name,
struct lock_class_key *key, bool percpu)
{
u8 type = percpu ? LD_LOCK_PERCPU : LD_LOCK_NORMAL;
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map_type(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG,
LD_WAIT_INV, type);
}
EXPORT_SYMBOL(__rt_spin_lock_init);
#endif
/*
* RT-specific reader/writer locks
*/
#define rwbase_set_and_save_current_state(state) \
current_save_and_set_rtlock_wait_state()
#define rwbase_restore_current_state() \
current_restore_rtlock_saved_state()
static __always_inline int
rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
{
if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
rtlock_slowlock(rtm);
return 0;
}
static __always_inline int
rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
struct wake_q_head *wake_q)
{
rtlock_slowlock_locked(rtm, wake_q);
return 0;
}
static __always_inline void rwbase_rtmutex_unlock(struct rt_mutex_base *rtm)
{
if (likely(rt_mutex_cmpxchg_acquire(rtm, current, NULL)))
return;
rt_mutex_slowunlock(rtm);
}
static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm)
{
if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
return 1;
return rt_mutex_slowtrylock(rtm);
}
#define rwbase_signal_pending_state(state, current) (0)
#define rwbase_pre_schedule()
#define rwbase_schedule() \
schedule_rtlock()
#define rwbase_post_schedule()
#include "rwbase_rt.c"
/*
* The common functions which get wrapped into the rwlock API.
*/
int __sched rt_read_trylock(rwlock_t *rwlock)
{
int ret;
ret = rwbase_read_trylock(&rwlock->rwbase);
if (ret) {
rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
rcu_read_lock();
migrate_disable();
}
return ret;
}
EXPORT_SYMBOL(rt_read_trylock);
int __sched rt_write_trylock(rwlock_t *rwlock)
{
int ret;
ret = rwbase_write_trylock(&rwlock->rwbase);
if (ret) {
rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
rcu_read_lock();
migrate_disable();
}
return ret;
}
EXPORT_SYMBOL(rt_write_trylock);
void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU)
{
rtlock_might_resched();
rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
rcu_read_lock();
migrate_disable();
}
EXPORT_SYMBOL(rt_read_lock);
void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU)
{
rtlock_might_resched();
rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
rcu_read_lock();
migrate_disable();
}
EXPORT_SYMBOL(rt_write_lock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU)
{
rtlock_might_resched();
rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);
rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
rcu_read_lock();
migrate_disable();
}
EXPORT_SYMBOL(rt_write_lock_nested);
#endif
void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU)
{
rwlock_release(&rwlock->dep_map, _RET_IP_);
migrate_enable();
rcu_read_unlock();
rwbase_read_unlock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
}
EXPORT_SYMBOL(rt_read_unlock);
void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU)
{
rwlock_release(&rwlock->dep_map, _RET_IP_);
rcu_read_unlock();
migrate_enable();
rwbase_write_unlock(&rwlock->rwbase);
}
EXPORT_SYMBOL(rt_write_unlock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
struct lock_class_key *key)
{
debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
lockdep_init_map_wait(&rwlock->dep_map, name, key, 0, LD_WAIT_CONFIG);
}
EXPORT_SYMBOL(__rt_rwlock_init);
#endif