mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 11:47:47 +00:00
3f020399e4
- Core facilities: - Add the "Lazy preemption" model (CONFIG_PREEMPT_LAZY=y), which optimizes fair-class preemption by delaying preemption requests to the tick boundary, while working as full preemption for RR/FIFO/DEADLINE classes. (Peter Zijlstra) - x86: Enable Lazy preemption (Peter Zijlstra) - riscv: Enable Lazy preemption (Jisheng Zhang) - Initialize idle tasks only once (Thomas Gleixner) - sched/ext: Remove sched_fork() hack (Thomas Gleixner) - Fair scheduler: - Optimize the PLACE_LAG when se->vlag is zero (Huang Shijie) - Idle loop: Optimize the generic idle loop by removing unnecessary memory barrier (Zhongqiu Han) - RSEQ: - Improve cache locality of RSEQ concurrency IDs for intermittent workloads (Mathieu Desnoyers) - Waitqueues: - Make wake_up_{bit,var} less fragile (Neil Brown) - PSI: - Pass enqueue/dequeue flags to psi callbacks directly (Johannes Weiner) - Preparatory patches for proxy execution: - core: Add move_queued_task_locked helper (Connor O'Brien) - core: Consolidate pick_*_task to task_is_pushable helper (Connor O'Brien) - core: Split out __schedule() deactivate task logic into a helper (John Stultz) - core: Split scheduler and execution contexts (Peter Zijlstra) - locking/mutex: Make mutex::wait_lock irq safe (Juri Lelli) - locking/mutex: Expose __mutex_owner() (Juri Lelli) - locking/mutex: Remove wakeups from under mutex::wait_lock (Peter Zijlstra) - Misc fixes and cleanups: - core: Remove unused __HAVE_THREAD_FUNCTIONS hook support (David Disseldorp) - core: Update the comment for TIF_NEED_RESCHED_LAZY (Sebastian Andrzej Siewior) - wait: Remove unused bit_wait_io_timeout (Dr. 
David Alan Gilbert) - fair: remove the DOUBLE_TICK feature (Huang Shijie) - fair: fix the comment for PREEMPT_SHORT (Huang Shijie) - uclamp: Fix unnused variable warning (Christian Loehle) - rt: No PREEMPT_RT=y for all{yes,mod}config Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmc7fnQRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1hZTBAAozVdWA2m51aNa67HvAZta/olmrIagVbW inwbTgqa8b+UfeWEuKOfrZr5khjEh6pLgR3dBTib1uH6xxYj/Okds+qbPWSBPVLh yzavlm/zJZM1U1XtxE3eyVfqWik4GrY7DoIMDQQr+YH7rNXonJeJkll38OI2E5MC q3Q01qyMo8RJJX8qkf3f8ObOoP/51NsVniTw0Zb2fzEhXz8FjezLlxk6cMfgSkJG lg9gfIwUZ7Xg5neRo4kJcc3Ht31KYOhWSiupBJzRD1hss/N/AybvMcTX/Cm8d07w HIAdDDAn84o46miFo/a0V/hsJZ72idWbqxVJUCtaezrpOUiFkG+uInRvG/ynr0lF 5dEI9f+6PUw8Nc7L72IyHkobjPqS2IefSaxYYCBKmxMX2qrenfTor/pKiWzzhBIl rX3MZSuUJ8NjV4rNGD/qXRM1IsMJrsDwxDyv+sRec3XdH33x286ds6aAUEPDQ6N7 96VS0sOKcNUJN8776ErNjlIxRl8HTlpkaO3nZlQIfXgTlXUpRvOuKbEWqP+606lo oANgJTKgUhgJPWZnvmdRxDjSiOp93QcImjus9i1tN81FGiEDleONsJUxu2Di1E5+ s1nCiytjq+cdvzCqFyiOZUh+g6kSZ4yXxNgLg2UvbXzX1zOeUQT3WtyKUhMPXhU8 esh1TgbUbpE= =Zcqj -----END PGP SIGNATURE----- Merge tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler updates from Ingo Molnar: "Core facilities: - Add the "Lazy preemption" model (CONFIG_PREEMPT_LAZY=y), which optimizes fair-class preemption by delaying preemption requests to the tick boundary, while working as full preemption for RR/FIFO/DEADLINE classes. 
(Peter Zijlstra) - x86: Enable Lazy preemption (Peter Zijlstra) - riscv: Enable Lazy preemption (Jisheng Zhang) - Initialize idle tasks only once (Thomas Gleixner) - sched/ext: Remove sched_fork() hack (Thomas Gleixner) Fair scheduler: - Optimize the PLACE_LAG when se->vlag is zero (Huang Shijie) Idle loop: - Optimize the generic idle loop by removing unnecessary memory barrier (Zhongqiu Han) RSEQ: - Improve cache locality of RSEQ concurrency IDs for intermittent workloads (Mathieu Desnoyers) Waitqueues: - Make wake_up_{bit,var} less fragile (Neil Brown) PSI: - Pass enqueue/dequeue flags to psi callbacks directly (Johannes Weiner) Preparatory patches for proxy execution: - Add move_queued_task_locked helper (Connor O'Brien) - Consolidate pick_*_task to task_is_pushable helper (Connor O'Brien) - Split out __schedule() deactivate task logic into a helper (John Stultz) - Split scheduler and execution contexts (Peter Zijlstra) - Make mutex::wait_lock irq safe (Juri Lelli) - Expose __mutex_owner() (Juri Lelli) - Remove wakeups from under mutex::wait_lock (Peter Zijlstra) Misc fixes and cleanups: - Remove unused __HAVE_THREAD_FUNCTIONS hook support (David Disseldorp) - Update the comment for TIF_NEED_RESCHED_LAZY (Sebastian Andrzej Siewior) - Remove unused bit_wait_io_timeout (Dr. David Alan Gilbert) - remove the DOUBLE_TICK feature (Huang Shijie) - fix the comment for PREEMPT_SHORT (Huang Shijie) - Fix unnused variable warning (Christian Loehle) - No PREEMPT_RT=y for all{yes,mod}config" * tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) sched, x86: Update the comment for TIF_NEED_RESCHED_LAZY. 
sched: No PREEMPT_RT=y for all{yes,mod}config riscv: add PREEMPT_LAZY support sched, x86: Enable Lazy preemption sched: Enable PREEMPT_DYNAMIC for PREEMPT_RT sched: Add Lazy preemption model sched: Add TIF_NEED_RESCHED_LAZY infrastructure sched/ext: Remove sched_fork() hack sched: Initialize idle tasks only once sched: psi: pass enqueue/dequeue flags to psi callbacks directly sched/uclamp: Fix unnused variable warning sched: Split scheduler and execution contexts sched: Split out __schedule() deactivate task logic into a helper sched: Consolidate pick_*_task to task_is_pushable helper sched: Add move_queued_task_locked helper locking/mutex: Expose __mutex_owner() locking/mutex: Make mutex::wait_lock irq safe locking/mutex: Remove wakeups from under mutex::wait_lock sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads sched: idle: Optimize the generic idle loop by removing needless memory barrier ...
288 lines
7.1 KiB
C
288 lines
7.1 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* PREEMPT_RT substitution for spin/rw_locks
|
|
*
|
|
* spinlocks and rwlocks on RT are based on rtmutexes, with a few twists to
|
|
* resemble the non RT semantics:
|
|
*
|
|
* - Contrary to plain rtmutexes, spinlocks and rwlocks are state
|
|
* preserving. The task state is saved before blocking on the underlying
|
|
* rtmutex, and restored when the lock has been acquired. Regular wakeups
|
|
* during that time are redirected to the saved state so no wake up is
|
|
* missed.
|
|
*
|
|
* - Non RT spin/rwlocks disable preemption and eventually interrupts.
|
|
* Disabling preemption has the side effect of disabling migration and
|
|
* preventing RCU grace periods.
|
|
*
|
|
* The RT substitutions explicitly disable migration and take
|
|
* rcu_read_lock() across the lock held section.
|
|
*/
|
|
#include <linux/spinlock.h>
|
|
#include <linux/export.h>
|
|
|
|
#define RT_MUTEX_BUILD_SPINLOCKS
|
|
#include "rtmutex.c"
|
|
|
|
/*
 * rtlocks are state preserving, hence __might_resched() is used, which
 * skips the task state check. spin/read/write_lock() may legitimately be
 * taken inside an RCU read side critical section, so the current RCU
 * nesting depth is encoded into the offsets handed to __might_resched().
 */
#define RTLOCK_RESCHED_OFFSETS						\
	(rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)

#define rtlock_might_resched()						\
	__might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)
|
|
|
|
static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
|
|
{
|
|
lockdep_assert(!current->pi_blocked_on);
|
|
|
|
if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
|
|
rtlock_slowlock(rtm);
|
|
}
|
|
|
|
/*
 * __rt_spin_lock - common acquisition path of the rt_spin_lock*() variants
 * @lock: the spinlock to acquire
 *
 * The lockdep annotation is the caller's job; this only performs the
 * might-resched debug check, the acquisition itself and the RT specific
 * bookkeeping.
 */
static __always_inline void __rt_spin_lock(spinlock_t *lock)
{
	rtlock_might_resched();

	rtlock_lock(&lock->lock);

	/*
	 * The RT substitution takes rcu_read_lock() and disables migration
	 * explicitly across the lock held section.
	 */
	rcu_read_lock();
	migrate_disable();
}
|
|
|
|
/*
 * rt_spin_lock - acquire a spinlock_t on PREEMPT_RT
 * @lock: the spinlock to acquire
 *
 * Tells lockdep about the acquisition (subclass 0, not a trylock) and
 * then goes through the common __rt_spin_lock() path.
 */
void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU)
{
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);

	__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
/*
 * rt_spin_lock_nested - acquire a spinlock_t with a lockdep subclass
 * @lock:	the spinlock to acquire
 * @subclass:	subclass handed to the lockdep annotation
 *
 * Same as rt_spin_lock() except that @subclass instead of 0 is passed
 * to spin_acquire().
 */
void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);

	__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_nested);
|
|
|
|
/*
 * rt_spin_lock_nest_lock - acquire a spinlock_t nested under another lock
 * @lock:	the spinlock to acquire
 * @nest_lock:	lockdep map handed to spin_acquire_nest() as the nest lock
 *
 * Annotates the acquisition with spin_acquire_nest() and then goes through
 * the common __rt_spin_lock() path.
 */
void __sched rt_spin_lock_nest_lock(spinlock_t *lock,
				    struct lockdep_map *nest_lock)
{
	spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);

	__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_nest_lock);
|
|
#endif
|
|
|
|
/*
 * rt_spin_unlock - release a spinlock_t on PREEMPT_RT
 * @lock: the spinlock to release
 *
 * Drops the lockdep annotation, re-enables migration and leaves the RCU
 * read side section which were entered on the lock side, then releases
 * the underlying rtmutex.
 */
void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU)
{
	struct rt_mutex_base *rtm = &lock->lock;

	spin_release(&lock->dep_map, _RET_IP_);
	migrate_enable();
	rcu_read_unlock();

	/* Fast path: cmpxchg current -> NULL with release semantics */
	if (likely(rt_mutex_cmpxchg_release(rtm, current, NULL)))
		return;

	rt_mutex_slowunlock(rtm);
}
EXPORT_SYMBOL(rt_spin_unlock);
|
|
|
|
/*
|
|
* Wait for the lock to get unlocked: instead of polling for an unlock
|
|
* (like raw spinlocks do), lock and unlock, to force the kernel to
|
|
* schedule if there's contention:
|
|
*/
|
|
void __sched rt_spin_lock_unlock(spinlock_t *lock)
|
|
{
|
|
spin_lock(lock);
|
|
spin_unlock(lock);
|
|
}
|
|
EXPORT_SYMBOL(rt_spin_lock_unlock);
|
|
|
|
/*
 * __rt_spin_trylock - common trylock path
 * @lock: the spinlock to try
 *
 * Returns nonzero when the lock was obtained, 0 otherwise. Only on
 * success is lockdep told about the acquisition (as a trylock) and are
 * rcu_read_lock() / migrate_disable() applied.
 */
static __always_inline int __rt_spin_trylock(spinlock_t *lock)
{
	int acquired;

	if (likely(rt_mutex_cmpxchg_acquire(&lock->lock, NULL, current)))
		acquired = 1;
	else
		acquired = rt_mutex_slowtrylock(&lock->lock);

	if (!acquired)
		return acquired;

	spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	rcu_read_lock();
	migrate_disable();

	return acquired;
}
|
|
|
|
/*
 * rt_spin_trylock - try to acquire a spinlock_t on PREEMPT_RT
 * @lock: the spinlock to try
 *
 * Exported wrapper around __rt_spin_trylock(); its return value is
 * passed through unchanged.
 */
int __sched rt_spin_trylock(spinlock_t *lock)
{
	return __rt_spin_trylock(lock);
}
EXPORT_SYMBOL(rt_spin_trylock);
|
|
|
|
/*
 * rt_spin_trylock_bh - try to acquire a spinlock_t with BH disabled
 * @lock: the spinlock to try
 *
 * Bottom halves are disabled before the attempt. They stay disabled when
 * the lock was obtained and are re-enabled when it was not.
 *
 * Returns the result of __rt_spin_trylock().
 */
int __sched rt_spin_trylock_bh(spinlock_t *lock)
{
	int acquired;

	local_bh_disable();

	acquired = __rt_spin_trylock(lock);
	if (acquired)
		return acquired;

	/* No lock, so undo the BH disable before reporting the failure */
	local_bh_enable();
	return 0;
}
EXPORT_SYMBOL(rt_spin_trylock_bh);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
/*
 * __rt_spin_lock_init - lockdep side of spinlock_t initialization
 * @lock:	the spinlock being initialized
 * @name:	name handed to lockdep
 * @key:	lock class key handed to lockdep
 * @percpu:	selects LD_LOCK_PERCPU instead of LD_LOCK_NORMAL as lock type
 *
 * Only built with CONFIG_DEBUG_LOCK_ALLOC. Checks that the memory of
 * @lock does not contain a lock which is still held and then sets up
 * the lockdep map.
 */
void __rt_spin_lock_init(spinlock_t *lock, const char *name,
			 struct lock_class_key *key, bool percpu)
{
	u8 type;

	if (percpu)
		type = LD_LOCK_PERCPU;
	else
		type = LD_LOCK_NORMAL;

	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lockdep_init_map_type(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG,
			      LD_WAIT_INV, type);
}
EXPORT_SYMBOL(__rt_spin_lock_init);
|
|
#endif
|
|
|
|
/*
 * RT-specific reader/writer locks
 *
 * Task state hooks, defined ahead of the rwbase_rt.c include further
 * down. The @state argument is ignored: both hooks map to the rtlock
 * save/restore helpers, which take no argument.
 */
#define rwbase_set_and_save_current_state(state)	\
	current_save_and_set_rtlock_wait_state()

#define rwbase_restore_current_state()			\
	current_restore_rtlock_saved_state()
|
|
|
|
static __always_inline int
|
|
rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
|
|
{
|
|
if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
|
|
rtlock_slowlock(rtm);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * rwbase_rtmutex_slowlock_locked - slow path acquisition of the rtmutex
 * @rtm:	the underlying rtmutex base
 * @state:	unused in this variant
 * @wake_q:	wake queue passed on to rtlock_slowlock_locked()
 *
 * Thin wrapper around rtlock_slowlock_locked(). Always returns 0.
 */
static __always_inline int
rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
			       struct wake_q_head *wake_q)
{
	rtlock_slowlock_locked(rtm, wake_q);

	return 0;
}
|
|
|
|
static __always_inline void rwbase_rtmutex_unlock(struct rt_mutex_base *rtm)
|
|
{
|
|
if (likely(rt_mutex_cmpxchg_acquire(rtm, current, NULL)))
|
|
return;
|
|
|
|
rt_mutex_slowunlock(rtm);
|
|
}
|
|
|
|
static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm)
|
|
{
|
|
if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
|
|
return 1;
|
|
|
|
return rt_mutex_slowtrylock(rtm);
|
|
}
|
|
|
|
/* Always evaluates to 0, both arguments are ignored */
#define rwbase_signal_pending_state(state, current)	(0)

/* Expands to nothing */
#define rwbase_pre_schedule()

/* Scheduling is done via schedule_rtlock() */
#define rwbase_schedule()				\
	schedule_rtlock()

/* Expands to nothing */
#define rwbase_post_schedule()
|
|
|
|
#include "rwbase_rt.c"
|
|
/*
|
|
* The common functions which get wrapped into the rwlock API.
|
|
*/
|
|
/*
 * rt_read_trylock - try to read-acquire a rwlock_t on PREEMPT_RT
 * @rwlock: the rwlock to try
 *
 * Returns the result of rwbase_read_trylock(). Only when that is nonzero
 * is lockdep told about the read acquisition (as a trylock) and are
 * rcu_read_lock() / migrate_disable() applied.
 */
int __sched rt_read_trylock(rwlock_t *rwlock)
{
	int acquired = rwbase_read_trylock(&rwlock->rwbase);

	if (!acquired)
		return acquired;

	rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
	rcu_read_lock();
	migrate_disable();

	return acquired;
}
EXPORT_SYMBOL(rt_read_trylock);
|
|
|
|
/*
 * rt_write_trylock - try to write-acquire a rwlock_t on PREEMPT_RT
 * @rwlock: the rwlock to try
 *
 * Returns the result of rwbase_write_trylock(). Only when that is nonzero
 * is lockdep told about the acquisition (as a trylock) and are
 * rcu_read_lock() / migrate_disable() applied.
 */
int __sched rt_write_trylock(rwlock_t *rwlock)
{
	int acquired = rwbase_write_trylock(&rwlock->rwbase);

	if (!acquired)
		return acquired;

	rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
	rcu_read_lock();
	migrate_disable();

	return acquired;
}
EXPORT_SYMBOL(rt_write_trylock);
|
|
|
|
/*
 * rt_read_lock - read-acquire a rwlock_t on PREEMPT_RT
 * @rwlock: the rwlock to acquire
 *
 * Runs the might-resched debug check, annotates the read acquisition for
 * lockdep and takes the lock for reading with TASK_RTLOCK_WAIT as state.
 */
void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU)
{
	rtlock_might_resched();
	rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);

	rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);

	/* Held across the lock held section, dropped again on unlock */
	rcu_read_lock();
	migrate_disable();
}
EXPORT_SYMBOL(rt_read_lock);
|
|
|
|
/*
 * rt_write_lock - write-acquire a rwlock_t on PREEMPT_RT
 * @rwlock: the rwlock to acquire
 *
 * Runs the might-resched debug check, annotates the acquisition for
 * lockdep and takes the lock for writing with TASK_RTLOCK_WAIT as state.
 */
void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU)
{
	rtlock_might_resched();
	rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);

	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);

	/* Held across the lock held section, dropped again on unlock */
	rcu_read_lock();
	migrate_disable();
}
EXPORT_SYMBOL(rt_write_lock);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
/*
 * rt_write_lock_nested - write-acquire a rwlock_t with a lockdep subclass
 * @rwlock:	the rwlock to acquire
 * @subclass:	subclass handed to the lockdep annotation
 *
 * Same as rt_write_lock() except that @subclass instead of 0 is passed
 * to rwlock_acquire().
 */
void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU)
{
	rtlock_might_resched();
	rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);

	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);

	/* Held across the lock held section, dropped again on unlock */
	rcu_read_lock();
	migrate_disable();
}
EXPORT_SYMBOL(rt_write_lock_nested);
|
|
#endif
|
|
|
|
/*
 * rt_read_unlock - drop a read-held rwlock_t on PREEMPT_RT
 * @rwlock: the rwlock to release
 *
 * Drops the lockdep annotation, undoes migrate_disable() and
 * rcu_read_lock() in the reverse order of the lock side and then
 * releases the read side of the underlying rwbase lock.
 */
void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU)
{
	rwlock_release(&rwlock->dep_map, _RET_IP_);

	migrate_enable();
	rcu_read_unlock();

	rwbase_read_unlock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
}
EXPORT_SYMBOL(rt_read_unlock);
|
|
|
|
/*
 * rt_write_unlock - drop a write-held rwlock_t on PREEMPT_RT
 * @rwlock: the rwlock to release
 *
 * Drops the lockdep annotation and then undoes the lock side
 * bookkeeping in reverse order: rt_write_lock() does rcu_read_lock()
 * followed by migrate_disable(), so migration is re-enabled first and
 * the RCU read side section is left afterwards. This matches
 * rt_read_unlock() and rt_spin_unlock(). Finally the write side of the
 * underlying rwbase lock is released.
 */
void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU)
{
	rwlock_release(&rwlock->dep_map, _RET_IP_);

	migrate_enable();
	rcu_read_unlock();

	rwbase_write_unlock(&rwlock->rwbase);
}
EXPORT_SYMBOL(rt_write_unlock);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
/*
 * __rt_rwlock_init - lockdep side of rwlock_t initialization
 * @rwlock:	the rwlock being initialized
 * @name:	name handed to lockdep
 * @key:	lock class key handed to lockdep
 *
 * Only built with CONFIG_DEBUG_LOCK_ALLOC. Checks that the memory of
 * @rwlock does not contain a lock which is still held and then sets up
 * the lockdep map with LD_WAIT_CONFIG as wait type.
 */
void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
		      struct lock_class_key *key)
{
	debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));

	lockdep_init_map_wait(&rwlock->dep_map, name, key, 0, LD_WAIT_CONFIG);
}
EXPORT_SYMBOL(__rt_rwlock_init);
|
|
#endif
|