2019-01-17 10:18:16 -08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0+
|
2017-03-25 09:59:38 -07:00
|
|
|
/*
|
|
|
|
* Sleepable Read-Copy Update mechanism for mutual exclusion,
|
|
|
|
* tiny version for non-preemptible single-CPU use.
|
|
|
|
*
|
|
|
|
* Copyright (C) IBM Corporation, 2017
|
|
|
|
*
|
2019-01-17 10:18:16 -08:00
|
|
|
* Author: Paul McKenney <paulmck@linux.ibm.com>
|
2017-03-25 09:59:38 -07:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/export.h>
|
|
|
|
#include <linux/mutex.h>
|
|
|
|
#include <linux/preempt.h>
|
|
|
|
#include <linux/rcupdate_wait.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/srcu.h>
|
|
|
|
|
|
|
|
#include <linux/rcu_node_tree.h>
|
2017-05-02 10:31:18 +02:00
|
|
|
#include "rcu_segcblist.h"
|
2017-03-25 09:59:38 -07:00
|
|
|
#include "rcu.h"
|
|
|
|
|
2017-05-26 16:16:40 -07:00
|
|
|
/*
 * Scheduler state consulted by synchronize_srcu(), which returns at once
 * while this still equals RCU_SCHEDULER_INACTIVE.  It is set to
 * RCU_SCHEDULER_RUNNING by rcu_scheduler_starting().
 */
int rcu_scheduler_active __read_mostly;
|
srcu: Make call_srcu() available during very early boot
Event tracing is moving to SRCU in order to take advantage of the fact
that SRCU may be safely used from idle and even offline CPUs. However,
event tracing can invoke call_srcu() very early in the boot process,
even before workqueue_init_early() is invoked (let alone rcu_init()).
Therefore, call_srcu()'s attempts to queue work fail miserably.
This commit therefore detects this situation, and refrains from attempting
to queue work before rcu_init() time, but does everything else that it
would have done, and in addition, adds the srcu_struct to a global list.
The rcu_init() function now invokes a new srcu_init() function, which
is empty if CONFIG_SRCU=n. Otherwise, srcu_init() queues work for
each srcu_struct on the list. This all happens early enough in boot
that there is but a single CPU with interrupts disabled, which allows
synchronization to be dispensed with.
Of course, the queued work won't actually be invoked until after
workqueue_init() is invoked, which happens shortly after the scheduler
is up and running. This means that although call_srcu() may be invoked
any time after per-CPU variables have been set up, there is still a very
narrow window when synchronize_srcu() won't work, and this window
extends from the time that the scheduler starts until the time that
workqueue_init() returns. This can be fixed in a manner similar to
the fix for synchronize_rcu_expedited() and friends, but until someone
actually needs to use synchronize_srcu() during this window, this fix
is added churn for no benefit.
Finally, note that Tree SRCU's new srcu_init() function invokes
queue_work() rather than the queue_delayed_work() function that is
invoked post-boot. The reason is that queue_delayed_work() will (as you
would expect) post a timer, and timers have not yet been initialized.
So use of queue_work() avoids the complaints about use of uninitialized
spinlocks that would otherwise result. Besides, some delay is already
provide by the aforementioned fact that the queued work won't actually
be invoked until after the scheduler is up and running.
Requested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2018-08-14 08:45:54 -07:00
|
|
|
/*
 * srcu_struct structures whose grace-period work was requested before
 * srcu_init() ran.  Entries are linked through ->srcu_work.entry and
 * are drained by srcu_init().
 */
static LIST_HEAD(srcu_boot_list);
|
|
|
|
/*
 * Set to true by srcu_init().  Until then, srcu_gp_start_if_needed()
 * adds the srcu_struct to srcu_boot_list rather than calling
 * schedule_work().
 */
static bool srcu_init_done;
|
2017-05-26 16:16:40 -07:00
|
|
|
|
2018-10-28 10:32:51 -07:00
|
|
|
static int init_srcu_struct_fields(struct srcu_struct *ssp)
|
2017-03-25 09:59:38 -07:00
|
|
|
{
|
2018-10-28 10:32:51 -07:00
|
|
|
ssp->srcu_lock_nesting[0] = 0;
|
|
|
|
ssp->srcu_lock_nesting[1] = 0;
|
|
|
|
init_swait_queue_head(&ssp->srcu_wq);
|
|
|
|
ssp->srcu_cb_head = NULL;
|
|
|
|
ssp->srcu_cb_tail = &ssp->srcu_cb_head;
|
|
|
|
ssp->srcu_gp_running = false;
|
|
|
|
ssp->srcu_gp_waiting = false;
|
|
|
|
ssp->srcu_idx = 0;
|
2020-11-13 12:54:48 -08:00
|
|
|
ssp->srcu_idx_max = 0;
|
2018-10-28 10:32:51 -07:00
|
|
|
INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
|
|
|
|
INIT_LIST_HEAD(&ssp->srcu_work.entry);
|
2017-03-25 09:59:38 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
|
|
|
2018-10-28 10:32:51 -07:00
|
|
|
int __init_srcu_struct(struct srcu_struct *ssp, const char *name,
|
2017-03-25 09:59:38 -07:00
|
|
|
struct lock_class_key *key)
|
|
|
|
{
|
|
|
|
/* Don't re-initialize a lock while it is held. */
|
2018-10-28 10:32:51 -07:00
|
|
|
debug_check_no_locks_freed((void *)ssp, sizeof(*ssp));
|
|
|
|
lockdep_init_map(&ssp->dep_map, name, key, 0);
|
|
|
|
return init_srcu_struct_fields(ssp);
|
2017-03-25 09:59:38 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(__init_srcu_struct);
|
|
|
|
|
|
|
|
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
|
|
|
|
|
|
|
/*
 * init_srcu_struct - initialize a sleep-RCU structure
 * @ssp: structure to initialize.
 *
 * Each srcu_struct represents its own domain of SRCU protection.  A given
 * srcu_struct must be passed to this function before it is handed to any
 * other function.  Returns the result of init_srcu_struct_fields().
 */
int init_srcu_struct(struct srcu_struct *ssp)
{
	int ret;

	ret = init_srcu_struct_fields(ssp);
	return ret;
}
|
|
|
|
EXPORT_SYMBOL_GPL(init_srcu_struct);
|
|
|
|
|
|
|
|
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cleanup_srcu_struct - deconstruct a sleep-RCU structure
|
2018-10-28 10:32:51 -07:00
|
|
|
* @ssp: structure to clean up.
|
2017-03-25 09:59:38 -07:00
|
|
|
*
|
|
|
|
* Must invoke this after you are finished using a given srcu_struct that
|
|
|
|
* was initialized via init_srcu_struct(), else you leak memory.
|
|
|
|
*/
|
2019-02-13 13:54:37 -08:00
|
|
|
void cleanup_srcu_struct(struct srcu_struct *ssp)
|
2017-03-25 09:59:38 -07:00
|
|
|
{
|
2018-10-28 10:32:51 -07:00
|
|
|
WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
|
2019-02-13 13:54:37 -08:00
|
|
|
flush_work(&ssp->srcu_work);
|
2018-10-28 10:32:51 -07:00
|
|
|
WARN_ON(ssp->srcu_gp_running);
|
|
|
|
WARN_ON(ssp->srcu_gp_waiting);
|
|
|
|
WARN_ON(ssp->srcu_cb_head);
|
|
|
|
WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail);
|
2020-11-13 12:54:48 -08:00
|
|
|
WARN_ON(ssp->srcu_idx != ssp->srcu_idx_max);
|
|
|
|
WARN_ON(ssp->srcu_idx & 0x1);
|
2017-03-25 09:59:38 -07:00
|
|
|
}
|
2019-02-13 13:54:37 -08:00
|
|
|
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
|
2017-03-25 09:59:38 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Removes the count for the old reader from the appropriate element of
|
srcu: Allow use of Tiny/Tree SRCU from both process and interrupt context
Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting
down a guest running iperf on a VFIO assigned device. This happens
because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt
context, while a worker thread does the same inside kvm_set_irq(). If the
interrupt happens while the worker thread is executing __srcu_read_lock(),
updates to the Classic SRCU ->lock_count[] field or the Tree SRCU
->srcu_lock_count[] field can be lost.
The docs say you are not supposed to call srcu_read_lock() and
srcu_read_unlock() from irq context, but KVM interrupt injection happens
from (host) interrupt context and it would be nice if SRCU supported the
use case. KVM is using SRCU here not really for the "sleepable" part,
but rather due to its IPI-free fast detection of grace periods. It is
therefore not desirable to switch back to RCU, which would effectively
revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING",
2014-01-16).
However, the docs are overly conservative. You can have an SRCU instance
only has users in irq context, and you can mix process and irq context
as long as process context users disable interrupts. In addition,
__srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and
Classic SRCU. For those two implementations, only srcu_read_lock()
is unsafe.
When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(),
in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via
this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments.
Therefore it kept __this_cpu_inc(), with preempt_disable/enable in
the caller. Tree SRCU however only does one increment, so on most
architectures it is more efficient for __srcu_read_lock() to use
this_cpu_inc(), and any performance differences appear to be down in
the noise.
Unlike Classic and Tree SRCU, Tiny SRCU does increments and decrements on
a single variable. Therefore, as Peter Zijlstra pointed out, Tiny SRCU's
implementation already supports mixed-context use of srcu_read_lock()
and srcu_read_unlock(), at least as long as uses of srcu_read_lock()
and srcu_read_unlock() in each handler are nested and paired properly.
In other words, it is still illegal to (say) invoke srcu_read_lock()
in an interrupt handler and to invoke the matching srcu_read_unlock()
in a softirq handler. Therefore, the only change required for Tiny SRCU
is to its comments.
Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING")
Reported-by: Linu Cherian <linuc.decode@gmail.com>
Suggested-by: Linu Cherian <linuc.decode@gmail.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Paolo Bonzini <pbonzini@redhat.com>
2017-05-31 14:03:10 +02:00
|
|
|
* the srcu_struct.
|
2017-03-25 09:59:38 -07:00
|
|
|
*/
|
2018-10-28 10:32:51 -07:00
|
|
|
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
|
2017-03-25 09:59:38 -07:00
|
|
|
{
|
2024-02-15 09:04:30 -08:00
|
|
|
int newval;
|
2017-03-25 09:59:38 -07:00
|
|
|
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
|
|
|
newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
|
2018-10-28 10:32:51 -07:00
|
|
|
WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_enable();
|
srcu: Prevent redundant __srcu_read_unlock() wakeup
Tiny SRCU readers can appear at task level, but also in interrupt and
softirq handlers. Because Tiny SRCU is selected only in kernels built
with CONFIG_SMP=n and CONFIG_PREEMPTION=n, it is not possible for a grace
period to start while there is a non-task-level SRCU reader executing.
This means that it does not make sense for __srcu_read_unlock() to awaken
the Tiny SRCU grace period, because that can only happen when the grace
period is waiting for one value of ->srcu_idx and __srcu_read_unlock()
is ending the last reader for some other value of ->srcu_idx. After all,
any such wakeup will be redundant.
Worse yet, in some cases, such wakeups generate lockdep splats:
======================================================
WARNING: possible circular locking dependency detected
5.15.0-rc1+ #3758 Not tainted
------------------------------------------------------
rcu_torture_rea/53 is trying to acquire lock:
ffffffff9514e6a8 (srcu_ctl.srcu_wq.lock){..-.}-{2:2}, at:
xa/0x30
but task is already holding lock:
ffff95c642479d80 (&p->pi_lock){-.-.}-{2:2}, at:
_extend+0x370/0x400
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&p->pi_lock){-.-.}-{2:2}:
_raw_spin_lock_irqsave+0x2f/0x50
try_to_wake_up+0x50/0x580
swake_up_locked.part.7+0xe/0x30
swake_up_one+0x22/0x30
rcutorture_one_extend+0x1b6/0x400
rcu_torture_one_read+0x290/0x5d0
rcu_torture_timer+0x1a/0x70
call_timer_fn+0xa6/0x230
run_timer_softirq+0x493/0x4c0
__do_softirq+0xc0/0x371
irq_exit+0x73/0x90
sysvec_apic_timer_interrupt+0x63/0x80
asm_sysvec_apic_timer_interrupt+0x12/0x20
default_idle+0xb/0x10
default_idle_call+0x5e/0x170
do_idle+0x18a/0x1f0
cpu_startup_entry+0xa/0x10
start_kernel+0x678/0x69f
secondary_startup_64_no_verify+0xc2/0xcb
-> #0 (srcu_ctl.srcu_wq.lock){..-.}-{2:2}:
__lock_acquire+0x130c/0x2440
lock_acquire+0xc2/0x270
_raw_spin_lock_irqsave+0x2f/0x50
swake_up_one+0xa/0x30
rcutorture_one_extend+0x387/0x400
rcu_torture_one_read+0x290/0x5d0
rcu_torture_reader+0xac/0x200
kthread+0x12d/0x150
ret_from_fork+0x22/0x30
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&p->pi_lock);
lock(srcu_ctl.srcu_wq.lock);
lock(&p->pi_lock);
lock(srcu_ctl.srcu_wq.lock);
*** DEADLOCK ***
1 lock held by rcu_torture_rea/53:
#0: ffff95c642479d80 (&p->pi_lock){-.-.}-{2:2}, at:
_extend+0x370/0x400
stack backtrace:
CPU: 0 PID: 53 Comm: rcu_torture_rea Not tainted 5.15.0-rc1+
Hardware name: Red Hat KVM/RHEL-AV, BIOS
e_el8.5.0+746+bbd5d70c 04/01/2014
Call Trace:
check_noncircular+0xfe/0x110
? find_held_lock+0x2d/0x90
__lock_acquire+0x130c/0x2440
lock_acquire+0xc2/0x270
? swake_up_one+0xa/0x30
? find_held_lock+0x72/0x90
_raw_spin_lock_irqsave+0x2f/0x50
? swake_up_one+0xa/0x30
swake_up_one+0xa/0x30
rcutorture_one_extend+0x387/0x400
rcu_torture_one_read+0x290/0x5d0
rcu_torture_reader+0xac/0x200
? rcutorture_oom_notify+0xf0/0xf0
? __kthread_parkme+0x61/0x90
? rcu_torture_one_read+0x5d0/0x5d0
kthread+0x12d/0x150
? set_kthread_struct+0x40/0x40
ret_from_fork+0x22/0x30
This is a false positive because there is only one CPU, and both locks
are raw (non-preemptible) spinlocks. However, it is worthwhile getting
rid of the redundant wakeup, which has the side effect of breaking
the theoretical deadlock cycle. This commit therefore eliminates the
redundant wakeups.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-09-23 10:07:14 -07:00
|
|
|
if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
|
2018-10-28 10:32:51 -07:00
|
|
|
swake_up_one(&ssp->srcu_wq);
|
2017-03-25 09:59:38 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Workqueue handler to drive one grace period and invoke any callbacks
|
2019-10-15 21:18:14 +02:00
|
|
|
* that become ready as a result. Single-CPU and !PREEMPTION operation
|
2017-03-25 09:59:38 -07:00
|
|
|
* means that we get away with murder on synchronization. ;-)
|
|
|
|
*/
|
|
|
|
void srcu_drive_gp(struct work_struct *wp)
|
|
|
|
{
|
|
|
|
int idx;
|
2017-05-04 14:29:16 -07:00
|
|
|
struct rcu_head *lh;
|
2017-03-25 09:59:38 -07:00
|
|
|
struct rcu_head *rhp;
|
2018-10-28 10:32:51 -07:00
|
|
|
struct srcu_struct *ssp;
|
2017-03-25 09:59:38 -07:00
|
|
|
|
2018-10-28 10:32:51 -07:00
|
|
|
ssp = container_of(wp, struct srcu_struct, srcu_work);
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
|
|
|
if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) {
|
|
|
|
preempt_enable();
|
2024-10-07 12:14:15 +02:00
|
|
|
return; /* Already running or nothing to do. */
|
2024-02-15 09:04:30 -08:00
|
|
|
}
|
2017-03-25 09:59:38 -07:00
|
|
|
|
2017-05-04 14:29:16 -07:00
|
|
|
/* Remove recently arrived callbacks and wait for readers. */
|
2018-10-28 10:32:51 -07:00
|
|
|
WRITE_ONCE(ssp->srcu_gp_running, true);
|
2017-05-04 14:29:16 -07:00
|
|
|
local_irq_disable();
|
2018-10-28 10:32:51 -07:00
|
|
|
lh = ssp->srcu_cb_head;
|
|
|
|
ssp->srcu_cb_head = NULL;
|
|
|
|
ssp->srcu_cb_tail = &ssp->srcu_cb_head;
|
2017-05-04 14:29:16 -07:00
|
|
|
local_irq_enable();
|
2020-11-12 16:34:09 -08:00
|
|
|
idx = (ssp->srcu_idx & 0x2) / 2;
|
|
|
|
WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
|
2018-10-28 10:32:51 -07:00
|
|
|
WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_enable();
|
2018-10-28 10:32:51 -07:00
|
|
|
swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx]));
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
2018-10-28 10:32:51 -07:00
|
|
|
WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
|
2020-11-12 16:34:09 -08:00
|
|
|
WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_enable();
|
2017-05-04 14:29:16 -07:00
|
|
|
|
|
|
|
/* Invoke the callbacks we removed above. */
|
|
|
|
while (lh) {
|
|
|
|
rhp = lh;
|
|
|
|
lh = lh->next;
|
rcu: Dump memory object info if callback function is invalid
When a structure containing an RCU callback rhp is (incorrectly) freed
and reallocated after rhp is passed to call_rcu(), it is not unusual for
rhp->func to be set to NULL. This defeats the debugging prints used by
__call_rcu_common() in kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y,
which expect to identify the offending code using the identity of this
function.
And in kernels build without CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things
are even worse, as can be seen from this splat:
Unable to handle kernel NULL pointer dereference at virtual address 0
... ...
PC is at 0x0
LR is at rcu_do_batch+0x1c0/0x3b8
... ...
(rcu_do_batch) from (rcu_core+0x1d4/0x284)
(rcu_core) from (__do_softirq+0x24c/0x344)
(__do_softirq) from (__irq_exit_rcu+0x64/0x108)
(__irq_exit_rcu) from (irq_exit+0x8/0x10)
(irq_exit) from (__handle_domain_irq+0x74/0x9c)
(__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
(gic_handle_irq) from (__irq_svc+0x5c/0x94)
(__irq_svc) from (arch_cpu_idle+0x20/0x3c)
(arch_cpu_idle) from (default_idle_call+0x4c/0x78)
(default_idle_call) from (do_idle+0xf8/0x150)
(do_idle) from (cpu_startup_entry+0x18/0x20)
(cpu_startup_entry) from (0xc01530)
This commit therefore adds calls to mem_dump_obj(rhp) to output some
information, for example:
slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256
This provides the rough size of the memory block and the offset of the
rcu_head structure, which as least provides at least a few clues to help
locate the problem. If the problem is reproducible, additional slab
debugging can be enabled, for example, CONFIG_DEBUG_SLAB=y, which can
provide significantly more information.
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
2023-08-05 11:17:26 +08:00
|
|
|
debug_rcu_head_callback(rhp);
|
2017-05-04 14:29:16 -07:00
|
|
|
local_bh_disable();
|
|
|
|
rhp->func(rhp);
|
|
|
|
local_bh_enable();
|
2017-03-25 09:59:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-05-04 14:29:16 -07:00
|
|
|
* Enable rescheduling, and if there are more callbacks,
|
|
|
|
* reschedule ourselves. This can race with a call_srcu()
|
|
|
|
* at interrupt level, but the ->srcu_gp_running checks will
|
|
|
|
* straighten that out.
|
2017-03-25 09:59:38 -07:00
|
|
|
*/
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
2018-10-28 10:32:51 -07:00
|
|
|
WRITE_ONCE(ssp->srcu_gp_running, false);
|
2024-02-15 09:04:30 -08:00
|
|
|
idx = ULONG_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max));
|
|
|
|
preempt_enable();
|
|
|
|
if (idx)
|
2018-10-28 10:32:51 -07:00
|
|
|
schedule_work(&ssp->srcu_work);
|
2017-03-25 09:59:38 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(srcu_drive_gp);
|
|
|
|
|
2020-11-13 09:37:39 -08:00
|
|
|
static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
|
|
|
|
{
|
2022-08-02 15:32:47 -07:00
|
|
|
unsigned long cookie;
|
2020-11-13 12:54:48 -08:00
|
|
|
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
2020-11-13 12:54:48 -08:00
|
|
|
cookie = get_state_synchronize_srcu(ssp);
|
2024-02-15 09:04:30 -08:00
|
|
|
if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) {
|
|
|
|
preempt_enable();
|
2020-11-13 12:54:48 -08:00
|
|
|
return;
|
2024-02-15 09:04:30 -08:00
|
|
|
}
|
2020-11-13 12:54:48 -08:00
|
|
|
WRITE_ONCE(ssp->srcu_idx_max, cookie);
|
2020-11-13 09:37:39 -08:00
|
|
|
if (!READ_ONCE(ssp->srcu_gp_running)) {
|
|
|
|
if (likely(srcu_init_done))
|
|
|
|
schedule_work(&ssp->srcu_work);
|
|
|
|
else if (list_empty(&ssp->srcu_work.entry))
|
|
|
|
list_add(&ssp->srcu_work.entry, &srcu_boot_list);
|
|
|
|
}
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_enable();
|
2020-11-13 09:37:39 -08:00
|
|
|
}
|
|
|
|
|
2017-03-25 09:59:38 -07:00
|
|
|
/*
|
|
|
|
* Enqueue an SRCU callback on the specified srcu_struct structure,
|
|
|
|
* initiating grace-period processing if it is not already running.
|
|
|
|
*/
|
2018-10-28 10:32:51 -07:00
|
|
|
void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
|
2017-03-25 09:59:38 -07:00
|
|
|
rcu_callback_t func)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2017-05-04 14:29:16 -07:00
|
|
|
rhp->func = func;
|
|
|
|
rhp->next = NULL;
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_disable(); // Needed for PREEMPT_AUTO
|
2017-03-25 09:59:38 -07:00
|
|
|
local_irq_save(flags);
|
2018-10-28 10:32:51 -07:00
|
|
|
*ssp->srcu_cb_tail = rhp;
|
|
|
|
ssp->srcu_cb_tail = &rhp->next;
|
2017-03-25 09:59:38 -07:00
|
|
|
local_irq_restore(flags);
|
2020-11-13 09:37:39 -08:00
|
|
|
srcu_gp_start_if_needed(ssp);
|
2024-02-15 09:04:30 -08:00
|
|
|
preempt_enable();
|
2017-03-25 09:59:38 -07:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(call_srcu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* synchronize_srcu - wait for prior SRCU read-side critical-section completion
|
|
|
|
*/
|
2018-10-28 10:32:51 -07:00
|
|
|
void synchronize_srcu(struct srcu_struct *ssp)
|
2017-03-25 09:59:38 -07:00
|
|
|
{
|
|
|
|
struct rcu_synchronize rs;
|
|
|
|
|
2023-01-12 22:59:54 -08:00
|
|
|
srcu_lock_sync(&ssp->dep_map);
|
|
|
|
|
2022-11-09 15:36:38 +08:00
|
|
|
RCU_LOCKDEP_WARN(lockdep_is_held(ssp) ||
|
|
|
|
lock_is_held(&rcu_bh_lock_map) ||
|
|
|
|
lock_is_held(&rcu_lock_map) ||
|
|
|
|
lock_is_held(&rcu_sched_lock_map),
|
|
|
|
"Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
|
|
|
|
|
|
|
|
if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
|
|
|
|
return;
|
|
|
|
|
|
|
|
might_sleep();
|
2017-03-25 09:59:38 -07:00
|
|
|
init_rcu_head_on_stack(&rs.head);
|
|
|
|
init_completion(&rs.completion);
|
2018-10-28 10:32:51 -07:00
|
|
|
call_srcu(ssp, &rs.head, wakeme_after_rcu);
|
2017-03-25 09:59:38 -07:00
|
|
|
wait_for_completion(&rs.completion);
|
|
|
|
destroy_rcu_head_on_stack(&rs.head);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(synchronize_srcu);
|
2017-05-26 16:16:40 -07:00
|
|
|
|
2020-11-13 12:54:48 -08:00
|
|
|
/*
|
|
|
|
* get_state_synchronize_srcu - Provide an end-of-grace-period cookie
|
|
|
|
*/
|
|
|
|
unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
|
|
|
|
{
|
|
|
|
unsigned long ret;
|
|
|
|
|
|
|
|
barrier();
|
|
|
|
ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1;
|
|
|
|
barrier();
|
2022-08-02 15:32:47 -07:00
|
|
|
return ret;
|
2020-11-13 12:54:48 -08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
|
|
|
|
|
|
|
|
/*
 * start_poll_synchronize_srcu - Provide cookie and start grace period
 *
 * Unlike get_state_synchronize_srcu(), this function also makes sure
 * that the needed grace period has been requested, so that
 * poll_state_synchronize_srcu() will eventually return true for the
 * returned cookie.
 */
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
{
	unsigned long cookie;

	preempt_disable();  // Needed for PREEMPT_AUTO
	cookie = get_state_synchronize_srcu(ssp);
	srcu_gp_start_if_needed(ssp);
	preempt_enable();

	return cookie;
}
|
|
|
|
EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* poll_state_synchronize_srcu - Has cookie's grace period ended?
|
|
|
|
*/
|
|
|
|
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
|
|
|
|
{
|
2022-08-02 15:32:47 -07:00
|
|
|
unsigned long cur_s = READ_ONCE(ssp->srcu_idx);
|
2020-11-13 12:54:48 -08:00
|
|
|
|
|
|
|
barrier();
|
2024-06-14 13:26:44 -07:00
|
|
|
return cookie == SRCU_GET_STATE_COMPLETED ||
|
|
|
|
ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
|
2020-11-13 12:54:48 -08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
|
|
|
|
|
2017-05-26 16:16:40 -07:00
|
|
|
/* Lockdep diagnostics. */
|
|
|
|
void __init rcu_scheduler_starting(void)
|
|
|
|
{
|
|
|
|
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
|
|
|
|
}
|
srcu: Make call_srcu() available during very early boot
Event tracing is moving to SRCU in order to take advantage of the fact
that SRCU may be safely used from idle and even offline CPUs. However,
event tracing can invoke call_srcu() very early in the boot process,
even before workqueue_init_early() is invoked (let alone rcu_init()).
Therefore, call_srcu()'s attempts to queue work fail miserably.
This commit therefore detects this situation, and refrains from attempting
to queue work before rcu_init() time, but does everything else that it
would have done, and in addition, adds the srcu_struct to a global list.
The rcu_init() function now invokes a new srcu_init() function, which
is empty if CONFIG_SRCU=n. Otherwise, srcu_init() queues work for
each srcu_struct on the list. This all happens early enough in boot
that there is but a single CPU with interrupts disabled, which allows
synchronization to be dispensed with.
Of course, the queued work won't actually be invoked until after
workqueue_init() is invoked, which happens shortly after the scheduler
is up and running. This means that although call_srcu() may be invoked
any time after per-CPU variables have been set up, there is still a very
narrow window when synchronize_srcu() won't work, and this window
extends from the time that the scheduler starts until the time that
workqueue_init() returns. This can be fixed in a manner similar to
the fix for synchronize_rcu_expedited() and friends, but until someone
actually needs to use synchronize_srcu() during this window, this fix
is added churn for no benefit.
Finally, note that Tree SRCU's new srcu_init() function invokes
queue_work() rather than the queue_delayed_work() function that is
invoked post-boot. The reason is that queue_delayed_work() will (as you
would expect) post a timer, and timers have not yet been initialized.
So use of queue_work() avoids the complaints about use of uninitialized
spinlocks that would otherwise result. Besides, some delay is already
provided by the aforementioned fact that the queued work won't actually
be invoked until after the scheduler is up and running.
Requested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2018-08-14 08:45:54 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Queue work for srcu_struct structures with early boot callbacks.
|
|
|
|
* The work won't actually execute until the workqueue initialization
|
|
|
|
* phase that takes place after the scheduler starts.
|
|
|
|
*/
|
|
|
|
void __init srcu_init(void)
|
|
|
|
{
|
2018-10-28 10:32:51 -07:00
|
|
|
struct srcu_struct *ssp;
|
srcu: Make call_srcu() available during very early boot
Event tracing is moving to SRCU in order to take advantage of the fact
that SRCU may be safely used from idle and even offline CPUs. However,
event tracing can invoke call_srcu() very early in the boot process,
even before workqueue_init_early() is invoked (let alone rcu_init()).
Therefore, call_srcu()'s attempts to queue work fail miserably.
This commit therefore detects this situation, and refrains from attempting
to queue work before rcu_init() time, but does everything else that it
would have done, and in addition, adds the srcu_struct to a global list.
The rcu_init() function now invokes a new srcu_init() function, which
is empty if CONFIG_SRCU=n. Otherwise, srcu_init() queues work for
each srcu_struct on the list. This all happens early enough in boot
that there is but a single CPU with interrupts disabled, which allows
synchronization to be dispensed with.
Of course, the queued work won't actually be invoked until after
workqueue_init() is invoked, which happens shortly after the scheduler
is up and running. This means that although call_srcu() may be invoked
any time after per-CPU variables have been set up, there is still a very
narrow window when synchronize_srcu() won't work, and this window
extends from the time that the scheduler starts until the time that
workqueue_init() returns. This can be fixed in a manner similar to
the fix for synchronize_rcu_expedited() and friends, but until someone
actually needs to use synchronize_srcu() during this window, this fix
is added churn for no benefit.
Finally, note that Tree SRCU's new srcu_init() function invokes
queue_work() rather than the queue_delayed_work() function that is
invoked post-boot. The reason is that queue_delayed_work() will (as you
would expect) post a timer, and timers have not yet been initialized.
So use of queue_work() avoids the complaints about use of uninitialized
spinlocks that would otherwise result. Besides, some delay is already
provide by the aforementioned fact that the queued work won't actually
be invoked until after the scheduler is up and running.
Requested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2018-08-14 08:45:54 -07:00
|
|
|
|
|
|
|
srcu_init_done = true;
|
|
|
|
while (!list_empty(&srcu_boot_list)) {
|
2018-10-28 10:32:51 -07:00
|
|
|
ssp = list_first_entry(&srcu_boot_list,
|
2018-08-14 14:41:49 -07:00
|
|
|
struct srcu_struct, srcu_work.entry);
|
2018-10-28 10:32:51 -07:00
|
|
|
list_del_init(&ssp->srcu_work.entry);
|
|
|
|
schedule_work(&ssp->srcu_work);
|
srcu: Make call_srcu() available during very early boot
Event tracing is moving to SRCU in order to take advantage of the fact
that SRCU may be safely used from idle and even offline CPUs. However,
event tracing can invoke call_srcu() very early in the boot process,
even before workqueue_init_early() is invoked (let alone rcu_init()).
Therefore, call_srcu()'s attempts to queue work fail miserably.
This commit therefore detects this situation, and refrains from attempting
to queue work before rcu_init() time, but does everything else that it
would have done, and in addition, adds the srcu_struct to a global list.
The rcu_init() function now invokes a new srcu_init() function, which
is empty if CONFIG_SRCU=n. Otherwise, srcu_init() queues work for
each srcu_struct on the list. This all happens early enough in boot
that there is but a single CPU with interrupts disabled, which allows
synchronization to be dispensed with.
Of course, the queued work won't actually be invoked until after
workqueue_init() is invoked, which happens shortly after the scheduler
is up and running. This means that although call_srcu() may be invoked
any time after per-CPU variables have been set up, there is still a very
narrow window when synchronize_srcu() won't work, and this window
extends from the time that the scheduler starts until the time that
workqueue_init() returns. This can be fixed in a manner similar to
the fix for synchronize_rcu_expedited() and friends, but until someone
actually needs to use synchronize_srcu() during this window, this fix
is added churn for no benefit.
Finally, note that Tree SRCU's new srcu_init() function invokes
queue_work() rather than the queue_delayed_work() function that is
invoked post-boot. The reason is that queue_delayed_work() will (as you
would expect) post a timer, and timers have not yet been initialized.
So use of queue_work() avoids the complaints about use of uninitialized
spinlocks that would otherwise result. Besides, some delay is already
provide by the aforementioned fact that the queued work won't actually
be invoked until after the scheduler is up and running.
Requested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2018-08-14 08:45:54 -07:00
|
|
|
}
|
|
|
|
}
|