rcu: Kill rnp->ofl_seq and use only rcu_state.ofl_lock for exclusion
If we allow architectures to bring APs online in parallel, then we end up requiring rcu_cpu_starting() to be reentrant. But currently, the manipulation of rnp->ofl_seq is not thread-safe.

However, rnp->ofl_seq is also fairly much pointless anyway since both rcu_cpu_starting() and rcu_report_dead() hold rcu_state.ofl_lock for fairly much the whole time that rnp->ofl_seq is set to an odd number to indicate that an operation is in progress.

So drop rnp->ofl_seq completely, and use only rcu_state.ofl_lock.

This has a couple of minor complexities: lockdep will complain when we take rcu_state.ofl_lock, and currently accepts the 'excuse' of having an odd value in rnp->ofl_seq. So switch it to an arch_spinlock_t to avoid that false positive complaint. Since we're killing rnp->ofl_seq of course that 'excuse' has to be changed too, so make it check for arch_spin_is_locked(rcu_state.ofl_lock).

There's no arch_spin_lock_irqsave() so we have to manually save and restore local interrupts around the locking.

At Paul's request based on Neeraj's analysis, make rcu_gp_init not just wait but *exclude* any CPU online/offline activity, which was fairly much true already by virtue of it holding rcu_state.ofl_lock.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
parent da123016ca
commit 82980b1622
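The one non-obvious part of the new locking is the interrupt handling: arch_spinlock_t is invisible to lockdep and there is no arch_spin_lock_irqsave() helper, so every acquisition of rcu_state.ofl_lock in this patch wraps the lock in an explicit local_irq_save()/local_irq_restore() pair. The following is a condensed, illustrative sketch of that shared pattern; the demo_* lock and function names are made up for illustration, and only the calls themselves (local_irq_save(), arch_spin_lock(), arch_spin_is_locked(), __ARCH_SPIN_LOCK_UNLOCKED) are the ones the patch actually uses.

#include <linux/irqflags.h>	/* local_irq_save() / local_irq_restore() */
#include <linux/spinlock.h>	/* arch_spinlock_t, arch_spin_lock(), ... */

/* Bare arch spinlock: not tracked by lockdep, hence no false-positive splat. */
static arch_spinlock_t demo_ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED;

/*
 * Shape of each critical section in rcu_cpu_starting(), rcu_report_dead(),
 * and the rcu_gp_init() leaf-node loop (illustrative only).
 */
static void demo_ofl_critical_section(void)
{
	unsigned long flags;

	local_irq_save(flags);			/* no arch_spin_lock_irqsave() exists */
	arch_spin_lock(&demo_ofl_lock);		/* excludes concurrent online/offline */

	/* ... update the leaf rcu_node state under its raw lock ... */

	arch_spin_unlock(&demo_ofl_lock);
	local_irq_restore(flags);
}

/* The lockdep 'excuse' then becomes a simple lock check, e.g.: */
static bool demo_hotplug_in_progress(void)
{
	return arch_spin_is_locked(&demo_ofl_lock);
}

As the new comment in rcu_lockdep_current_cpu_online() notes, arch_spin_is_locked() can also be true because some *other* CPU holds the lock, which only turns a would-be warning into a false negative.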
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,7 +91,7 @@ static struct rcu_state rcu_state = {
 	.abbr = RCU_ABBR,
 	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
 	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
-	.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
+	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
 };
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1175,7 +1175,15 @@ bool rcu_lockdep_current_cpu_online(void)
 	preempt_disable_notrace();
 	rdp = this_cpu_ptr(&rcu_data);
 	rnp = rdp->mynode;
-	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
+	/*
+	 * Strictly, we care here about the case where the current CPU is
+	 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+	 * not being up to date. So arch_spin_is_locked() might have a
+	 * false positive if it's held by some *other* CPU, but that's
+	 * OK because that just means a false *negative* on the warning.
+	 */
+	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) ||
+	    arch_spin_is_locked(&rcu_state.ofl_lock))
 		ret = true;
 	preempt_enable_notrace();
 	return ret;
@@ -1739,7 +1747,6 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static noinline_for_stack bool rcu_gp_init(void)
 {
-	unsigned long firstseq;
 	unsigned long flags;
 	unsigned long oldmask;
 	unsigned long mask;
@@ -1782,22 +1789,17 @@ static noinline_for_stack bool rcu_gp_init(void)
 	 * of RCU's Requirements documentation.
 	 */
 	WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
+	/* Exclude CPU hotplug operations. */
 	rcu_for_each_leaf_node(rnp) {
-		// Wait for CPU-hotplug operations that might have
-		// started before this grace period did.
-		smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
-		firstseq = READ_ONCE(rnp->ofl_seq);
-		if (firstseq & 0x1)
-			while (firstseq == READ_ONCE(rnp->ofl_seq))
-				schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
-		smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
-		raw_spin_lock(&rcu_state.ofl_lock);
-		raw_spin_lock_irq_rcu_node(rnp);
+		local_irq_save(flags);
+		arch_spin_lock(&rcu_state.ofl_lock);
+		raw_spin_lock_rcu_node(rnp);
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
-			raw_spin_unlock_irq_rcu_node(rnp);
-			raw_spin_unlock(&rcu_state.ofl_lock);
+			raw_spin_unlock_rcu_node(rnp);
+			arch_spin_unlock(&rcu_state.ofl_lock);
+			local_irq_restore(flags);
 			continue;
 		}
 
@@ -1832,8 +1834,9 @@ static noinline_for_stack bool rcu_gp_init(void)
 			rcu_cleanup_dead_rnp(rnp);
 		}
 
-		raw_spin_unlock_irq_rcu_node(rnp);
-		raw_spin_unlock(&rcu_state.ofl_lock);
+		raw_spin_unlock_rcu_node(rnp);
+		arch_spin_unlock(&rcu_state.ofl_lock);
+		local_irq_restore(flags);
 	}
 	rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
 
@@ -4287,11 +4290,10 @@ void rcu_cpu_starting(unsigned int cpu)
 
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+	local_irq_save(flags);
+	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	raw_spin_lock_rcu_node(rnp);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
 	newcpu = !(rnp->expmaskinitnext & mask);
 	rnp->expmaskinitnext |= mask;
@@ -4304,15 +4306,18 @@ void rcu_cpu_starting(unsigned int cpu)
 
 	/* An incoming CPU should never be blocking a grace period. */
 	if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
+		/* rcu_report_qs_rnp() *really* wants some flags to restore */
+		unsigned long flags2;
+
+		local_irq_save(flags2);
 		rcu_disable_urgency_upon_qs(rdp);
 		/* Report QS -after- changing ->qsmaskinitnext! */
-		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
+		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
 	} else {
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		raw_spin_unlock_rcu_node(rnp);
 	}
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+	arch_spin_unlock(&rcu_state.ofl_lock);
+	local_irq_restore(flags);
 	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4326,7 +4331,7 @@ void rcu_cpu_starting(unsigned int cpu)
  */
 void rcu_report_dead(unsigned int cpu)
 {
-	unsigned long flags;
+	unsigned long flags, seq_flags;
 	unsigned long mask;
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
@@ -4340,10 +4345,8 @@ void rcu_report_dead(unsigned int cpu)
 
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
 	mask = rdp->grpmask;
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	raw_spin_lock(&rcu_state.ofl_lock);
+	local_irq_save(seq_flags);
+	arch_spin_lock(&rcu_state.ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
 	rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4354,10 +4357,8 @@ void rcu_report_dead(unsigned int cpu)
 	}
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	raw_spin_unlock(&rcu_state.ofl_lock);
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+	arch_spin_unlock(&rcu_state.ofl_lock);
+	local_irq_restore(seq_flags);
 
 	rdp->cpu_started = false;
 }
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,8 +56,6 @@ struct rcu_node {
 					/*  Initialized from ->qsmaskinitnext at the */
 					/*  beginning of each grace period. */
 	unsigned long qsmaskinitnext;
-	unsigned long ofl_seq;		/* CPU-hotplug operation sequence count. */
-					/* Online CPUs for next grace period. */
 	unsigned long expmask;		/* CPUs or groups that need to check in */
 					/*  to allow the current expedited GP */
 					/*  to complete. */
@@ -355,7 +353,7 @@ struct rcu_state {
 	const char *name;			/* Name of structure. */
 	char abbr;				/* Abbreviated name. */
 
-	raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
+	arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
 						/* Synchronize offline with */
 						/* GP pre-initialization. */
 };