rcu: Break rcu_node_0 --> &rq->__lock order

Commit 851a723e45 ("sched: Always clear user_cpus_ptr in
do_set_cpus_allowed()") added a kfree() call to free any
user-provided affinity mask, if present. It was later changed to use
kfree_rcu() in commit 9a5418bc48 ("sched/core: Use kfree_rcu()
in do_set_cpus_allowed()") to avoid a circular locking dependency
problem.

It turns out that even kfree_rcu() isn't sufficient to avoid the
circular locking problem. As reported by the kernel test robot,
the following circular locking dependency now exists:

  &rdp->nocb_lock --> rcu_node_0 --> &rq->__lock

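Solve this by breaking the rcu_node_0 --> &rq->__lock chain: move
the resched_cpu() call out from under the rcu_node lock.
rcu_implicit_dynticks_qs() now returns a negative value when a CPU
needs a forced resched, and force_qs_rnp() collects those CPUs and
calls resched_cpu() on them only after dropping the rcu_node lock.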

[peterz: heavily borrowed from Waiman's Changelog]
[paulmck: applied Z qiang feedback]

Fixes: 851a723e45 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()")
Reported-by: kernel test robot <oliver.sang@intel.com>
Acked-by: Waiman Long <longman@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/oe-lkp/202310302207.a25f1a30-oliver.sang@intel.com
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
This commit is contained in:
Peter Zijlstra 2023-10-31 09:53:08 +01:00 committed by Frederic Weisbecker
parent 2656821f1f
commit 85d68222dd

View File

@ -755,14 +755,19 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
} }
/* /*
* Return true if the specified CPU has passed through a quiescent * Returns positive if the specified CPU has passed through a quiescent state
* state by virtue of being in or having passed through an dynticks * by virtue of being in or having passed through an dynticks idle state since
* idle state since the last call to dyntick_save_progress_counter() * the last call to dyntick_save_progress_counter() for this same CPU, or by
* for this same CPU, or by virtue of having been offline. * virtue of having been offline.
*
* Returns negative if the specified CPU needs a force resched.
*
* Returns zero otherwise.
*/ */
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{ {
unsigned long jtsq; unsigned long jtsq;
int ret = 0;
struct rcu_node *rnp = rdp->mynode; struct rcu_node *rnp = rdp->mynode;
/* /*
@ -848,8 +853,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
(time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
rcu_state.cbovld)) { rcu_state.cbovld)) {
WRITE_ONCE(rdp->rcu_urgent_qs, true); WRITE_ONCE(rdp->rcu_urgent_qs, true);
resched_cpu(rdp->cpu);
WRITE_ONCE(rdp->last_fqs_resched, jiffies); WRITE_ONCE(rdp->last_fqs_resched, jiffies);
ret = -1;
} }
/* /*
@ -862,8 +867,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
if (time_after(jiffies, rcu_state.jiffies_resched)) { if (time_after(jiffies, rcu_state.jiffies_resched)) {
if (time_after(jiffies, if (time_after(jiffies,
READ_ONCE(rdp->last_fqs_resched) + jtsq)) { READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
resched_cpu(rdp->cpu);
WRITE_ONCE(rdp->last_fqs_resched, jiffies); WRITE_ONCE(rdp->last_fqs_resched, jiffies);
ret = -1;
} }
if (IS_ENABLED(CONFIG_IRQ_WORK) && if (IS_ENABLED(CONFIG_IRQ_WORK) &&
!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
@ -892,7 +897,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
} }
} }
return 0; return ret;
} }
/* Trace-event wrapper function for trace_rcu_future_grace_period. */ /* Trace-event wrapper function for trace_rcu_future_grace_period. */
@ -2271,15 +2276,15 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
{ {
int cpu; int cpu;
unsigned long flags; unsigned long flags;
unsigned long mask;
struct rcu_data *rdp;
struct rcu_node *rnp; struct rcu_node *rnp;
rcu_state.cbovld = rcu_state.cbovldnext; rcu_state.cbovld = rcu_state.cbovldnext;
rcu_state.cbovldnext = false; rcu_state.cbovldnext = false;
rcu_for_each_leaf_node(rnp) { rcu_for_each_leaf_node(rnp) {
unsigned long mask = 0;
unsigned long rsmask = 0;
cond_resched_tasks_rcu_qs(); cond_resched_tasks_rcu_qs();
mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags); raw_spin_lock_irqsave_rcu_node(rnp, flags);
rcu_state.cbovldnext |= !!rnp->cbovldmask; rcu_state.cbovldnext |= !!rnp->cbovldmask;
if (rnp->qsmask == 0) { if (rnp->qsmask == 0) {
@ -2297,11 +2302,17 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
continue; continue;
} }
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) {
struct rcu_data *rdp;
int ret;
rdp = per_cpu_ptr(&rcu_data, cpu); rdp = per_cpu_ptr(&rcu_data, cpu);
if (f(rdp)) { ret = f(rdp);
if (ret > 0) {
mask |= rdp->grpmask; mask |= rdp->grpmask;
rcu_disable_urgency_upon_qs(rdp); rcu_disable_urgency_upon_qs(rdp);
} }
if (ret < 0)
rsmask |= rdp->grpmask;
} }
if (mask != 0) { if (mask != 0) {
/* Idle/offline CPUs, report (releases rnp->lock). */ /* Idle/offline CPUs, report (releases rnp->lock). */
@ -2310,6 +2321,9 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
/* Nothing to do here, so just drop the lock. */ /* Nothing to do here, so just drop the lock. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags); raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} }
for_each_leaf_node_cpu_mask(rnp, cpu, rsmask)
resched_cpu(cpu);
} }
} }