rcu: Remove restrictions on no-CBs CPUs

Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore
at least one no-CBs CPU must remain online at any given time.  These
restrictions are problematic in some situations, such as cases where
all CPUs must run a real-time workload that needs to be insulated from
OS jitter and latencies due to RCU callback invocation.  This commit
therefore provides no-CBs CPUs a (very crude and energy-inefficient)
way to start and to wait for grace periods independently of the normal
RCU callback mechanisms.  This approach allows any or all of the CPUs to
be designated as no-CBs CPUs, and allows any proper subset of the CPUs
(whether no-CBs CPUs or not) to be offlined.

This commit also provides a fix for a locking bug spotted by Xie
ChanglongX <changlongx.xie@intel.com>.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Paul E. McKenney 2013-01-07 13:37:42 -08:00 committed by Paul E. McKenney
parent f6161aa153
commit 34ed62461a
4 changed files with 58 additions and 130 deletions

View File

@ -655,7 +655,7 @@ config RCU_BOOST_DELAY
Accept the default if unsure. Accept the default if unsure.
config RCU_NOCB_CPU config RCU_NOCB_CPU
bool "Offload RCU callback processing from boot-selected CPUs" bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL)"
depends on TREE_RCU || TREE_PREEMPT_RCU depends on TREE_RCU || TREE_PREEMPT_RCU
default n default n
help help
@ -673,7 +673,7 @@ config RCU_NOCB_CPU
callback, and (2) affinity or cgroups can be used to force callback, and (2) affinity or cgroups can be used to force
the kthreads to run on whatever set of CPUs is desired. the kthreads to run on whatever set of CPUs is desired.
Say Y here if you want reduced OS jitter on selected CPUs. Say Y here if you want to help to debug reduced OS jitter.
Say N here if you are unsure. Say N here if you are unsure.
endmenu # "RCU Subsystem" endmenu # "RCU Subsystem"

View File

@ -310,6 +310,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
if (rcu_gp_in_progress(rsp)) if (rcu_gp_in_progress(rsp))
return 0; /* No, a grace period is already in progress. */ return 0; /* No, a grace period is already in progress. */
if (rcu_nocb_needs_gp(rdp))
return 1; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL]) if (!rdp->nxttail[RCU_NEXT_TAIL])
return 0; /* No, this is a no-CBs (or offline) CPU. */ return 0; /* No, this is a no-CBs (or offline) CPU. */
if (*rdp->nxttail[RCU_NEXT_READY_TAIL]) if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@ -1035,10 +1037,11 @@ static void init_callback_list(struct rcu_data *rdp)
{ {
int i; int i;
if (init_nocb_callback_list(rdp))
return;
rdp->nxtlist = NULL; rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++) for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist; rdp->nxttail[i] = &rdp->nxtlist;
init_nocb_callback_list(rdp);
} }
/* /*
@ -2909,7 +2912,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode; struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp; struct rcu_state *rsp;
int ret = NOTIFY_OK;
trace_rcu_utilization("Start CPU hotplug"); trace_rcu_utilization("Start CPU hotplug");
switch (action) { switch (action) {
@ -2923,10 +2925,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
rcu_boost_kthread_setaffinity(rnp, -1); rcu_boost_kthread_setaffinity(rnp, -1);
break; break;
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
if (nocb_cpu_expendable(cpu)) rcu_boost_kthread_setaffinity(rnp, cpu);
rcu_boost_kthread_setaffinity(rnp, cpu);
else
ret = NOTIFY_BAD;
break; break;
case CPU_DYING: case CPU_DYING:
case CPU_DYING_FROZEN: case CPU_DYING_FROZEN:
@ -2950,7 +2949,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
break; break;
} }
trace_rcu_utilization("End CPU hotplug"); trace_rcu_utilization("End CPU hotplug");
return ret; return NOTIFY_OK;
} }
/* /*
@ -3170,7 +3169,6 @@ void __init rcu_init(void)
rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt(); __rcu_init_preempt();
rcu_init_nocb();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/* /*

View File

@ -326,6 +326,7 @@ struct rcu_data {
int nocb_p_count_lazy; /* (approximate). */ int nocb_p_count_lazy; /* (approximate). */
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread; struct task_struct *nocb_kthread;
bool nocb_needs_gp;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int cpu; int cpu;
@ -375,12 +376,6 @@ struct rcu_state {
struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
void (*func)(struct rcu_head *head)); void (*func)(struct rcu_head *head));
#ifdef CONFIG_RCU_NOCB_CPU
void (*call_remote)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
/* call_rcu() flavor, but for */
/* placing on remote CPU. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* The following fields are guarded by the root rcu_node's lock. */ /* The following fields are guarded by the root rcu_node's lock. */
@ -529,16 +524,15 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void); static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void); static void increment_cpu_stall_ticks(void);
static int rcu_nocb_needs_gp(struct rcu_data *rdp);
static bool is_nocb_cpu(int cpu); static bool is_nocb_cpu(int cpu);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
bool lazy); bool lazy);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
struct rcu_data *rdp); struct rcu_data *rdp);
static bool nocb_cpu_expendable(int cpu);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void init_nocb_callback_list(struct rcu_data *rdp); static bool init_nocb_callback_list(struct rcu_data *rdp);
static void __init rcu_init_nocb(void);
#endif /* #ifndef RCU_TREE_NONCORE */ #endif /* #ifndef RCU_TREE_NONCORE */

View File

@ -86,10 +86,6 @@ static void __init rcu_bootup_announce_oddness(void)
printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
#ifdef CONFIG_RCU_NOCB_CPU #ifdef CONFIG_RCU_NOCB_CPU
if (have_rcu_nocb_mask) { if (have_rcu_nocb_mask) {
if (cpumask_test_cpu(0, rcu_nocb_mask)) {
cpumask_clear_cpu(0, rcu_nocb_mask);
pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
}
cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
if (rcu_nocb_poll) if (rcu_nocb_poll)
@ -2165,6 +2161,14 @@ static int __init parse_rcu_nocb_poll(char *arg)
} }
early_param("rcu_nocb_poll", parse_rcu_nocb_poll); early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
/*
* Does this CPU need a grace period due to offloaded callbacks?
*/
static int rcu_nocb_needs_gp(struct rcu_data *rdp)
{
return rdp->nocb_needs_gp;
}
/* Is the specified CPU a no-CBs CPU? */ /* Is the specified CPU a no-CBs CPU? */
static bool is_nocb_cpu(int cpu) static bool is_nocb_cpu(int cpu)
{ {
@ -2265,95 +2269,39 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
} }
/* /*
* There must be at least one non-no-CBs CPU in operation at any given * If necessary, kick off a new grace period, and either way wait
* time, because no-CBs CPUs are not capable of initiating grace periods * for a subsequent grace period to complete.
* independently. This function therefore complains if the specified
* CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
* avoid offlining the last such CPU. (Recursion is a wonderful thing,
* but you have to have a base case!)
*/ */
static bool nocb_cpu_expendable(int cpu) static void rcu_nocb_wait_gp(struct rcu_data *rdp)
{ {
cpumask_var_t non_nocb_cpus; unsigned long c;
int ret; unsigned long flags;
unsigned long j;
struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
c = rnp->completed + 2;
rdp->nocb_needs_gp = true;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
/* /*
* If there are no no-CB CPUs or if this CPU is not a no-CB CPU, * Wait for the grace period. Do so interruptibly to avoid messing
* then offlining this CPU is harmless. Let it happen. * up the load average.
*/ */
if (!have_rcu_nocb_mask || is_nocb_cpu(cpu)) for (;;) {
return 1; j = jiffies;
schedule_timeout_interruptible(2);
/* If no memory, play it safe and keep the CPU around. */ raw_spin_lock_irqsave(&rnp->lock, flags);
if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO)) if (ULONG_CMP_GE(rnp->completed, c)) {
return 0; rdp->nocb_needs_gp = false;
cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask); raw_spin_unlock_irqrestore(&rnp->lock, flags);
cpumask_clear_cpu(cpu, non_nocb_cpus); break;
ret = !cpumask_empty(non_nocb_cpus); }
free_cpumask_var(non_nocb_cpus); if (j == jiffies)
return ret; flush_signals(current);
} raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/* smp_mb(); /* Ensure that CB invocation happens after GP end. */
* Helper structure for remote registry of RCU callbacks.
* This is needed for when a no-CBs CPU needs to start a grace period.
* If it just invokes call_rcu(), the resulting callback will be queued,
* which can result in deadlock.
*/
struct rcu_head_remote {
struct rcu_head *rhp;
call_rcu_func_t *crf;
void (*func)(struct rcu_head *rhp);
};
/*
* Register a callback as specified by the rcu_head_remote struct.
* This function is intended to be invoked via smp_call_function_single().
*/
static void call_rcu_local(void *arg)
{
struct rcu_head_remote *rhrp =
container_of(arg, struct rcu_head_remote, rhp);
rhrp->crf(rhrp->rhp, rhrp->func);
}
/*
* Set up an rcu_head_remote structure and then invoke call_rcu_local()
* on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
* smp_call_function_single().
*/
static void invoke_crf_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp),
call_rcu_func_t crf)
{
struct rcu_head_remote rhr;
rhr.rhp = rhp;
rhr.crf = crf;
rhr.func = func;
smp_call_function_single(0, call_rcu_local, &rhr, 1);
}
/*
* Helper functions to be passed to wait_rcu_gp(), each of which
* invokes invoke_crf_remote() to register a callback appropriately.
*/
static void __maybe_unused
call_rcu_preempt_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp))
{
invoke_crf_remote(rhp, func, call_rcu);
}
static void call_rcu_bh_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp))
{
invoke_crf_remote(rhp, func, call_rcu_bh);
}
static void call_rcu_sched_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp))
{
invoke_crf_remote(rhp, func, call_rcu_sched);
} }
/* /*
@ -2390,7 +2338,7 @@ static int rcu_nocb_kthread(void *arg)
cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
ACCESS_ONCE(rdp->nocb_p_count) += c; ACCESS_ONCE(rdp->nocb_p_count) += c;
ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
wait_rcu_gp(rdp->rsp->call_remote); rcu_nocb_wait_gp(rdp);
/* Each pass through the following loop invokes a callback. */ /* Each pass through the following loop invokes a callback. */
trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
@ -2443,26 +2391,22 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
} }
/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
static void init_nocb_callback_list(struct rcu_data *rdp) static bool init_nocb_callback_list(struct rcu_data *rdp)
{ {
if (rcu_nocb_mask == NULL || if (rcu_nocb_mask == NULL ||
!cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
return; return false;
rdp->nxttail[RCU_NEXT_TAIL] = NULL; rdp->nxttail[RCU_NEXT_TAIL] = NULL;
} return true;
/* Initialize the ->call_remote fields in the rcu_state structures. */
static void __init rcu_init_nocb(void)
{
#ifdef CONFIG_PREEMPT_RCU
rcu_preempt_state.call_remote = call_rcu_preempt_remote;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
rcu_bh_state.call_remote = call_rcu_bh_remote;
rcu_sched_state.call_remote = call_rcu_sched_remote;
} }
#else /* #ifdef CONFIG_RCU_NOCB_CPU */ #else /* #ifdef CONFIG_RCU_NOCB_CPU */
static int rcu_nocb_needs_gp(struct rcu_data *rdp)
{
return 0;
}
static bool is_nocb_cpu(int cpu) static bool is_nocb_cpu(int cpu)
{ {
return false; return false;
@ -2480,11 +2424,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
return 0; return 0;
} }
static bool nocb_cpu_expendable(int cpu)
{
return 1;
}
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{ {
} }
@ -2493,12 +2432,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
{ {
} }
static void init_nocb_callback_list(struct rcu_data *rdp) static bool init_nocb_callback_list(struct rcu_data *rdp)
{
}
static void __init rcu_init_nocb(void)
{ {
return false;
} }
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */