2005-04-16 15:20:36 -07:00
|
|
|
/*
|
|
|
|
* include/linux/cpu.h - generic cpu definition
|
|
|
|
*
|
|
|
|
* This is mainly for topological representation. We define the
|
|
|
|
* basic 'struct cpu' here, which can be embedded in per-arch
|
|
|
|
* definitions of processors.
|
|
|
|
*
|
|
|
|
* Basic handling of the devices is done in drivers/base/cpu.c
|
|
|
|
*
|
2013-05-03 06:45:48 -04:00
|
|
|
* CPUs are exported via sysfs in the devices/system/cpu
|
2005-04-16 15:20:36 -07:00
|
|
|
* directory.
|
|
|
|
*/
|
|
|
|
#ifndef _LINUX_CPU_H_
|
|
|
|
#define _LINUX_CPU_H_
|
|
|
|
|
|
|
|
#include <linux/node.h>
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/cpumask.h>
|
2016-02-26 18:43:28 +00:00
|
|
|
#include <linux/cpuhotplug.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2012-01-30 11:46:54 -05:00
|
|
|
struct device;
|
2013-10-03 17:24:51 -04:00
|
|
|
struct device_node;
|
2014-09-30 14:48:24 +01:00
|
|
|
struct attribute_group;
|
2012-01-30 11:46:54 -05:00
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
 * Generic CPU descriptor. Per the header comment of this file, it is meant
 * to be embedded in per-arch definitions of processors.
 */
struct cpu {
|
|
|
|
int node_id; /* The node which contains the CPU */
|
2006-12-07 02:14:10 +01:00
|
|
|
int hotpluggable; /* creates sysfs control file if hotpluggable */
|
2011-12-21 14:29:42 -08:00
|
|
|
struct device dev; /* embedded device object; device handling lives in drivers/base/cpu.c */
|
2005-04-16 15:20:36 -07:00
|
|
|
};
|
|
|
|
|
2016-02-26 18:43:28 +00:00
|
|
|
extern void boot_cpu_init(void);
|
|
|
|
extern void boot_cpu_state_init(void);
|
2017-02-05 14:47:12 +01:00
|
|
|
extern void cpu_init(void);
|
|
|
|
extern void trap_init(void);
|
2016-02-26 18:43:28 +00:00
|
|
|
|
[PATCH] node hotplug: register cpu: remove node struct
With Goto-san's patch, we can add new pgdat/node at runtime. I'm now
considering node-hot-add with cpu + memory on ACPI.
I found acpi container, which describes node, could evaluate cpu before
memory. This means cpu-hot-add occurs before memory hot add.
In most part, cpu-hot-add doesn't depend on node hot add. But register_cpu(),
which creates symbolic link from node to cpu, requires that node should be
onlined before register_cpu(). When a node is onlined, its pgdat should be
there.
This patch-set holds off creating symbolic link from node to cpu
until node is onlined.
This removes node arguments from register_cpu().
Now, register_cpu() requires 'struct node' as its argument. But the array of
struct node is unified in drivers/base/node.c now (by Goto's node hotplug
patch). We can get struct node in generic way. So, this argument is not
necessary now.
This patch also guarantees add cpu under node only when node is onlined. It
is necessary for node-hot-add vs. cpu-hot-add patch following this.
Moreover, register_cpu calculates cpu->node_id by cpu_to_node() without regard
to its 'struct node *root' argument. This patch removes it.
Also modify callers of register_cpu()/unregister_cpu, whose args are changed
by register-cpu-remove-node-struct patch.
[Brice.Goglin@ens-lyon.org: fix it]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Brice Goglin <Brice.Goglin@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 02:53:41 -07:00
|
|
|
extern int register_cpu(struct cpu *cpu, int num);
|
2011-12-21 14:29:42 -08:00
|
|
|
extern struct device *get_cpu_device(unsigned cpu);
|
2011-12-03 13:06:50 -08:00
|
|
|
extern bool cpu_is_hotpluggable(unsigned cpu);
|
2013-08-15 14:01:40 +01:00
|
|
|
extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id);
|
2013-10-03 17:24:51 -04:00
|
|
|
extern bool arch_find_n_match_cpu_physical_id(struct device_node *cpun,
|
|
|
|
int cpu, unsigned int *thread);
|
2006-10-24 18:31:24 +02:00
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
extern int cpu_add_dev_attr(struct device_attribute *attr);
|
|
|
|
extern void cpu_remove_dev_attr(struct device_attribute *attr);
|
2006-10-24 18:31:24 +02:00
|
|
|
|
2011-12-21 14:29:42 -08:00
|
|
|
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
|
|
|
|
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
|
2006-10-24 18:31:24 +02:00
|
|
|
|
2015-07-17 16:23:42 -07:00
|
|
|
extern __printf(4, 5)
|
|
|
|
struct device *cpu_device_create(struct device *parent, void *drvdata,
|
|
|
|
const struct attribute_group **groups,
|
|
|
|
const char *fmt, ...);
|
2005-04-16 15:20:36 -07:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
[PATCH] node hotplug: register cpu: remove node struct
With Goto-san's patch, we can add new pgdat/node at runtime. I'm now
considering node-hot-add with cpu + memory on ACPI.
I found acpi container, which describes node, could evaluate cpu before
memory. This means cpu-hot-add occurs before memory hot add.
In most part, cpu-hot-add doesn't depend on node hot add. But register_cpu(),
which creates symbolic link from node to cpu, requires that node should be
onlined before register_cpu(). When a node is onlined, its pgdat should be
there.
This patch-set holds off creating symbolic link from node to cpu
until node is onlined.
This removes node arguments from register_cpu().
Now, register_cpu() requires 'struct node' as its argument. But the array of
struct node is unified in drivers/base/node.c now (by Goto's node hotplug
patch). We can get struct node in generic way. So, this argument is not
necessary now.
This patch also guarantees add cpu under node only when node is onlined. It
is necessary for node-hot-add vs. cpu-hot-add patch following this.
Moreover, register_cpu calculates cpu->node_id by cpu_to_node() without regard
to its 'struct node *root' argument. This patch removes it.
Also modify callers of register_cpu()/unregister_cpu, whose args are changed
by register-cpu-remove-node-struct patch.
[Brice.Goglin@ens-lyon.org: fix it]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Brice Goglin <Brice.Goglin@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 02:53:41 -07:00
|
|
|
extern void unregister_cpu(struct cpu *cpu);
|
2009-11-25 17:23:25 +00:00
|
|
|
extern ssize_t arch_cpu_probe(const char *, size_t);
|
|
|
|
extern ssize_t arch_cpu_release(const char *, size_t);
|
2005-04-16 15:20:36 -07:00
|
|
|
#endif
|
|
|
|
struct notifier_block;
|
|
|
|
|
2011-07-25 17:13:08 -07:00
|
|
|
#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */
|
|
|
|
#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */
|
|
|
|
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
|
|
|
|
#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
|
|
|
|
* lock is dropped */
|
2016-02-26 18:43:44 +00:00
|
|
|
#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly,
|
smpboot: Add common code for notification from dying CPU
RCU ignores offlined CPUs, so they cannot safely run RCU read-side code.
(They -can- use SRCU, but not RCU.) This means that any use of RCU
during or after the call to arch_cpu_idle_dead() is unsafe. Unfortunately,
commit 2ed53c0d6cc99 added a complete() call, which will contain RCU
read-side critical sections if there is a task waiting to be awakened.
Which, as it turns out, there almost never is. In my qemu/KVM testing,
the to-be-awakened task is not yet asleep more than 99.5% of the time.
In current mainline, failure is even harder to reproduce, requiring a
virtualized environment that delays the outgoing CPU by at least three
jiffies between the time it exits its stop_machine() task at CPU_DYING
time and the time it calls arch_cpu_idle_dead() from the idle loop.
However, this problem really can occur, especially in virtualized
environments, and therefore really does need to be fixed.
This suggests moving back to the polling loop, but using a much shorter
wait, with gentle exponential backoff instead of the old 100-millisecond
wait. Most of the time, the loop will exit without waiting at all,
and almost all of the remaining uses will wait only five microseconds.
If the outgoing CPU is preempted, a loop will wait one jiffy, then
increase the wait by a factor of 11/10ths, rounding up. As before, there
is a five-second timeout.
This commit therefore provides common-code infrastructure to do the
dying-to-surviving CPU handoff in a safe manner. This code also
provides an indication at CPU-online of whether the CPU to be onlined
previously timed out on offline. The new cpu_check_up_prepare() function
returns -EBUSY if this CPU previously took more than five seconds to
go offline, or -EAGAIN if it has not yet managed to go offline. The
rationale for -EAGAIN is that it might still be preempted, so an additional
wait might well find it correctly offlined. Architecture-specific code
can decide how to handle these conditions. Systems in which CPUs take
themselves completely offline might respond to an -EBUSY return as if
it was a zero (success) return. Systems in which the surviving CPU must
take some action might take it at this time, or might simply mark the
other CPU as unusable.
Note that architectures that take the easy way out and simply pass the
-EBUSY and -EAGAIN upwards will change the sysfs API.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
[ paulmck: Fixed state machine for architectures that don't check earlier
CPU-hotplug results as suggested by James Hogan. ]
2015-02-25 10:34:39 -08:00
|
|
|
* perhaps due to preemption. */
|
2011-07-25 17:13:08 -07:00
|
|
|
|
|
|
|
/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
|
|
|
|
* operation in progress
|
|
|
|
*/
|
|
|
|
#define CPU_TASKS_FROZEN 0x0010
|
|
|
|
|
|
|
|
#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)
|
|
|
|
#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN)
|
|
|
|
/*
 * NOTE(review): CPU_UP_CANCELED, CPU_DOWN_PREPARE and CPU_DOWN_FAILED are not
 * defined in the visible part of this header. The three macros below only
 * expand successfully if those names are provided elsewhere -- confirm, or
 * retire these macros together with their remaining users.
 */
#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN)
|
|
|
|
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
|
|
|
|
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
|
|
|
|
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#ifdef CONFIG_SMP
|
2016-02-26 18:43:23 +00:00
|
|
|
extern bool cpuhp_tasks_frozen;
|
2005-04-16 15:20:36 -07:00
|
|
|
int cpu_up(unsigned int cpu);
|
2008-09-07 16:57:22 +02:00
|
|
|
void notify_cpu_starting(unsigned int cpu);
|
2008-07-25 01:47:50 -07:00
|
|
|
extern void cpu_maps_update_begin(void);
|
|
|
|
extern void cpu_maps_update_done(void);
|
2008-01-25 21:08:02 +01:00
|
|
|
|
2008-07-25 01:47:50 -07:00
|
|
|
#else /* CONFIG_SMP */
|
2016-02-26 18:43:23 +00:00
|
|
|
#define cpuhp_tasks_frozen 0
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2008-07-25 01:47:50 -07:00
|
|
|
/* No-op stub for builds without CONFIG_SMP; the SMP build declares this extern. */
static inline void cpu_maps_update_begin(void)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No-op stub for builds without CONFIG_SMP; the SMP build declares this extern. */
static inline void cpu_maps_update_done(void)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#endif /* CONFIG_SMP */
|
2011-12-21 14:29:42 -08:00
|
|
|
extern struct bus_type cpu_subsys;
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
/* Stop CPUs going up and down. */
|
2006-12-06 20:38:58 -08:00
|
|
|
|
ACPI / processor: Acquire writer lock to update CPU maps
CPU system maps are protected with reader/writer locks. The reader
lock, get_online_cpus(), assures that the maps are not updated while
holding the lock. The writer lock, cpu_hotplug_begin(), is used to
update the cpu maps along with cpu_maps_update_begin().
However, the ACPI processor handler updates the cpu maps without
holding the writer lock.
acpi_map_lsapic() is called from acpi_processor_hotadd_init() to
update cpu_possible_mask and cpu_present_mask. acpi_unmap_lsapic()
is called from acpi_processor_remove() to update cpu_possible_mask.
Currently, they are either unprotected or protected with the reader
lock, which is not correct.
For example, the get_online_cpus() below is supposed to assure that
cpu_possible_mask is not changed while the code is iterating with
for_each_possible_cpu().
get_online_cpus();
for_each_possible_cpu(cpu) {
:
}
put_online_cpus();
However, this lock has no protection with CPU hotplug since the ACPI
processor handler does not use the writer lock when it updates
cpu_possible_mask. The reader lock does not serialize within the
readers.
This patch protects them with the writer lock with cpu_hotplug_begin()
along with cpu_maps_update_begin(), which must be held before calling
cpu_hotplug_begin(). It also protects arch_register_cpu() /
arch_unregister_cpu(), which creates / deletes a sysfs cpu device
interface. For this purpose it changes cpu_hotplug_begin() and
cpu_hotplug_done() to global and exports them in cpu.h.
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-08-12 09:45:53 -06:00
|
|
|
extern void cpu_hotplug_begin(void);
|
|
|
|
extern void cpu_hotplug_done(void);
|
2008-01-25 21:08:02 +01:00
|
|
|
extern void get_online_cpus(void);
|
|
|
|
extern void put_online_cpus(void);
|
2013-06-12 14:04:36 -07:00
|
|
|
extern void cpu_hotplug_disable(void);
|
|
|
|
extern void cpu_hotplug_enable(void);
|
cpu: introduce clear_tasks_mm_cpumask() helper
Many architectures clear tasks' mm_cpumask like this:
read_lock(&tasklist_lock);
for_each_process(p) {
if (p->mm)
cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
}
read_unlock(&tasklist_lock);
Depending on the context, the code above may have several problems,
such as:
1. Working with task->mm w/o getting mm or grabbing the task lock is
dangerous as ->mm might disappear (exit_mm() assigns NULL under
task_lock(), so tasklist lock is not enough).
2. Checking for process->mm is not enough because process' main
thread may exit or detach its mm via use_mm(), but other threads
may still have a valid mm.
This patch implements a small helper function that does things
correctly, i.e.:
1. We take the task's lock while we handle its mm (we can't use
get_task_mm()/mmput() pair as mmput() might sleep);
2. To catch exited main thread case, we use find_lock_task_mm(),
which walks up all threads and returns an appropriate task
(with task lock held).
Also, Per Peter Zijlstra's idea, now we don't grab tasklist_lock in
the new helper, instead we take the rcu read lock. We can do this
because the function is called after the cpu is taken down and marked
offline, so no new tasks will get this cpu set in their mm mask.
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:22 -07:00
|
|
|
void clear_tasks_mm_cpumask(int cpu);
|
2005-04-16 15:20:36 -07:00
|
|
|
int cpu_down(unsigned int cpu);
|
2006-12-06 20:38:58 -08:00
|
|
|
|
|
|
|
#else /* CONFIG_HOTPLUG_CPU */
|
|
|
|
|
ACPI / processor: Acquire writer lock to update CPU maps
CPU system maps are protected with reader/writer locks. The reader
lock, get_online_cpus(), assures that the maps are not updated while
holding the lock. The writer lock, cpu_hotplug_begin(), is used to
update the cpu maps along with cpu_maps_update_begin().
However, the ACPI processor handler updates the cpu maps without
holding the writer lock.
acpi_map_lsapic() is called from acpi_processor_hotadd_init() to
update cpu_possible_mask and cpu_present_mask. acpi_unmap_lsapic()
is called from acpi_processor_remove() to update cpu_possible_mask.
Currently, they are either unprotected or protected with the reader
lock, which is not correct.
For example, the get_online_cpus() below is supposed to assure that
cpu_possible_mask is not changed while the code is iterating with
for_each_possible_cpu().
get_online_cpus();
for_each_possible_cpu(cpu) {
:
}
put_online_cpus();
However, this lock has no protection with CPU hotplug since the ACPI
processor handler does not use the writer lock when it updates
cpu_possible_mask. The reader lock does not serialize within the
readers.
This patch protects them with the writer lock with cpu_hotplug_begin()
along with cpu_maps_update_begin(), which must be held before calling
cpu_hotplug_begin(). It also protects arch_register_cpu() /
arch_unregister_cpu(), which creates / deletes a sysfs cpu device
interface. For this purpose it changes cpu_hotplug_begin() and
cpu_hotplug_done() to global and exports them in cpu.h.
Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-08-12 09:45:53 -06:00
|
|
|
/* No-op stub used when CONFIG_HOTPLUG_CPU is not set. */
static inline void cpu_hotplug_begin(void)
{
}
|
|
|
|
/* No-op stub used when CONFIG_HOTPLUG_CPU is not set. */
static inline void cpu_hotplug_done(void)
{
}
|
2008-01-25 21:08:02 +01:00
|
|
|
#define get_online_cpus() do { } while (0)
|
|
|
|
#define put_online_cpus() do { } while (0)
|
2013-06-12 14:04:36 -07:00
|
|
|
#define cpu_hotplug_disable() do { } while (0)
|
|
|
|
#define cpu_hotplug_enable() do { } while (0)
|
2006-12-06 20:38:58 -08:00
|
|
|
#endif /* CONFIG_HOTPLUG_CPU */
|
2005-04-16 15:20:36 -07:00
|
|
|
|
2007-08-30 23:56:29 -07:00
|
|
|
#ifdef CONFIG_PM_SLEEP_SMP
|
2016-08-17 13:50:25 +01:00
|
|
|
extern int freeze_secondary_cpus(int primary);
|
|
|
|
/*
 * Convenience wrapper: calls freeze_secondary_cpus() with 0 as the
 * 'primary' argument and returns its result unchanged.
 */
static inline int disable_nonboot_cpus(void)
|
|
|
|
{
|
|
|
|
return freeze_secondary_cpus(0);
|
|
|
|
}
|
2006-09-25 23:32:48 -07:00
|
|
|
extern void enable_nonboot_cpus(void);
|
2007-08-30 23:56:29 -07:00
|
|
|
#else /* !CONFIG_PM_SLEEP_SMP */
|
2006-09-25 23:32:48 -07:00
|
|
|
/*
 * Stub used when CONFIG_PM_SLEEP_SMP is not set: does nothing and
 * always returns 0.
 */
static inline int disable_nonboot_cpus(void)
{
	return 0;
}
|
|
|
|
/* No-op stub used when CONFIG_PM_SLEEP_SMP is not set. */
static inline void enable_nonboot_cpus(void)
{
}
|
2007-08-30 23:56:29 -07:00
|
|
|
#endif /* !CONFIG_PM_SLEEP_SMP */
|
2006-09-25 23:32:48 -07:00
|
|
|
|
2013-03-21 22:49:34 +01:00
|
|
|
void cpu_startup_entry(enum cpuhp_state state);
|
|
|
|
|
2013-03-21 22:49:35 +01:00
|
|
|
void cpu_idle_poll_ctrl(bool enable);
|
|
|
|
|
2016-10-07 17:02:55 -07:00
|
|
|
/* Attach to any functions which should be considered cpuidle. */
|
|
|
|
#define __cpuidle __attribute__((__section__(".cpuidle.text")))
|
|
|
|
|
|
|
|
bool cpu_in_idle(unsigned long pc);
|
|
|
|
|
2013-03-21 22:49:35 +01:00
|
|
|
void arch_cpu_idle(void);
|
|
|
|
void arch_cpu_idle_prepare(void);
|
|
|
|
void arch_cpu_idle_enter(void);
|
|
|
|
void arch_cpu_idle_exit(void);
|
|
|
|
void arch_cpu_idle_dead(void);
|
|
|
|
|
smpboot: Add common code for notification from dying CPU
RCU ignores offlined CPUs, so they cannot safely run RCU read-side code.
(They -can- use SRCU, but not RCU.) This means that any use of RCU
during or after the call to arch_cpu_idle_dead() is unsafe. Unfortunately,
commit 2ed53c0d6cc99 added a complete() call, which will contain RCU
read-side critical sections if there is a task waiting to be awakened.
Which, as it turns out, there almost never is. In my qemu/KVM testing,
the to-be-awakened task is not yet asleep more than 99.5% of the time.
In current mainline, failure is even harder to reproduce, requiring a
virtualized environment that delays the outgoing CPU by at least three
jiffies between the time it exits its stop_machine() task at CPU_DYING
time and the time it calls arch_cpu_idle_dead() from the idle loop.
However, this problem really can occur, especially in virtualized
environments, and therefore really does need to be fixed.
This suggests moving back to the polling loop, but using a much shorter
wait, with gentle exponential backoff instead of the old 100-millisecond
wait. Most of the time, the loop will exit without waiting at all,
and almost all of the remaining uses will wait only five microseconds.
If the outgoing CPU is preempted, a loop will wait one jiffy, then
increase the wait by a factor of 11/10ths, rounding up. As before, there
is a five-second timeout.
This commit therefore provides common-code infrastructure to do the
dying-to-surviving CPU handoff in a safe manner. This code also
provides an indication at CPU-online of whether the CPU to be onlined
previously timed out on offline. The new cpu_check_up_prepare() function
returns -EBUSY if this CPU previously took more than five seconds to
go offline, or -EAGAIN if it has not yet managed to go offline. The
rationale for -EAGAIN is that it might still be preempted, so an additional
wait might well find it correctly offlined. Architecture-specific code
can decide how to handle these conditions. Systems in which CPUs take
themselves completely offline might respond to an -EBUSY return as if
it was a zero (success) return. Systems in which the surviving CPU must
take some action might take it at this time, or might simply mark the
other CPU as unusable.
Note that architectures that take the easy way out and simply pass the
-EBUSY and -EAGAIN upwards will change the sysfs API.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
[ paulmck: Fixed state machine for architectures that don't check earlier
CPU-hotplug results as suggested by James Hogan. ]
2015-02-25 10:34:39 -08:00
|
|
|
int cpu_report_state(int cpu);
|
|
|
|
int cpu_check_up_prepare(int cpu);
|
|
|
|
void cpu_set_state_online(int cpu);
|
2016-11-28 23:03:05 -08:00
|
|
|
void play_idle(unsigned long duration_ms);
|
|
|
|
|
smpboot: Add common code for notification from dying CPU
RCU ignores offlined CPUs, so they cannot safely run RCU read-side code.
(They -can- use SRCU, but not RCU.) This means that any use of RCU
during or after the call to arch_cpu_idle_dead() is unsafe. Unfortunately,
commit 2ed53c0d6cc99 added a complete() call, which will contain RCU
read-side critical sections if there is a task waiting to be awakened.
Which, as it turns out, there almost never is. In my qemu/KVM testing,
the to-be-awakened task is not yet asleep more than 99.5% of the time.
In current mainline, failure is even harder to reproduce, requiring a
virtualized environment that delays the outgoing CPU by at least three
jiffies between the time it exits its stop_machine() task at CPU_DYING
time and the time it calls arch_cpu_idle_dead() from the idle loop.
However, this problem really can occur, especially in virtualized
environments, and therefore really does need to be fixed.
This suggests moving back to the polling loop, but using a much shorter
wait, with gentle exponential backoff instead of the old 100-millisecond
wait. Most of the time, the loop will exit without waiting at all,
and almost all of the remaining uses will wait only five microseconds.
If the outgoing CPU is preempted, a loop will wait one jiffy, then
increase the wait by a factor of 11/10ths, rounding up. As before, there
is a five-second timeout.
This commit therefore provides common-code infrastructure to do the
dying-to-surviving CPU handoff in a safe manner. This code also
provides an indication at CPU-online of whether the CPU to be onlined
previously timed out on offline. The new cpu_check_up_prepare() function
returns -EBUSY if this CPU previously took more than five seconds to
go offline, or -EAGAIN if it has not yet managed to go offline. The
rationale for -EAGAIN is that it might still be preempted, so an additional
wait might well find it correctly offlined. Architecture-specific code
can decide how to handle these conditions. Systems in which CPUs take
themselves completely offline might respond to an -EBUSY return as if
it was a zero (success) return. Systems in which the surviving CPU must
take some action might take it at this time, or might simply mark the
other CPU as unusable.
Note that architectures that take the easy way out and simply pass the
-EBUSY and -EAGAIN upwards will change the sysfs API.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
[ paulmck: Fixed state machine for architectures that don't check earlier
CPU-hotplug results as suggested by James Hogan. ]
2015-02-25 10:34:39 -08:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
bool cpu_wait_death(unsigned int cpu, int seconds);
|
|
|
|
bool cpu_report_death(void);
|
2016-02-26 18:43:43 +00:00
|
|
|
void cpuhp_report_idle_dead(void);
|
|
|
|
#else
|
|
|
|
/* No-op stub used when CONFIG_HOTPLUG_CPU is not set. */
static inline void cpuhp_report_idle_dead(void)
{
}
|
smpboot: Add common code for notification from dying CPU
RCU ignores offlined CPUs, so they cannot safely run RCU read-side code.
(They -can- use SRCU, but not RCU.) This means that any use of RCU
during or after the call to arch_cpu_idle_dead() is unsafe. Unfortunately,
commit 2ed53c0d6cc99 added a complete() call, which will contain RCU
read-side critical sections if there is a task waiting to be awakened.
Which, as it turns out, there almost never is. In my qemu/KVM testing,
the to-be-awakened task is not yet asleep more than 99.5% of the time.
In current mainline, failure is even harder to reproduce, requiring a
virtualized environment that delays the outgoing CPU by at least three
jiffies between the time it exits its stop_machine() task at CPU_DYING
time and the time it calls arch_cpu_idle_dead() from the idle loop.
However, this problem really can occur, especially in virtualized
environments, and therefore really does need to be fixed.
This suggests moving back to the polling loop, but using a much shorter
wait, with gentle exponential backoff instead of the old 100-millisecond
wait. Most of the time, the loop will exit without waiting at all,
and almost all of the remaining uses will wait only five microseconds.
If the outgoing CPU is preempted, a loop will wait one jiffy, then
increase the wait by a factor of 11/10ths, rounding up. As before, there
is a five-second timeout.
This commit therefore provides common-code infrastructure to do the
dying-to-surviving CPU handoff in a safe manner. This code also
provides an indication at CPU-online of whether the CPU to be onlined
previously timed out on offline. The new cpu_check_up_prepare() function
returns -EBUSY if this CPU previously took more than five seconds to
go offline, or -EAGAIN if it has not yet managed to go offline. The
rationale for -EAGAIN is that it might still be preempted, so an additional
wait might well find it correctly offlined. Architecture-specific code
can decide how to handle these conditions. Systems in which CPUs take
themselves completely offline might respond to an -EBUSY return as if
it was a zero (success) return. Systems in which the surviving CPU must
take some action might take it at this time, or might simply mark the
other CPU as unusable.
Note that architectures that take the easy way out and simply pass the
-EBUSY and -EAGAIN upwards will change the sysfs API.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
[ paulmck: Fixed state machine for architectures that don't check earlier
CPU-hotplug results as suggested by James Hogan. ]
2015-02-25 10:34:39 -08:00
|
|
|
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
#endif /* _LINUX_CPU_H_ */
|