2019-06-03 05:44:50 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* SMP initialisation and IPI support
|
|
|
|
* Based on arch/arm/kernel/smp.c
|
|
|
|
*
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
*/
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
#include <linux/acpi.h>
|
arm64: kernel: Add arch-specific SDEI entry code and CPU masking
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications.
Such notifications enter the kernel at the registered entry-point
with the register values of the interrupted CPU context. Because this
is not a CPU exception, it cannot reuse the existing entry code
(crucially, we don't implicitly know which exception level we interrupted).
Add the entry point to entry.S to set us up for calling into C code. If
the event interrupted code that had interrupts masked, we always return
to that location. Otherwise we pretend this was an IRQ, and use SDEI's
complete_and_resume call to return to vbar_el1 + offset.
This allows the kernel to deliver signals to user space processes. For
KVM this triggers the world switch, a quick spin round vcpu_run, then
back into the guest, unless there are pending signals.
Add sdei_mask_local_cpu() calls to the smp_send_stop() code, this covers
the panic() code-path, which doesn't invoke cpuhotplug notifiers.
Because we can interrupt entry-from/exit-to another EL, we can't trust the
value in sp_el0 or x29, even if we interrupted the kernel, in this case
the code in entry.S will save/restore sp_el0 and use the value in
__entry_task.
When we have VMAP stacks we can interrupt the stack-overflow test, which
stirs x0 into sp, meaning we have to have our own VMAP stacks. For now
these are allocated when we probe the interface. Future patches will add
refcounting hooks to allow the arch code to allocate them lazily.
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-01-08 15:38:12 +00:00
|
|
|
#include <linux/arm_sdei.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/spinlock.h>
|
2017-02-01 18:08:20 +00:00
|
|
|
#include <linux/sched/mm.h>
|
2017-02-08 17:51:36 +00:00
|
|
|
#include <linux/sched/hotplug.h>
|
2017-02-08 17:51:37 +00:00
|
|
|
#include <linux/sched/task_stack.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/cache.h>
|
|
|
|
#include <linux/profile.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/irq.h>
|
2019-01-31 14:58:55 +00:00
|
|
|
#include <linux/irqchip/arm-gic-v3.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/clockchips.h>
|
|
|
|
#include <linux/completion.h>
|
|
|
|
#include <linux/of.h>
|
2014-05-12 15:48:51 +00:00
|
|
|
#include <linux/irq_work.h>
|
2020-06-20 16:19:00 +00:00
|
|
|
#include <linux/kernel_stat.h>
|
2017-04-03 02:24:36 +00:00
|
|
|
#include <linux/kexec.h>
|
2023-09-06 16:03:01 +00:00
|
|
|
#include <linux/kgdb.h>
|
2019-11-28 19:58:05 +00:00
|
|
|
#include <linux/kvm_host.h>
|
2023-09-06 16:02:59 +00:00
|
|
|
#include <linux/nmi.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2014-11-14 15:54:08 +00:00
|
|
|
#include <asm/alternative.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <asm/atomic.h>
|
|
|
|
#include <asm/cacheflush.h>
|
2014-07-16 15:32:44 +00:00
|
|
|
#include <asm/cpu.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <asm/cputype.h>
|
2013-10-24 19:30:15 +00:00
|
|
|
#include <asm/cpu_ops.h>
|
2017-11-02 12:12:34 +00:00
|
|
|
#include <asm/daifflags.h>
|
2019-11-28 19:58:05 +00:00
|
|
|
#include <asm/kvm_mmu.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <asm/mmu_context.h>
|
2016-04-08 22:50:27 +00:00
|
|
|
#include <asm/numa.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <asm/processor.h>
|
2012-08-29 08:47:19 +00:00
|
|
|
#include <asm/smp_plat.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
#include <asm/sections.h>
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include <asm/ptrace.h>
|
2015-07-29 11:07:57 +00:00
|
|
|
#include <asm/virt.h>
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2014-07-25 20:05:32 +00:00
|
|
|
#include <trace/events/ipi.h>
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* as from 2.5, kernels no longer have an init_tasks structure
|
|
|
|
* so we need some other way of telling a new secondary core
|
|
|
|
* where to place its SVC stack
|
|
|
|
*/
|
|
|
|
struct secondary_data secondary_data;
|
2016-02-23 10:31:42 +00:00
|
|
|
/* Number of CPUs which aren't online, but looping in kernel text. */
|
2020-04-23 06:33:26 +00:00
|
|
|
static int cpus_stuck_in_kernel;
|
2012-03-05 11:49:30 +00:00
|
|
|
|
|
|
|
/*
 * Inter-processor interrupt message types.
 *
 * Enumerators before NR_IPI are numbered consecutively from zero, so
 * NR_IPI is their count. The numbering continues from NR_IPI for the
 * special IPIs, and MAX_IPI is the total number of message types.
 */
enum ipi_msg_type {
	IPI_RESCHEDULE,
	IPI_CALL_FUNC,
	IPI_CPU_STOP,
	IPI_CPU_STOP_NMI,
	IPI_TIMER,
	IPI_IRQ_WORK,
	NR_IPI,
	/*
	 * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
	 * with trace_ipi_*
	 */
	IPI_CPU_BACKTRACE = NR_IPI,
	IPI_KGDB_ROUNDUP,
	MAX_IPI
};
|
|
|
|
|
2023-09-06 16:03:02 +00:00
|
|
|
static int ipi_irq_base __ro_after_init;
|
|
|
|
static int nr_ipi __ro_after_init = NR_IPI;
|
|
|
|
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major differences is that, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), we end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
/*
 * NOTE(review): presumably records that the CPU stop in progress is a
 * crash stop rather than a regular one - confirm against the users of
 * this flag, which are not visible from here.
 */
static bool crash_stop;
|
|
|
|
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major differences is that, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), we end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
static void ipi_setup(int cpu);
|
|
|
|
|
2016-02-23 10:31:42 +00:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
static void ipi_teardown(int cpu);
static int op_cpu_kill(unsigned int cpu);
#else
/*
 * Stub used when CONFIG_HOTPLUG_CPU is not set: @cpu is ignored and the
 * call always fails with -ENOSYS.
 */
static inline int op_cpu_kill(unsigned int cpu)
{
	return -ENOSYS;
}
#endif
|
|
|
|
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* Boot a secondary CPU, and assign it the specified idle task.
|
|
|
|
* This also gives us the initial stack to use for this CPU.
|
|
|
|
*/
|
2013-06-18 14:18:31 +00:00
|
|
|
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
2020-03-18 23:01:44 +00:00
|
|
|
const struct cpu_operations *ops = get_cpu_ops(cpu);
|
|
|
|
|
|
|
|
if (ops->cpu_boot)
|
|
|
|
return ops->cpu_boot(cpu);
|
2012-03-05 11:49:30 +00:00
|
|
|
|
arm64: factor out spin-table boot method
The arm64 kernel has an internal holding pen, which is necessary for
some systems where we can't bring CPUs online individually and must hold
multiple CPUs in a safe area until the kernel is able to handle them.
The current SMP infrastructure for arm64 is closely coupled to this
holding pen, and alternative boot methods must launch CPUs into the pen,
where they sit before they are launched into the kernel proper.
With PSCI (and possibly other future boot methods), we can bring CPUs
online individually, and need not perform the secondary_holding_pen
dance. Instead, this patch factors the holding pen management code out
to the spin-table boot method code, as it is the only boot method
requiring the pen.
A new entry point for secondaries, secondary_entry is added for other
boot methods to use, which bypasses the holding pen and its associated
overhead when bringing CPUs online. The smp.pen.text section is also
removed, as the pen can live in head.text without problem.
The cpu_operations structure is extended with two new functions,
cpu_boot and cpu_postboot, for bringing a cpu into the kernel and
performing any post-boot cleanup required by a bootmethod (e.g.
resetting the secondary_holding_pen_release to INVALID_HWID).
Documentation is added for cpu_operations.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2013-10-24 19:30:16 +00:00
|
|
|
return -EOPNOTSUPP;
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Completed by secondary_start_kernel() right after the secondary CPU
 * has marked itself online; __cpu_up() waits on it with a timeout.
 */
static DECLARE_COMPLETION(cpu_running);
|
|
|
|
|
2013-06-18 14:18:31 +00:00
|
|
|
int __cpu_up(unsigned int cpu, struct task_struct *idle)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
|
|
|
int ret;
|
2016-02-23 10:31:42 +00:00
|
|
|
long status;
|
2012-03-05 11:49:30 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to tell the secondary core where to find its stack and the
|
|
|
|
* page tables.
|
|
|
|
*/
|
arm64: split thread_info from task stack
This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.
Largely, this involves changing code to store the task_struct in sp_el0,
and acquire the thread_info from the task struct. Core code now
implements current_thread_info(), and as noted in <linux/sched.h> this
relies on offsetof(task_struct, thread_info) == 0, enforced by core
code.
This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.
Userspace clobbers sp_el0, and we can no longer restore this from the
stack. Instead, the current task is cached in a per-cpu variable that we
can safely access from early assembly as interrupts are disabled (and we
are thus not preemptible).
Both secondary entry and idle are updated to stash the sp and task
pointer separately.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-11-03 20:23:13 +00:00
|
|
|
secondary_data.task = idle;
|
2016-02-23 10:31:42 +00:00
|
|
|
update_cpu_boot_status(CPU_MMU_OFF);
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2020-03-02 02:03:40 +00:00
|
|
|
/* Now bring the CPU into our world */
|
2012-03-05 11:49:30 +00:00
|
|
|
ret = boot_secondary(cpu, idle);
|
2020-03-02 02:03:40 +00:00
|
|
|
if (ret) {
|
2024-05-29 13:34:42 +00:00
|
|
|
if (ret != -EPERM)
|
|
|
|
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
|
2018-12-10 18:07:33 +00:00
|
|
|
return ret;
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
2020-03-02 02:03:40 +00:00
|
|
|
/*
|
|
|
|
* CPU was successfully started, wait for it to come online or
|
|
|
|
* time out.
|
|
|
|
*/
|
|
|
|
wait_for_completion_timeout(&cpu_running,
|
|
|
|
msecs_to_jiffies(5000));
|
|
|
|
if (cpu_online(cpu))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
pr_crit("CPU%u: failed to come online\n", cpu);
|
arm64: split thread_info from task stack
This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.
Largely, this involves changing code to store the task_struct in sp_el0,
and acquire the thread_info from the task struct. Core code now
implements current_thread_info(), and as noted in <linux/sched.h> this
relies on offsetof(task_struct, thread_info) == 0, enforced by core
code.
This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.
Userspace clobbers sp_el0, and we can no longer restore this from the
stack. Instead, the current task is cached in a per-cpu variable that we
can safely access from early assembly as interrupts are disabled (and we
are thus not preemptible).
Both secondary entry and idle are updated to stash the sp and task
pointer separately.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-11-03 20:23:13 +00:00
|
|
|
secondary_data.task = NULL;
|
2016-02-23 10:31:42 +00:00
|
|
|
status = READ_ONCE(secondary_data.status);
|
2020-03-02 02:03:40 +00:00
|
|
|
if (status == CPU_MMU_OFF)
|
|
|
|
status = READ_ONCE(__early_cpu_boot_status);
|
2016-02-23 10:31:42 +00:00
|
|
|
|
2020-03-02 02:03:40 +00:00
|
|
|
switch (status & CPU_BOOT_STATUS_MASK) {
|
|
|
|
default:
|
|
|
|
pr_err("CPU%u: failed in unknown state : 0x%lx\n",
|
|
|
|
cpu, status);
|
|
|
|
cpus_stuck_in_kernel++;
|
|
|
|
break;
|
|
|
|
case CPU_KILL_ME:
|
|
|
|
if (!op_cpu_kill(cpu)) {
|
|
|
|
pr_crit("CPU%u: died during early boot\n", cpu);
|
2016-02-23 10:31:42 +00:00
|
|
|
break;
|
|
|
|
}
|
2020-03-02 02:03:40 +00:00
|
|
|
pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
|
2020-08-23 22:36:59 +00:00
|
|
|
fallthrough;
|
2020-03-02 02:03:40 +00:00
|
|
|
case CPU_STUCK_IN_KERNEL:
|
|
|
|
pr_crit("CPU%u: is stuck in kernel\n", cpu);
|
|
|
|
if (status & CPU_STUCK_REASON_52_BIT_VA)
|
|
|
|
pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
|
|
|
|
if (status & CPU_STUCK_REASON_NO_GRAN) {
|
|
|
|
pr_crit("CPU%u: does not support %luK granule\n",
|
|
|
|
cpu, PAGE_SIZE / SZ_1K);
|
|
|
|
}
|
|
|
|
cpus_stuck_in_kernel++;
|
|
|
|
break;
|
|
|
|
case CPU_PANIC_KERNEL:
|
|
|
|
panic("CPU%u detected unsupported configuration\n", cpu);
|
2016-02-23 10:31:42 +00:00
|
|
|
}
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2020-05-27 23:34:57 +00:00
|
|
|
return -EIO;
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
2019-01-31 14:58:55 +00:00
|
|
|
static void init_gic_priority_masking(void)
|
|
|
|
{
|
|
|
|
u32 cpuflags;
|
|
|
|
|
|
|
|
if (WARN_ON(!gic_enable_sre()))
|
|
|
|
return;
|
|
|
|
|
|
|
|
cpuflags = read_sysreg(daif);
|
|
|
|
|
|
|
|
WARN_ON(!(cpuflags & PSR_I_BIT));
|
2021-03-15 11:56:28 +00:00
|
|
|
WARN_ON(!(cpuflags & PSR_F_BIT));
|
2019-01-31 14:58:55 +00:00
|
|
|
|
2019-06-11 09:38:12 +00:00
|
|
|
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
|
2019-01-31 14:58:55 +00:00
|
|
|
}
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* This is the secondary CPU boot entry. We're using this CPU's
|
|
|
|
* idle thread stack, but a set of temporary page tables.
|
|
|
|
*/
|
2018-06-12 09:07:37 +00:00
|
|
|
asmlinkage notrace void secondary_start_kernel(void)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
2017-09-27 13:50:38 +00:00
|
|
|
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
|
2012-03-05 11:49:30 +00:00
|
|
|
struct mm_struct *mm = &init_mm;
|
2020-03-18 23:01:44 +00:00
|
|
|
const struct cpu_operations *ops;
|
2021-05-20 11:50:31 +00:00
|
|
|
unsigned int cpu = smp_processor_id();
|
2012-03-05 11:49:30 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* All kernel threads share the same mm context; grab a
|
|
|
|
* reference and switch to it.
|
|
|
|
*/
|
2017-02-27 22:30:07 +00:00
|
|
|
mmgrab(mm);
|
2012-03-05 11:49:30 +00:00
|
|
|
current->active_mm = mm;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TTBR0 is only used for the identity mapping at this stage. Make it
|
|
|
|
* point to zero page to avoid speculatively fetching new entries.
|
|
|
|
*/
|
2016-01-25 11:44:58 +00:00
|
|
|
cpu_uninstall_idmap();
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2019-01-31 14:58:55 +00:00
|
|
|
if (system_uses_irq_prio_masking())
|
|
|
|
init_gic_priority_masking();
|
|
|
|
|
2023-09-08 20:36:01 +00:00
|
|
|
rcutree_report_cpu_starting(cpu);
|
2012-03-05 11:49:30 +00:00
|
|
|
trace_hardirqs_off();
|
|
|
|
|
arm64: Delay cpu feature capability checks
At the moment we run through the arm64_features capability list for
each CPU and set the capability if one of the CPUs supports it. This
could be problematic in a heterogeneous system with differing capabilities.
Delay the CPU feature checks until all the enabled CPUs are up (i.e.,
smp_cpus_done()), so that we can make better decisions based on the
overall system capability. Once we decide and advertise the capabilities
the alternatives can be applied. From this state, we cannot roll back
a feature to disabled based on the values from a new hotplugged CPU,
due to the runtime patching and other reasons. So, for all new CPUs,
we need to make sure that they have the established system capabilities.
Failing which, we bring the CPU down, preventing it from turning online.
Once the capabilities are decided, any new CPU booting up goes through
verification to ensure that it has all the enabled capabilities and also
invokes the respective enable() method on the CPU.
The CPU errata checks are not delayed and are still executed per-CPU
to detect the respective capabilities. If we ever come across a non-errata
capability that needs to be checked on each CPU, we could introduce it via
a new capability table (or introduce a flag), which can be processed per CPU.
The next patch will make the feature checks use the system wide
safe value of a feature register.
NOTE: The enable() methods associated with the capability is scheduled
on all the CPUs (which is the only use case at the moment). If we need
a different type of 'enable()' which only needs to be run once on any CPU,
we should be able to handle that when needed.
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
[catalin.marinas@arm.com: static variable and coding style fixes]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2015-10-19 13:24:50 +00:00
|
|
|
/*
|
|
|
|
* If the system has established the capabilities, make sure
|
|
|
|
* this CPU ticks all of those. If it doesn't, the CPU will
|
|
|
|
* fail to come online.
|
|
|
|
*/
|
2016-09-09 13:07:10 +00:00
|
|
|
check_local_cpu_capabilities();
|
arm64: Delay cpu feature capability checks
At the moment we run through the arm64_features capability list for
each CPU and set the capability if one of the CPUs supports it. This
could be problematic in a heterogeneous system with differing capabilities.
Delay the CPU feature checks until all the enabled CPUs are up (i.e.,
smp_cpus_done()), so that we can make better decisions based on the
overall system capability. Once we decide and advertise the capabilities
the alternatives can be applied. From this state, we cannot roll back
a feature to disabled based on the values from a new hotplugged CPU,
due to the runtime patching and other reasons. So, for all new CPUs,
we need to make sure that they have the established system capabilities.
Failing which, we bring the CPU down, preventing it from turning online.
Once the capabilities are decided, any new CPU booting up goes through
verification to ensure that it has all the enabled capabilities and also
invokes the respective enable() method on the CPU.
The CPU errata checks are not delayed and are still executed per-CPU
to detect the respective capabilities. If we ever come across a non-errata
capability that needs to be checked on each CPU, we could introduce it via
a new capability table (or introduce a flag), which can be processed per CPU.
The next patch will make the feature checks use the system wide
safe value of a feature register.
NOTE: The enable() methods associated with the capability is scheduled
on all the CPUs (which is the only use case at the moment). If we need
a different type of 'enable()' which only needs to be run once on any CPU,
we should be able to handle that when needed.
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
[catalin.marinas@arm.com: static variable and coding style fixes]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2015-10-19 13:24:50 +00:00
|
|
|
|
2020-03-18 23:01:44 +00:00
|
|
|
ops = get_cpu_ops(cpu);
|
|
|
|
if (ops->cpu_postboot)
|
|
|
|
ops->cpu_postboot();
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2014-07-16 15:32:44 +00:00
|
|
|
/*
|
|
|
|
* Log the CPU info before it is marked online and might get read.
|
|
|
|
*/
|
|
|
|
cpuinfo_store_cpu();
|
2022-03-31 15:39:26 +00:00
|
|
|
store_cpu_topology(cpu);
|
2014-07-16 15:32:44 +00:00
|
|
|
|
2013-11-04 16:55:22 +00:00
|
|
|
/*
|
|
|
|
* Enable GIC and timers.
|
|
|
|
*/
|
|
|
|
notify_cpu_starting(cpu);
|
|
|
|
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major differences is that, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), we end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
ipi_setup(cpu);
|
|
|
|
|
2018-07-06 11:02:43 +00:00
|
|
|
numa_add_cpu(cpu);
|
2014-03-04 07:51:17 +00:00
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* OK, now it's safe to let the boot CPU continue. Wait for
|
|
|
|
* the CPU migration code to notice that the CPU is online
|
|
|
|
* before we continue.
|
|
|
|
*/
|
2017-09-27 13:50:38 +00:00
|
|
|
pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
|
|
|
|
cpu, (unsigned long)mpidr,
|
|
|
|
read_cpuid_id());
|
2016-02-23 10:31:42 +00:00
|
|
|
update_cpu_boot_status(CPU_BOOT_SUCCESS);
|
2012-03-05 11:49:30 +00:00
|
|
|
set_cpu_online(cpu, true);
|
2012-11-07 17:00:05 +00:00
|
|
|
complete(&cpu_running);
|
2012-03-05 11:49:30 +00:00
|
|
|
|
arm64: defer clearing DAIF.D
For historical reasons we unmask debug exceptions in __cpu_setup(), but
it's not necessary to unmask debug exceptions this early in the
boot/idle entry paths. It would be better to unmask debug exceptions
later in C code as this simplifies the current code and will make it
easier to rework exception masking logic to handle non-DAIF bits in
future (e.g. PSTATE.{ALLINT,PM}).
We started clearing DAIF.D in __cpu_setup() in commit:
2ce39ad15182604b ("arm64: debug: unmask PSTATE.D earlier")
At the time, we needed to ensure that DAIF.D was clear on the primary
CPU before scheduling and preemption were possible, and chose to do this
in __cpu_setup() so that this occurred in the same place for primary and
secondary CPUs. As we cannot handle debug exceptions this early, we
placed an ISB between initializing MDSCR_EL1 and clearing DAIF.D so that
no exceptions should be triggered.
Subsequently we rewrote the return-from-{idle,suspend} paths to use
__cpu_setup() in commit:
cabe1c81ea5be983 ("arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va")
... which allowed for earlier use of the MMU and had the desirable
property of using the same code to reset the CPU in the cold and warm
boot paths. This introduced a bug: DAIF.D was clear while
cpu_do_resume() restored MDSCR_EL1 and other control registers (e.g.
breakpoint/watchpoint control/value registers), and so we could
unexpectedly take debug exceptions.
We fixed that in commit:
744c6c37cc18705d ("arm64: kernel: Fix unmasked debug exceptions when restoring mdscr_el1")
... by having cpu_do_resume() use the `disable_dbg` macro to set DAIF.D
before restoring MDSCR_EL1 and other control registers. This relies on
DAIF.D being subsequently cleared again in cpu_resume().
Subsequently we reworked DAIF masking in commit:
0fbeb318754860b3 ("arm64: explicitly mask all exceptions")
... where we began enforcing a policy that DAIF.D being set implies all
other DAIF bits are set, and so e.g. we cannot take an IRQ while DAIF.D
is set. As part of this the use of `disable_dbg` in cpu_resume() was
replaced with `disable_daif` for consistency with the rest of the
kernel.
These days, there's no need to clear DAIF.D early within __cpu_setup():
* setup_arch() clears DAIF.DA before scheduling and preemption are
possible on the primary CPU, avoiding the problem we were originally
trying to work around.
Note: DAIF.IF get cleared later when interrupts are enabled for the
first time.
* secondary_start_kernel() clears all DAIF bits before scheduling and
preemption are possible on secondary CPUs.
Note: with pseudo-NMI, the PMR is initialized here before any DAIF
bits are cleared. Similar will be necessary for the architectural NMI.
* cpu_suspend() restores all DAIF bits when returning from idle,
ensuring that we don't unexpectedly leave DAIF.D clear or set.
Note: with pseudo-NMI, the PMR is initialized here before DAIF is
cleared. Similar will be necessary for the architectural NMI.
This patch removes the unmasking of debug exceptions from __cpu_setup(),
relying on the above locations to initialize DAIF. This allows some
other cleanups:
* It is no longer necessary for cpu_resume() to explicitly mask debug
(or other) exceptions, as it is always called with all DAIF bits set.
Thus we drop the use of `disable_daif`.
* The `enable_dbg` macro is no longer used, and so is dropped.
* It is no longer necessary to have an ISB immediately after
initializing MDSCR_EL1 in __cpu_setup(), and we can revert to relying
on the context synchronization that occurs when the MMU is enabled
between __cpu_setup() and code which clears DAIF.D
Comments are added to setup_arch() and secondary_start_kernel() to
explain the initial unmasking of the DAIF bits.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20240422113523.4070414-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2024-04-22 11:35:23 +00:00
|
|
|
/*
|
|
|
|
* Secondary CPUs enter the kernel with all DAIF exceptions masked.
|
|
|
|
*
|
|
|
|
* As with setup_arch() we must unmask Debug and SError exceptions, and
|
|
|
|
* as the root irqchip has already been detected and initialized we can
|
|
|
|
* unmask IRQ and FIQ at the same time.
|
|
|
|
*/
|
2017-11-02 12:12:36 +00:00
|
|
|
local_daif_restore(DAIF_PROCCTX);
|
2013-07-19 14:08:15 +00:00
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* OK, it's off to the idle thread for us
|
|
|
|
*/
|
2016-02-26 18:43:40 +00:00
|
|
|
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
2013-10-24 19:30:18 +00:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
static int op_cpu_disable(unsigned int cpu)
|
|
|
|
{
|
2020-03-18 23:01:44 +00:00
|
|
|
const struct cpu_operations *ops = get_cpu_ops(cpu);
|
|
|
|
|
2013-10-24 19:30:18 +00:00
|
|
|
/*
|
|
|
|
* If we don't have a cpu_die method, abort before we reach the point
|
|
|
|
* of no return. CPU0 may not have an cpu_ops, so test for it.
|
|
|
|
*/
|
2020-03-18 23:01:44 +00:00
|
|
|
if (!ops || !ops->cpu_die)
|
2013-10-24 19:30:18 +00:00
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We may need to abort a hot unplug for some other mechanism-specific
|
|
|
|
* reason.
|
|
|
|
*/
|
2020-03-18 23:01:44 +00:00
|
|
|
if (ops->cpu_disable)
|
|
|
|
return ops->cpu_disable(cpu);
|
2013-10-24 19:30:18 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __cpu_disable runs on the processor to be shutdown.
|
|
|
|
*/
|
|
|
|
int __cpu_disable(void)
|
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = op_cpu_disable(cpu);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2018-07-06 11:02:46 +00:00
|
|
|
remove_cpu_topology(cpu);
|
|
|
|
numa_remove_cpu(cpu);
|
|
|
|
|
2013-10-24 19:30:18 +00:00
|
|
|
/*
|
|
|
|
* Take this CPU offline. Once we clear this, we can't return,
|
|
|
|
* and we must not schedule until we're ready to give up the cpu.
|
|
|
|
*/
|
|
|
|
set_cpu_online(cpu, false);
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
ipi_teardown(cpu);
|
2013-10-24 19:30:18 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* OK - migrate IRQs away from this CPU
|
|
|
|
*/
|
2015-09-24 09:32:14 +00:00
|
|
|
irq_migrate_all_off_this_cpu();
|
|
|
|
|
2013-10-24 19:30:18 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-05-07 14:18:36 +00:00
|
|
|
static int op_cpu_kill(unsigned int cpu)
|
|
|
|
{
|
2020-03-18 23:01:44 +00:00
|
|
|
const struct cpu_operations *ops = get_cpu_ops(cpu);
|
|
|
|
|
2014-05-07 14:18:36 +00:00
|
|
|
/*
|
|
|
|
* If we have no means of synchronising with the dying CPU, then assume
|
|
|
|
* that it is really dead. We can only wait for an arbitrary length of
|
|
|
|
* time and hope that it's dead, so let's skip the wait and just hope.
|
|
|
|
*/
|
2020-03-18 23:01:44 +00:00
|
|
|
if (!ops->cpu_kill)
|
2015-04-20 16:55:30 +00:00
|
|
|
return 0;
|
2014-05-07 14:18:36 +00:00
|
|
|
|
2020-03-18 23:01:44 +00:00
|
|
|
return ops->cpu_kill(cpu);
|
2014-05-07 14:18:36 +00:00
|
|
|
}
|
|
|
|
|
2013-10-24 19:30:18 +00:00
|
|
|
/*
 * Called on the thread which is asking for a CPU to be shutdown after the
 * shutdown completed.
 *
 * @cpu: logical id of the CPU that has gone down.
 *
 * A failure reported by op_cpu_kill() is only logged; there is no return
 * value for the caller to act on.
 */
void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
	int err;

	pr_debug("CPU%u: shutdown\n", cpu);

	/*
	 * Now that the dying CPU is beyond the point of no return w.r.t.
	 * in-kernel synchronisation, try to get the firmware to help us to
	 * verify that it has really left the kernel before we consider
	 * clobbering anything it might still be using.
	 */
	err = op_cpu_kill(cpu);
	if (err)
		/* cpu is unsigned: print it with %u, as pr_debug() above does */
		pr_warn("CPU%u may not have shut down cleanly: %d\n", cpu, err);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Called from the idle thread for the CPU which has been shutdown.
|
|
|
|
*
|
|
|
|
*/
|
2023-02-16 18:42:01 +00:00
|
|
|
void __noreturn cpu_die(void)
|
2013-10-24 19:30:18 +00:00
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
2020-03-18 23:01:44 +00:00
|
|
|
const struct cpu_operations *ops = get_cpu_ops(cpu);
|
2013-10-24 19:30:18 +00:00
|
|
|
|
|
|
|
idle_task_exit();
|
|
|
|
|
2017-11-02 12:12:34 +00:00
|
|
|
local_daif_mask();
|
2013-10-24 19:30:18 +00:00
|
|
|
|
2023-05-12 21:07:33 +00:00
|
|
|
/* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
|
|
|
|
cpuhp_ap_report_dead();
|
2013-10-24 19:30:18 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Actually shutdown the CPU. This must never fail. The specific hotplug
|
|
|
|
* mechanism must perform all required cache maintenance to ensure that
|
|
|
|
* no dirty lines are lost in the process of shutting down the CPU.
|
|
|
|
*/
|
2020-03-18 23:01:44 +00:00
|
|
|
ops->cpu_die(cpu);
|
2013-10-24 19:30:18 +00:00
|
|
|
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2020-03-18 23:01:44 +00:00
|
|
|
/*
 * Call the cpu_die method of @cpu if one exists. Does nothing when
 * CONFIG_HOTPLUG_CPU is off, or when @cpu has no cpu_ops or no cpu_die
 * method.
 */
static void __cpu_try_die(int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	if (!ops || !ops->cpu_die)
		return;

	ops->cpu_die(cpu);
#endif
}
|
|
|
|
|
2016-02-23 10:31:41 +00:00
|
|
|
/*
|
|
|
|
* Kill the calling secondary CPU, early in bringup before it is turned
|
|
|
|
* online.
|
|
|
|
*/
|
2023-04-12 23:49:34 +00:00
|
|
|
void __noreturn cpu_die_early(void)
|
2016-02-23 10:31:41 +00:00
|
|
|
{
|
|
|
|
int cpu = smp_processor_id();
|
|
|
|
|
|
|
|
pr_crit("CPU%d: will not boot\n", cpu);
|
|
|
|
|
|
|
|
/* Mark this CPU absent */
|
|
|
|
set_cpu_present(cpu, 0);
|
2023-09-08 20:36:01 +00:00
|
|
|
rcutree_report_cpu_dead();
|
2016-02-23 10:31:41 +00:00
|
|
|
|
2020-03-18 23:01:44 +00:00
|
|
|
if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
|
|
|
|
update_cpu_boot_status(CPU_KILL_ME);
|
|
|
|
__cpu_try_die(cpu);
|
|
|
|
}
|
|
|
|
|
2016-02-23 10:31:42 +00:00
|
|
|
update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
|
2016-02-23 10:31:41 +00:00
|
|
|
|
|
|
|
cpu_park_loop();
|
|
|
|
}
|
|
|
|
|
2015-07-29 11:07:57 +00:00
|
|
|
/*
 * Report the exception level the CPUs were started at, warn and taint if
 * they disagree, and run the KVM layout/relocation setup when KVM is
 * enabled and the kernel itself is not running in hyp mode.
 */
static void __init hyp_mode_check(void)
{
	if (!is_hyp_mode_available()) {
		if (is_hyp_mode_mismatched())
			WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC,
				   "CPU: CPUs started in inconsistent modes");
		else
			pr_info("CPU: All CPU(s) started at EL1\n");
	} else {
		pr_info("CPU: All CPU(s) started at EL2\n");
	}

	if (!IS_ENABLED(CONFIG_KVM) || is_kernel_in_hyp_mode())
		return;

	kvm_compute_layout();
	kvm_apply_hyp_relocations();
}
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
 * smp_cpus_done - run the remaining SMP-wide setup steps.
 *
 * Logs the number of online CPUs, then calls, in this order,
 * hyp_mode_check(), setup_system_features(), setup_user_features() and
 * mark_linear_text_alias_ro(). hyp_mode_check() is deliberately called
 * before setup_system_features(), as it must run prior to alternatives
 * patching.
 *
 * @max_cpus: not used by this function.
 */
void __init smp_cpus_done(unsigned int max_cpus)
|
|
|
|
{
|
2013-08-30 17:06:48 +00:00
|
|
|
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
|
2015-07-29 11:07:57 +00:00
|
|
|
hyp_mode_check();
|
arm64: Cleanup system cpucap handling
Recent changes to remove cpus_have_const_cap() introduced new users of
cpus_have_cap() in the period between detecting system cpucaps and
patching alternatives. It would be preferable to defer these until after
the relevant cpucaps have been patched so that these can use the usual
feature check helper functions, which is clearer and has less risk of
accidental usage of code relying upon an alternative which has not yet
been patched.
This patch reworks the system-wide cpucap detection and patching to
minimize this transient period:
* The detection, enablement, and patching of system cpucaps is moved
into a new setup_system_capabilities() function so that these can be
grouped together more clearly, with no other functions called in the
period between detection and patching. This is called from
setup_system_features() before the subsequent checks that depend on
the cpucaps.
The logging of TTBR0 PAN and cpucaps with a mask is also moved here to
keep these as close as possible to update_cpu_capabilities().
At the same time, comments are corrected and improved to make the
intent clearer.
* As hyp_mode_check() only tests system register values (not hwcaps) and
must be called prior to patching, the call to hyp_mode_check() is
moved before the call to setup_system_features().
* In setup_system_features(), the use of system_uses_ttbr0_pan() is
restored, now that this occurs after alternatives are patched. This is
a partial revert of commit:
53d62e995d9eaed1 ("arm64: Avoid cpus_have_const_cap() for ARM64_HAS_PAN")
* In sve_setup() and sme_setup(), the use of system_supports_sve() and
system_supports_sme() respectively are restored, now that these occur
after alternatives are patched. This is a partial revert of commit:
a76521d160284a1e ("arm64: Avoid cpus_have_const_cap() for ARM64_{SVE,SME,SME2,FA64}")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20231212170910.3745497-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2023-12-12 17:09:09 +00:00
|
|
|
setup_system_features();
|
2023-10-16 10:24:29 +00:00
|
|
|
setup_user_features();
|
2017-03-09 20:52:01 +00:00
|
|
|
mark_linear_text_alias_ro();
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Boot-CPU preparation: switch to the runtime per-cpu area, record the boot
 * CPU's info and features, optionally enable GIC priority masking, and
 * initialise the KASAN tag modes.
 */
void __init smp_prepare_boot_cpu(void)
{
	unsigned int boot_cpu = smp_processor_id();

	/*
	 * setup_per_cpu_areas() has allocated the runtime per-cpu areas and
	 * CPU0's boot time per-cpu area is about to be freed, so move over
	 * to the runtime per-cpu area now.
	 */
	set_my_cpu_offset(per_cpu_offset(boot_cpu));

	cpuinfo_store_boot_cpu();
	setup_boot_cpu_features();

	/* Switch to GIC PMR based interrupt masking when it is in use. */
	if (system_uses_irq_prio_masking())
		init_gic_priority_masking();

	kasan_init_hw_tags();

	/* Per-cpu seeds for random tags are initialised after cpu set-up. */
	kasan_init_sw_tags();
}
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
/*
|
|
|
|
* Duplicate MPIDRs are a recipe for disaster. Scan all initialized
|
|
|
|
* entries and check for duplicates. If any is found just ignore the
|
|
|
|
* cpu. cpu_logical_map was initialized to INVALID_HWID to avoid
|
|
|
|
* matching valid MPIDR values.
|
|
|
|
*/
|
|
|
|
static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 1; (i < cpu) && (i < NR_CPUS); i++)
|
|
|
|
if (cpu_logical_map(i) == hwid)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-05-13 13:12:46 +00:00
|
|
|
/*
|
|
|
|
* Initialize cpu operations for a logical cpu and
|
|
|
|
* set it in the possible mask on success
|
|
|
|
*/
|
|
|
|
static int __init smp_cpu_setup(int cpu)
|
|
|
|
{
|
2020-03-18 23:01:44 +00:00
|
|
|
const struct cpu_operations *ops;
|
|
|
|
|
2020-03-18 23:01:43 +00:00
|
|
|
if (init_cpu_ops(cpu))
|
2015-05-13 13:12:46 +00:00
|
|
|
return -ENODEV;
|
|
|
|
|
2020-03-18 23:01:44 +00:00
|
|
|
ops = get_cpu_ops(cpu);
|
|
|
|
if (ops->cpu_init(cpu))
|
2015-05-13 13:12:46 +00:00
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
set_cpu_possible(cpu, true);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
/*
 * Set to true once an entry whose hwid equals cpu_logical_map(0) (the boot
 * CPU) has been found while parsing the MADT or the device tree.
 */
static bool bootcpu_valid __initdata;
|
|
|
|
/*
 * Logical id handed to the next enumerated secondary CPU. Starts at 1
 * because logical id 0 is reserved for the boot CPU.
 */
static unsigned int cpu_count = 1;
|
|
|
|
|
2024-05-29 13:34:43 +00:00
|
|
|
int arch_register_cpu(int cpu)
|
|
|
|
{
|
|
|
|
acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
|
|
|
|
struct cpu *c = &per_cpu(cpu_devices, cpu);
|
|
|
|
|
|
|
|
if (!acpi_disabled && !acpi_handle &&
|
|
|
|
IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU))
|
|
|
|
return -EPROBE_DEFER;
|
|
|
|
|
|
|
|
#ifdef CONFIG_ACPI_HOTPLUG_CPU
|
|
|
|
/* For now block anything that looks like physical CPU Hotplug */
|
|
|
|
if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) {
|
|
|
|
pr_err_once("Changing CPU present bit is not supported\n");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Availability of the acpi handle is sufficient to establish
|
|
|
|
* that _STA has aleady been checked. No need to recheck here.
|
|
|
|
*/
|
|
|
|
c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
|
|
|
|
|
|
|
|
return register_cpu(c, cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_ACPI_HOTPLUG_CPU
|
|
|
|
void arch_unregister_cpu(int cpu)
|
|
|
|
{
|
|
|
|
acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
|
|
|
|
struct cpu *c = &per_cpu(cpu_devices, cpu);
|
|
|
|
acpi_status status;
|
|
|
|
unsigned long long sta;
|
|
|
|
|
|
|
|
if (!acpi_handle) {
|
|
|
|
pr_err_once("Removing a CPU without associated ACPI handle\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta);
|
|
|
|
if (ACPI_FAILURE(status))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* For now do not allow anything that looks like physical CPU HP */
|
|
|
|
if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
|
|
|
|
pr_err_once("Changing CPU present bit is not supported\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
unregister_cpu(c);
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
#ifdef CONFIG_ACPI
|
2017-04-11 08:39:54 +00:00
|
|
|
/* Copies of the MADT GICC entries, indexed by logical cpu id. */
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
|
|
|
|
|
|
|
|
struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
|
|
|
|
{
|
|
|
|
return &cpu_madt_gicc[cpu];
|
|
|
|
}
|
cpufreq: CPPC: Add per_cpu efficiency_class
In ACPI, describing power efficiency of CPUs can be done through the
following arm specific field:
ACPI 6.4, s5.2.12.14 'GIC CPU Interface (GICC) Structure',
'Processor Power Efficiency Class field':
Describes the relative power efficiency of the associated pro-
cessor. Lower efficiency class numbers are more efficient than
higher ones (e.g. efficiency class 0 should be treated as more
efficient than efficiency class 1). However, absolute values
of this number have no meaning: 2 isn’t necessarily half as
efficient as 1.
The efficiency_class field is stored in the GicC structure of the
ACPI MADT table and it's currently supported in Linux for arm64 only.
Thus, this new functionality is introduced for arm64 only.
To allow the cppc_cpufreq driver to know and preprocess the
efficiency_class values of all the CPUs, add a per_cpu efficiency_class
variable to store them.
At least 2 different efficiency classes must be present,
otherwise there is no use in creating an Energy Model.
The efficiency_class values are squeezed in [0:#efficiency_class-1]
while conserving the order. For instance, efficiency classes of:
[111, 212, 250]
will be mapped to:
[0 (was 111), 1 (was 212), 2 (was 250)].
Each policy being independently registered in the driver, populating
the per_cpu efficiency_class is done only once at the driver
initialization. This prevents from having each policy re-searching the
efficiency_class values of other CPUs. The EM will be registered in a
following patch.
The patch also exports acpi_cpu_get_madt_gicc() to fetch the GicC
structure of the ACPI MADT table for each CPU.
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Pierre Gondois <Pierre.Gondois@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2022-04-25 12:38:07 +00:00
|
|
|
EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);
|
2017-04-11 08:39:54 +00:00
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
/*
|
|
|
|
* acpi_map_gic_cpu_interface - parse processor MADT entry
|
|
|
|
*
|
|
|
|
* Carry out sanity checks on MADT processor entry and initialize
|
|
|
|
* cpu_logical_map on success
|
|
|
|
*/
|
|
|
|
static void __init
|
|
|
|
acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
|
|
|
|
{
|
|
|
|
u64 hwid = processor->arm_mpidr;
|
|
|
|
|
2024-05-29 13:34:41 +00:00
|
|
|
if (!(processor->flags &
|
|
|
|
(ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) {
|
2015-07-03 07:29:06 +00:00
|
|
|
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
|
2015-05-13 13:12:47 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-07-03 07:29:06 +00:00
|
|
|
if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) {
|
|
|
|
pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid);
|
2015-05-13 13:12:47 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_mpidr_duplicate(cpu_count, hwid)) {
|
|
|
|
pr_err("duplicate CPU MPIDR 0x%llx in MADT\n", hwid);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if GICC structure of boot CPU is available in the MADT */
|
|
|
|
if (cpu_logical_map(0) == hwid) {
|
|
|
|
if (bootcpu_valid) {
|
|
|
|
pr_err("duplicate boot CPU MPIDR: 0x%llx in MADT\n",
|
|
|
|
hwid);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
bootcpu_valid = true;
|
2017-04-11 08:39:54 +00:00
|
|
|
cpu_madt_gicc[0] = *processor;
|
2015-05-13 13:12:47 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cpu_count >= NR_CPUS)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* map the logical cpu id to cpu MPIDR */
|
2020-07-27 15:29:38 +00:00
|
|
|
set_cpu_logical_map(cpu_count, hwid);
|
2015-05-13 13:12:47 +00:00
|
|
|
|
2017-04-11 08:39:54 +00:00
|
|
|
cpu_madt_gicc[cpu_count] = *processor;
|
|
|
|
|
2016-01-26 11:10:38 +00:00
|
|
|
/*
|
|
|
|
* Set-up the ACPI parking protocol cpu entries
|
|
|
|
* while initializing the cpu_logical_map to
|
|
|
|
* avoid parsing MADT entries multiple times for
|
|
|
|
* nothing (ie a valid cpu_logical_map entry should
|
|
|
|
* contain a valid parking protocol data set to
|
|
|
|
* initialize the cpu if the parking protocol is
|
|
|
|
* the only available enable method).
|
|
|
|
*/
|
|
|
|
acpi_set_mailbox_entry(cpu_count, processor);
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
cpu_count++;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __init
|
2019-03-11 20:55:57 +00:00
|
|
|
acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header,
|
2015-05-13 13:12:47 +00:00
|
|
|
const unsigned long end)
|
|
|
|
{
|
|
|
|
struct acpi_madt_generic_interrupt *processor;
|
|
|
|
|
|
|
|
processor = (struct acpi_madt_generic_interrupt *)header;
|
2015-07-06 23:16:48 +00:00
|
|
|
if (BAD_MADT_GICC_ENTRY(processor, end))
|
2015-05-13 13:12:47 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2019-03-11 20:55:57 +00:00
|
|
|
acpi_table_print_madt_entry(&header->common);
|
2015-05-13 13:12:47 +00:00
|
|
|
|
|
|
|
acpi_map_gic_cpu_interface(processor);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2018-06-25 13:05:52 +00:00
|
|
|
|
|
|
|
/*
 * Build the cpu logical map from the ACPI MADT, then assign every logical
 * cpu id below nr_cpu_ids to its NUMA node.
 */
static void __init acpi_parse_and_init_cpus(void)
{
	int cpu;

	/*
	 * Walk the MADT to find out how many CPUs there are, disabled ones
	 * included, and to collect the information SMP init needs.
	 */
	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
			      acpi_parse_gic_cpu_interface, 0);

	/*
	 * ACPI describes SMP and CPU NUMA information in two separate
	 * static tables, the MADT and the SRAT.
	 *
	 * It is therefore simpler to create the cpu logical map with an
	 * MADT walk first and to map the logical cpus to their node ids
	 * afterwards, as separate steps.
	 */
	acpi_map_cpus_to_nodes();

	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		early_map_cpu_to_node(cpu, acpi_numa_get_nid(cpu));
	}
}
|
2015-05-13 13:12:47 +00:00
|
|
|
#else
|
2018-06-25 13:05:52 +00:00
|
|
|
#define acpi_parse_and_init_cpus(...) do { } while (0)
|
2015-05-13 13:12:47 +00:00
|
|
|
#endif
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
2012-08-29 08:47:19 +00:00
|
|
|
* Enumerate the possible CPU set from the device tree and build the
|
|
|
|
* cpu logical map array containing MPIDR values related to logical
|
|
|
|
* cpus. Assumes that cpu_logical_map(0) has already been initialized.
|
2012-03-05 11:49:30 +00:00
|
|
|
*/
|
2015-11-12 12:04:42 +00:00
|
|
|
static void __init of_parse_and_init_cpus(void)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
2017-02-01 23:01:05 +00:00
|
|
|
struct device_node *dn;
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2018-08-27 14:43:01 +00:00
|
|
|
for_each_of_cpu_node(dn) {
|
2021-10-06 16:43:24 +00:00
|
|
|
u64 hwid = of_get_cpu_hwid(dn, 0);
|
2012-08-29 08:47:19 +00:00
|
|
|
|
2021-10-06 16:43:24 +00:00
|
|
|
if (hwid & ~MPIDR_HWID_BITMASK)
|
2012-08-29 08:47:19 +00:00
|
|
|
goto next;
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
if (is_mpidr_duplicate(cpu_count, hwid)) {
|
2017-07-18 21:42:42 +00:00
|
|
|
pr_err("%pOF: duplicate cpu reg properties in the DT\n",
|
|
|
|
dn);
|
2012-08-29 08:47:19 +00:00
|
|
|
goto next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The numbering scheme requires that the boot CPU
|
|
|
|
* must be assigned logical id 0. Record it so that
|
|
|
|
* the logical map built from DT is validated and can
|
|
|
|
* be used.
|
|
|
|
*/
|
|
|
|
if (hwid == cpu_logical_map(0)) {
|
|
|
|
if (bootcpu_valid) {
|
2017-07-18 21:42:42 +00:00
|
|
|
pr_err("%pOF: duplicate boot cpu reg property in DT\n",
|
|
|
|
dn);
|
2012-08-29 08:47:19 +00:00
|
|
|
goto next;
|
|
|
|
}
|
|
|
|
|
|
|
|
bootcpu_valid = true;
|
2016-09-01 06:55:04 +00:00
|
|
|
early_map_cpu_to_node(0, of_node_to_nid(dn));
|
2012-08-29 08:47:19 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* cpu_logical_map has already been
|
|
|
|
* initialized and the boot cpu doesn't need
|
|
|
|
* the enable-method so continue without
|
|
|
|
* incrementing cpu.
|
|
|
|
*/
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-05-13 13:12:47 +00:00
|
|
|
if (cpu_count >= NR_CPUS)
|
2012-03-05 11:49:30 +00:00
|
|
|
goto next;
|
|
|
|
|
2012-08-29 08:47:19 +00:00
|
|
|
pr_debug("cpu logical map 0x%llx\n", hwid);
|
2020-07-27 15:29:38 +00:00
|
|
|
set_cpu_logical_map(cpu_count, hwid);
|
2016-04-08 22:50:27 +00:00
|
|
|
|
|
|
|
early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
|
2012-03-05 11:49:30 +00:00
|
|
|
next:
|
2015-05-13 13:12:47 +00:00
|
|
|
cpu_count++;
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
2015-05-13 13:12:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Enumerate the possible CPU set from the device tree or ACPI and build the
|
|
|
|
* cpu logical map array containing MPIDR values related to logical
|
|
|
|
* cpus. Assumes that cpu_logical_map(0) has already been initialized.
|
|
|
|
*/
|
|
|
|
void __init smp_init_cpus(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (acpi_disabled)
|
|
|
|
of_parse_and_init_cpus();
|
|
|
|
else
|
2018-06-25 13:05:52 +00:00
|
|
|
acpi_parse_and_init_cpus();
|
2012-03-05 11:49:30 +00:00
|
|
|
|
2016-08-09 02:30:49 +00:00
|
|
|
if (cpu_count > nr_cpu_ids)
|
2017-09-08 23:14:18 +00:00
|
|
|
pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n",
|
2016-08-09 02:30:49 +00:00
|
|
|
cpu_count, nr_cpu_ids);
|
2012-08-29 08:47:19 +00:00
|
|
|
|
|
|
|
if (!bootcpu_valid) {
|
2015-05-13 13:12:47 +00:00
|
|
|
pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
|
2012-08-29 08:47:19 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-05-13 13:12:46 +00:00
|
|
|
* We need to set the cpu_logical_map entries before enabling
|
|
|
|
* the cpus so that cpu processor description entries (DT cpu nodes
|
|
|
|
* and ACPI MADT entries) can be retrieved by matching the cpu hwid
|
|
|
|
* with entries in cpu_logical_map while initializing the cpus.
|
|
|
|
* If the cpu set-up fails, invalidate the cpu_logical_map entry.
|
2012-08-29 08:47:19 +00:00
|
|
|
*/
|
2016-08-09 02:30:49 +00:00
|
|
|
for (i = 1; i < nr_cpu_ids; i++) {
|
2015-05-13 13:12:46 +00:00
|
|
|
if (cpu_logical_map(i) != INVALID_HWID) {
|
|
|
|
if (smp_cpu_setup(i))
|
2020-07-27 15:29:38 +00:00
|
|
|
set_cpu_logical_map(i, INVALID_HWID);
|
2015-05-13 13:12:46 +00:00
|
|
|
}
|
|
|
|
}
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * smp_prepare_cpus() - prepare secondary CPUs before they are brought online.
 * @max_cpus: CPU limit requested at boot; 0 means no secondary is marked
 *            present.
 *
 * Records topology and NUMA information for the calling CPU, then walks every
 * other possible CPU, asks its cpu_operations to prepare it, and marks it
 * present on success.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int self, cpu;

	init_cpu_topology();

	/* Describe the CPU we are currently running on. */
	self = smp_processor_id();
	store_cpu_topology(self);
	numa_store_cpu_info(self);
	numa_add_cpu(self);

	/*
	 * "nosmp" implies "maxcpus=0": uniprocessor operation is mandated, so
	 * leave every secondary CPU out of the present map.
	 */
	if (!max_cpus)
		return;

	/*
	 * Build the present map (the CPUs actually populated right now) and
	 * release the secondaries from the bootloader.
	 */
	for_each_possible_cpu(cpu) {
		const struct cpu_operations *ops;

		if (cpu == smp_processor_id())
			continue;

		/* Skip CPUs without operations or whose preparation failed. */
		ops = get_cpu_ops(cpu);
		if (!ops || ops->cpu_prepare(cpu))
			continue;

		set_cpu_present(cpu, true);

		/*
		 * Store the NUMA node now rather than when the CPU comes
		 * online: early callers of cpu_to_node() (such as workqueue
		 * NUMA initialisation) run before secondaries are up and would
		 * otherwise see every CPU on node zero.
		 */
		numa_store_cpu_info(cpu);
	}
}
|
|
|
|
|
2024-06-20 06:36:00 +00:00
|
|
|
/*
 * Human-readable description of each IPI, indexed by IPI number. The string
 * is printed at the end of the matching row by arch_show_interrupts().
 */
static const char *ipi_types[MAX_IPI] __tracepoint_string = {
	[IPI_RESCHEDULE]	= "Rescheduling interrupts",
	[IPI_CALL_FUNC]		= "Function call interrupts",
	[IPI_CPU_STOP]		= "CPU stop interrupts",
	[IPI_CPU_STOP_NMI]	= "CPU stop NMIs",
	[IPI_TIMER]		= "Timer broadcast interrupts",
	[IPI_IRQ_WORK]		= "IRQ work interrupts",
	[IPI_CPU_BACKTRACE]	= "CPU backtrace interrupts",
	[IPI_KGDB_ROUNDUP]	= "KGDB roundup interrupts",
};
|
|
|
|
|
2020-05-09 13:00:23 +00:00
|
|
|
/* Forward declaration: used by the IPI-raising helpers below. */
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
|
2014-07-25 20:05:32 +00:00
|
|
|
|
2020-06-20 16:19:00 +00:00
|
|
|
unsigned long irq_err_count;
|
|
|
|
|
|
|
|
int arch_show_interrupts(struct seq_file *p, int prec)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
|
|
|
unsigned int cpu, i;
|
|
|
|
|
2024-06-20 06:36:00 +00:00
|
|
|
for (i = 0; i < MAX_IPI; i++) {
|
2014-07-25 20:05:32 +00:00
|
|
|
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
|
2012-03-05 11:49:30 +00:00
|
|
|
prec >= 4 ? " " : "");
|
2013-11-07 15:25:44 +00:00
|
|
|
for_each_online_cpu(cpu)
|
2020-12-10 19:25:46 +00:00
|
|
|
seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
|
2012-03-05 11:49:30 +00:00
|
|
|
seq_printf(p, " %s\n", ipi_types[i]);
|
|
|
|
}
|
|
|
|
|
2020-06-20 16:19:00 +00:00
|
|
|
seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
|
|
|
|
return 0;
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
2014-07-25 20:05:32 +00:00
|
|
|
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
|
|
|
|
{
|
|
|
|
smp_cross_call(mask, IPI_CALL_FUNC);
|
|
|
|
}
|
|
|
|
|
|
|
|
void arch_send_call_function_single_ipi(int cpu)
|
|
|
|
{
|
2015-01-23 05:36:42 +00:00
|
|
|
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
|
2014-07-25 20:05:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_IRQ_WORK
/*
 * arch_irq_work_raise() - send IPI_IRQ_WORK to the CPU this is called on.
 */
void arch_irq_work_raise(void)
{
	const struct cpumask *self = cpumask_of(smp_processor_id());

	smp_cross_call(self, IPI_IRQ_WORK);
}
#endif
|
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
/*
 * local_cpu_stop() - take a CPU out of service and park it; never returns.
 * @cpu: CPU number to clear from the online mask.
 *
 * The order of the steps below is deliberate and must be kept.
 */
static void __noreturn local_cpu_stop(unsigned int cpu)
{
	/*
	 * Drop out of cpu_online_mask first: the stop path uses that mask to
	 * tell which CPUs have not stopped yet.
	 */
	set_cpu_online(cpu, false);

	local_daif_mask();

	/*
	 * Mask SDEI events on this CPU as well; this covers the panic() path,
	 * which does not invoke the cpuhotplug notifiers.
	 */
	sdei_mask_local_cpu();

	cpu_park_loop();
}
|
|
|
|
|
2019-06-17 20:35:19 +00:00
|
|
|
/*
|
|
|
|
* We need to implement panic_smp_self_stop() for parallel panic() calls, so
|
|
|
|
* that cpu_online_mask gets correctly updated and smp_send_stop() can skip
|
|
|
|
* CPUs that have already stopped themselves.
|
|
|
|
*/
|
2023-04-12 23:49:35 +00:00
|
|
|
void __noreturn panic_smp_self_stop(void)
|
2019-06-17 20:35:19 +00:00
|
|
|
{
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
local_cpu_stop(smp_processor_id());
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
2023-04-12 23:49:34 +00:00
|
|
|
/*
 * ipi_cpu_crash_stop() - stop this CPU in response to a "crash stop" IPI.
 * @cpu:  number of the CPU being stopped (the CPU running this code)
 * @regs: register state handed to crash_save_cpu()
 *
 * Only meaningful with CONFIG_KEXEC_CORE; without it, reaching this
 * function is a bug. Does not return.
 */
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifndef CONFIG_KEXEC_CORE
	BUG();
#else
	/*
	 * Mask with local_daif_mask() rather than local_irq_disable() so
	 * that pseudo-NMIs are blocked as well. A "crash stop" is first
	 * attempted with a plain IRQ and only then retried with an NMI
	 * (possibly a pseudo-NMI). Should the IRQ be delivered just as the
	 * sender times out, the follow-up NMI could interrupt us here.
	 * Blocking the NMI and letting the IRQ path complete is preferable
	 * because the pt_regs captured by the IRQ will be better.
	 */
	local_daif_mask();

	crash_save_cpu(regs, cpu);

	/*
	 * Clear ourselves from cpu_online_mask so the CPU driving the stop
	 * can tell which CPUs have not stopped yet.
	 */
	set_cpu_online(cpu, false);

	sdei_mask_local_cpu();

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
		__cpu_try_die(cpu);

	/* Still running: park here, just in case. */
	cpu_park_loop();
#endif
}
|
|
|
|
|
2023-09-06 16:02:59 +00:00
|
|
|
/*
 * arm64_backtrace_ipi() - raise IPI_CPU_BACKTRACE on the CPUs in @mask.
 * @mask: CPUs that should receive the backtrace IPI
 *
 * Passed as the "raise" callback to nmi_trigger_cpumask_backtrace().
 */
static void arm64_backtrace_ipi(cpumask_t *mask)
{
	__ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
}
|
|
|
|
|
|
|
|
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
|
|
|
|
* nothing about it truly needs to be implemented using an NMI, it's
|
|
|
|
* just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
|
|
|
|
* returned false our backtrace attempt will just use a regular IPI.
|
|
|
|
*/
|
|
|
|
nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
|
|
|
|
}
|
|
|
|
|
2023-09-06 16:03:01 +00:00
|
|
|
#ifdef CONFIG_KGDB
/*
 * kgdb_roundup_cpus() - send IPI_KGDB_ROUNDUP to every other online CPU.
 *
 * The calling CPU is skipped: there is no need to round up ourselves.
 */
void kgdb_roundup_cpus(void)
{
	int self = raw_smp_processor_id();
	int target;

	for_each_online_cpu(target) {
		if (target != self)
			__ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], target);
	}
}
#endif
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
/*
|
|
|
|
* Main handler for inter-processor interrupts
|
|
|
|
*/
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
static void do_handle_IPI(int ipinr)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
|
2020-06-20 16:19:00 +00:00
|
|
|
if ((unsigned)ipinr < NR_IPI)
|
2023-01-12 19:43:38 +00:00
|
|
|
trace_ipi_entry(ipi_types[ipinr]);
|
2012-03-05 11:49:30 +00:00
|
|
|
|
|
|
|
switch (ipinr) {
|
|
|
|
case IPI_RESCHEDULE:
|
|
|
|
scheduler_ipi();
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPI_CALL_FUNC:
|
|
|
|
generic_smp_call_function_interrupt();
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPI_CPU_STOP:
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
case IPI_CPU_STOP_NMI:
|
|
|
|
if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
ipi_cpu_crash_stop(cpu, get_irq_regs());
|
2017-04-03 02:24:36 +00:00
|
|
|
unreachable();
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
} else {
|
|
|
|
local_cpu_stop(cpu);
|
2017-04-03 02:24:36 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2013-09-04 09:55:17 +00:00
|
|
|
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
|
|
|
case IPI_TIMER:
|
|
|
|
tick_receive_broadcast();
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
|
2014-05-12 15:48:51 +00:00
|
|
|
#ifdef CONFIG_IRQ_WORK
|
|
|
|
case IPI_IRQ_WORK:
|
|
|
|
irq_work_run();
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
|
2023-09-06 16:02:59 +00:00
|
|
|
case IPI_CPU_BACKTRACE:
|
|
|
|
/*
|
|
|
|
* NOTE: in some cases this _won't_ be NMI context. See the
|
|
|
|
* comment in arch_trigger_cpumask_backtrace().
|
|
|
|
*/
|
|
|
|
nmi_cpu_backtrace(get_irq_regs());
|
|
|
|
break;
|
|
|
|
|
2023-09-06 16:03:01 +00:00
|
|
|
case IPI_KGDB_ROUNDUP:
|
|
|
|
kgdb_nmicallback(cpu, get_irq_regs());
|
2016-01-26 11:10:38 +00:00
|
|
|
break;
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
default:
|
|
|
|
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
|
|
|
|
break;
|
|
|
|
}
|
2014-07-25 20:05:32 +00:00
|
|
|
|
|
|
|
if ((unsigned)ipinr < NR_IPI)
|
2023-01-12 19:43:38 +00:00
|
|
|
trace_ipi_exit(ipi_types[ipinr]);
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static irqreturn_t ipi_handler(int irq, void *data)
|
|
|
|
{
|
|
|
|
do_handle_IPI(irq - ipi_irq_base);
|
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
2020-05-09 13:00:23 +00:00
|
|
|
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
{
|
2020-05-09 13:00:23 +00:00
|
|
|
trace_ipi_raise(target, ipi_types[ipinr]);
|
|
|
|
__ipi_send_mask(ipi_desc[ipinr], target);
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
}
|
|
|
|
|
2023-09-06 16:02:59 +00:00
|
|
|
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
|
|
|
|
{
|
2023-11-07 15:26:57 +00:00
|
|
|
if (!system_uses_irq_prio_masking())
|
2023-09-06 16:02:59 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (ipi) {
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
case IPI_CPU_STOP_NMI:
|
2023-09-06 16:02:59 +00:00
|
|
|
case IPI_CPU_BACKTRACE:
|
2023-09-06 16:03:01 +00:00
|
|
|
case IPI_KGDB_ROUNDUP:
|
2023-09-06 16:02:59 +00:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major differences is that, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), we end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
static void ipi_setup(int cpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-05-09 13:00:23 +00:00
|
|
|
if (WARN_ON_ONCE(!ipi_irq_base))
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
return;
|
|
|
|
|
2023-09-06 16:02:59 +00:00
|
|
|
for (i = 0; i < nr_ipi; i++) {
|
|
|
|
if (ipi_should_be_nmi(i)) {
|
|
|
|
prepare_percpu_nmi(ipi_irq_base + i);
|
|
|
|
enable_percpu_nmi(ipi_irq_base + i, 0);
|
|
|
|
} else {
|
|
|
|
enable_percpu_irq(ipi_irq_base + i, 0);
|
|
|
|
}
|
|
|
|
}
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
}
|
|
|
|
|
2020-09-18 12:33:18 +00:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major differences is that, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), we end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
static void ipi_teardown(int cpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-05-09 13:00:23 +00:00
|
|
|
if (WARN_ON_ONCE(!ipi_irq_base))
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
return;
|
|
|
|
|
2023-09-06 16:02:59 +00:00
|
|
|
for (i = 0; i < nr_ipi; i++) {
|
|
|
|
if (ipi_should_be_nmi(i)) {
|
|
|
|
disable_percpu_nmi(ipi_irq_base + i);
|
|
|
|
teardown_percpu_nmi(ipi_irq_base + i);
|
|
|
|
} else {
|
|
|
|
disable_percpu_irq(ipi_irq_base + i);
|
|
|
|
}
|
|
|
|
}
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if the were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major difference is that we end up, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
}
|
2020-09-18 12:33:18 +00:00
|
|
|
#endif
|
arm64: Allow IPIs to be handled as normal interrupts
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.
set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.
This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.
On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.
One of the major differences is that, in some cases
(such as when performing IRQ time accounting on the scheduler
IPI), we end up with nested irq_enter()/irq_exit() pairs.
Other than the (relatively small) overhead, there should be
no consequences to it (these pairs are designed to nest
correctly, and the accounting shouldn't be off).
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
2020-04-25 14:03:47 +00:00
|
|
|
|
|
|
|
/*
 * Register a contiguous range of interrupts to be used as IPIs.
 *
 * @ipi_base: number of the first interrupt in the range
 * @n:        number of interrupts made available by the caller
 *
 * Warns if fewer than MAX_IPI interrupts are offered and uses at most
 * MAX_IPI of them. Each one is requested with ipi_handler, either as an NMI
 * or as a regular per-CPU interrupt depending on ipi_should_be_nmi(); a
 * failed request only triggers a warning. The descriptor of every IPI is
 * cached in ipi_desc[] and the interrupt is flagged IRQ_HIDDEN. Finally the
 * base is recorded in ipi_irq_base and the IPIs are enabled via ipi_setup().
 */
void __init set_smp_ipi_range(int ipi_base, int n)
{
	int idx;

	WARN_ON(n < MAX_IPI);
	nr_ipi = min(n, MAX_IPI);

	for (idx = 0; idx < nr_ipi; idx++) {
		bool as_nmi = ipi_should_be_nmi(idx);
		int irq = ipi_base + idx;
		int err;

		if (as_nmi) {
			err = request_percpu_nmi(irq, ipi_handler,
						 "IPI", &irq_stat);
			WARN(err, "Could not request IPI %d as NMI, err=%d\n",
			     idx, err);
		} else {
			err = request_percpu_irq(irq, ipi_handler,
						 "IPI", &irq_stat);
			WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
			     idx, err);
		}

		ipi_desc[idx] = irq_to_desc(irq);
		irq_set_status_flags(irq, IRQ_HIDDEN);
	}

	/* ipi_setup() bails out while ipi_irq_base is zero: publish it first. */
	ipi_irq_base = ipi_base;

	/* The boot CPU gets its IPIs enabled right away. */
	ipi_setup(smp_processor_id());
}
|
|
|
|
|
2023-03-07 14:35:56 +00:00
|
|
|
void arch_smp_send_reschedule(int cpu)
|
2012-03-05 11:49:30 +00:00
|
|
|
{
|
|
|
|
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
|
|
|
|
}
|
|
|
|
|
2023-09-06 16:02:58 +00:00
|
|
|
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
|
|
|
|
void arch_send_wakeup_ipi(unsigned int cpu)
{
	/*
	 * Waking @cpu is done with the scheduler IPI: no dedicated IPI has
	 * to be reserved for it, and a spurious scheduler IPI is harmless.
	 */
	smp_send_reschedule(cpu);
}
|
|
|
|
#endif
|
|
|
|
|
2013-09-04 09:55:17 +00:00
|
|
|
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
|
|
|
void tick_broadcast(const struct cpumask *mask)
|
|
|
|
{
|
|
|
|
smp_cross_call(mask, IPI_TIMER);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2020-03-11 17:12:44 +00:00
|
|
|
/*
|
|
|
|
* The number of CPUs online, not counting this CPU (which may not be
|
|
|
|
* fully online and so not counted in num_online_cpus()).
|
|
|
|
*/
|
|
|
|
static inline unsigned int num_other_online_cpus(void)
|
|
|
|
{
|
|
|
|
unsigned int this_cpu_online = cpu_online(smp_processor_id());
|
|
|
|
|
|
|
|
return num_online_cpus() - this_cpu_online;
|
|
|
|
}
|
|
|
|
|
2012-03-05 11:49:30 +00:00
|
|
|
void smp_send_stop(void)
|
|
|
|
{
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
static unsigned long stop_in_progress;
|
|
|
|
cpumask_t mask;
|
2012-03-05 11:49:30 +00:00
|
|
|
unsigned long timeout;
|
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
/*
|
|
|
|
* If this cpu is the only one alive at this point in time, online or
|
|
|
|
* not, there are no stop messages to be sent around, so just back out.
|
|
|
|
*/
|
|
|
|
if (num_other_online_cpus() == 0)
|
|
|
|
goto skip_ipi;
|
2012-03-05 11:49:30 +00:00
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
/* Only proceed if this is the first CPU to reach this code */
|
|
|
|
if (test_and_set_bit(0, &stop_in_progress))
|
|
|
|
return;
|
2012-03-05 11:49:30 +00:00
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
/*
|
|
|
|
* Send an IPI to all currently online CPUs except the CPU running
|
|
|
|
* this code.
|
|
|
|
*
|
|
|
|
* NOTE: we don't do anything here to prevent other CPUs from coming
|
|
|
|
* online after we snapshot `cpu_online_mask`. Ideally, the calling code
|
|
|
|
* should do something to prevent other CPUs from coming up. This code
|
|
|
|
* can be called in the panic path and thus it doesn't seem wise to
|
|
|
|
* grab the CPU hotplug mutex ourselves. Worst case:
|
|
|
|
* - If a CPU comes online as we're running, we'll likely notice it
|
|
|
|
* during the 1 second wait below and then we'll catch it when we try
|
|
|
|
* with an NMI (assuming NMIs are enabled) since we re-snapshot the
|
|
|
|
* mask before sending an NMI.
|
|
|
|
* - If we leave the function and see that CPUs are still online we'll
|
|
|
|
* at least print a warning. Especially without NMIs this function
|
|
|
|
* isn't foolproof anyway so calling code will just have to accept
|
|
|
|
* the fact that there could be cases where a CPU can't be stopped.
|
|
|
|
*/
|
|
|
|
cpumask_copy(&mask, cpu_online_mask);
|
|
|
|
cpumask_clear_cpu(smp_processor_id(), &mask);
|
2012-03-05 11:49:30 +00:00
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
if (system_state <= SYSTEM_RUNNING)
|
|
|
|
pr_crit("SMP: stopping secondary CPUs\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Start with a normal IPI and wait up to one second for other CPUs to
|
|
|
|
* stop. We do this first because it gives other processors a chance
|
|
|
|
* to exit critical sections / drop locks and makes the rest of the
|
|
|
|
* stop process (especially console flush) more robust.
|
|
|
|
*/
|
|
|
|
smp_cross_call(&mask, IPI_CPU_STOP);
|
2012-03-05 11:49:30 +00:00
|
|
|
timeout = USEC_PER_SEC;
|
2020-03-11 17:12:44 +00:00
|
|
|
while (num_other_online_cpus() && timeout--)
|
2012-03-05 11:49:30 +00:00
|
|
|
udelay(1);
|
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
/*
|
|
|
|
* If CPUs are still online, try an NMI. There's no excuse for this to
|
|
|
|
* be slow, so we only give them an extra 10 ms to respond.
|
|
|
|
*/
|
|
|
|
if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
|
|
|
|
smp_rmb();
|
|
|
|
cpumask_copy(&mask, cpu_online_mask);
|
|
|
|
cpumask_clear_cpu(smp_processor_id(), &mask);
|
|
|
|
|
|
|
|
pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
|
|
|
|
cpumask_pr_args(&mask));
|
|
|
|
|
|
|
|
smp_cross_call(&mask, IPI_CPU_STOP_NMI);
|
|
|
|
timeout = USEC_PER_MSEC * 10;
|
|
|
|
while (num_other_online_cpus() && timeout--)
|
|
|
|
udelay(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (num_other_online_cpus()) {
|
|
|
|
smp_rmb();
|
|
|
|
cpumask_copy(&mask, cpu_online_mask);
|
|
|
|
cpumask_clear_cpu(smp_processor_id(), &mask);
|
|
|
|
|
2019-10-18 03:18:19 +00:00
|
|
|
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
cpumask_pr_args(&mask));
|
|
|
|
}
|
arm64: kernel: Add arch-specific SDEI entry code and CPU masking
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications.
Such notifications enter the kernel at the registered entry-point
with the register values of the interrupted CPU context. Because this
is not a CPU exception, it cannot reuse the existing entry code
(crucially, we don't implicitly know which exception level we interrupted).
Add the entry point to entry.S to set us up for calling into C code. If
the event interrupted code that had interrupts masked, we always return
to that location. Otherwise we pretend this was an IRQ, and use SDEI's
complete_and_resume call to return to vbar_el1 + offset.
This allows the kernel to deliver signals to user space processes. For
KVM this triggers the world switch, a quick spin round vcpu_run, then
back into the guest, unless there are pending signals.
Add sdei_mask_local_cpu() calls to the smp_send_stop() code, this covers
the panic() code-path, which doesn't invoke cpuhotplug notifiers.
Because we can interrupt entry-from/exit-to another EL, we can't trust the
value in sp_el0 or x29, even if we interrupted the kernel, in this case
the code in entry.S will save/restore sp_el0 and use the value in
__entry_task.
When we have VMAP stacks we can interrupt the stack-overflow test, which
stirs x0 into sp, meaning we have to have our own VMAP stacks. For now
these are allocated when we probe the interface. Future patches will add
refcounting hooks to allow the arch code to allocate them lazily.
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-01-08 15:38:12 +00:00
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to an NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
skip_ipi:
|
arm64: kernel: Add arch-specific SDEI entry code and CPU masking
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications.
Such notifications enter the kernel at the registered entry-point
with the register values of the interrupted CPU context. Because this
is not a CPU exception, it cannot reuse the existing entry code
(crucially, we don't implicitly know which exception level we interrupted).
Add the entry point to entry.S to set us up for calling into C code. If
the event interrupted code that had interrupts masked, we always return
to that location. Otherwise we pretend this was an IRQ, and use SDEI's
complete_and_resume call to return to vbar_el1 + offset.
This allows the kernel to deliver signals to user space processes. For
KVM this triggers the world switch, a quick spin round vcpu_run, then
back into the guest, unless there are pending signals.
Add sdei_mask_local_cpu() calls to the smp_send_stop() code, this covers
the panic() code-path, which doesn't invoke cpuhotplug notifiers.
Because we can interrupt entry-from/exit-to another EL, we can't trust the
value in sp_el0 or x29, even if we interrupted the kernel, in this case
the code in entry.S will save/restore sp_el0 and use the value in
__entry_task.
When we have VMAP stacks we can interrupt the stack-overflow test, which
stirs x0 into sp, meaning we have to have our own VMAP stacks. For now
these are allocated when we probe the interface. Future patches will add
refcounting hooks to allow the arch code to allocate them lazily.
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2018-01-08 15:38:12 +00:00
|
|
|
sdei_mask_local_cpu();
|
2012-03-05 11:49:30 +00:00
|
|
|
}
|
|
|
|
|
2017-04-03 02:24:36 +00:00
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
2017-08-17 02:24:27 +00:00
|
|
|
void crash_smp_send_stop(void)
|
2017-04-03 02:24:36 +00:00
|
|
|
{
|
2017-08-17 02:24:27 +00:00
|
|
|
/*
|
|
|
|
* This function can be called twice in panic path, but obviously
|
|
|
|
* we execute this only once.
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
*
|
|
|
|
* We use this same boolean to tell whether the IPI we send was a
|
|
|
|
* stop or a "crash stop".
|
2017-08-17 02:24:27 +00:00
|
|
|
*/
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
if (crash_stop)
|
2017-08-17 02:24:27 +00:00
|
|
|
return;
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
crash_stop = 1;
|
2017-08-17 02:24:27 +00:00
|
|
|
|
arm64: smp: smp_send_stop() and crash_smp_send_stop() should try non-NMI first
When testing hard lockup handling on my sc7180-trogdor-lazor device
with pseudo-NMI enabled, with serial console enabled and with kgdb
disabled, I found that the stack crawls printed to the serial console
ended up as a jumbled mess. After rebooting, the pstore-based console
looked fine though. Also, enabling kgdb to trap the panic made the
console look fine and avoided the mess.
After a bit of tracking down, I came to the conclusion that this was
what was happening:
1. The panic path was stopping all other CPUs with
panic_other_cpus_shutdown().
2. At least one of those other CPUs was in the middle of printing to
the serial console and holding the console port's lock, which is
grabbed with "irqsave". ...but since we were stopping with an NMI
we didn't care about the "irqsave" and interrupted anyway.
3. Since we stopped the CPU while it was holding the lock it would
never release it.
4. All future calls to output to the console would end up failing to
get the lock in qcom_geni_serial_console_write(). This isn't
_totally_ unexpected at panic time but it's a code path that's not
well tested, hard to get right, and apparently doesn't work
terribly well on the Qualcomm geni serial driver.
The Qualcomm geni serial driver was fixed to be a bit better in commit
9e957a155005 ("serial: qcom-geni: Don't cancel/abort if we can't get
the port lock") but it's nice not to get into this situation in the
first place.
Taking a page from what x86 appears to do in native_stop_other_cpus(),
do this:
1. First, try to stop other CPUs with a normal IPI and wait a second.
This gives them a chance to leave critical sections.
2. If CPUs fail to stop then retry with an NMI, but give a much lower
timeout since there's no good reason for a CPU not to react quickly
to a NMI.
This works well and avoids the corrupted console and (presumably)
could help avoid other similar issues.
In order to do this, we need to do a little re-organization of our
IPIs since we don't have any more free IDs. Do what was suggested in
previous conversations and combine "stop" and "crash stop". That frees
up an IPI so now we can have a "stop" and "stop NMI".
In order to do this we also need a slight change in the way we keep
track of which CPUs still need to be stopped. We need to know
specifically which CPUs haven't stopped yet when we fall back to NMI
but in the "crash stop" case the "cpu_online_mask" isn't updated as
CPUs go down. This is why that code path had an atomic of the number
of CPUs left. Solve this by also updating the "cpu_online_mask" for
crash stops.
All of the above lets us combine the logic for "stop" and "crash stop"
code, which appeared to have a bunch of arbitrary implementation
differences.
Aside from the above change where we try a normal IPI and then an NMI,
the combined function has a few subtle differences:
* In the normal smp_send_stop(), if we fail to stop one or more CPUs
then we won't include the current CPU (the one running
smp_send_stop()) in the error message.
* In crash_smp_send_stop(), if we fail to stop some CPUs we'll print
the CPUs that we failed to stop instead of printing all _but_ the
current running CPU.
* In crash_smp_send_stop(), we will now only print "SMP: stopping
secondary CPUs" if (system_state <= SYSTEM_RUNNING).
Fixes: d7402513c935 ("arm64: smp: IPI_CPU_STOP and IPI_CPU_CRASH_STOP should try for NMI")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240821145353.v3.1.Id4817adef610302554b8aa42b090d57270dc119c@changeid
Signed-off-by: Will Deacon <will@kernel.org>
2024-08-21 21:53:57 +00:00
|
|
|
smp_send_stop();
|
2017-08-17 02:24:27 +00:00
|
|
|
|
2023-06-27 00:29:39 +00:00
|
|
|
sdei_handler_abort();
|
2017-04-03 02:24:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Report whether the crash stop left any other CPU online.
 *
 * Returns true when num_other_online_cpus() is non-zero, false otherwise.
 */
bool smp_crash_stop_failed(void)
{
	return num_other_online_cpus() != 0;
}
|
|
|
|
#endif
|
|
|
|
|
2016-06-22 09:06:12 +00:00
|
|
|
/*
 * Report whether a cpu_die callback is available.
 *
 * With CONFIG_HOTPLUG_CPU, looks up the cpu_operations of the CPU this code
 * is currently running on and returns true if they exist and provide a
 * cpu_die callback. Without CONFIG_HOTPLUG_CPU this always returns false.
 */
static bool have_cpu_die(void)
{
#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * NOTE(review): the name "any_cpu" suggests the answer is expected to
	 * be the same whichever CPU is queried - confirm against get_cpu_ops().
	 */
	int any_cpu = raw_smp_processor_id();
	const struct cpu_operations *ops = get_cpu_ops(any_cpu);

	if (ops && ops->cpu_die)
		return true;
#endif
	return false;
}
|
|
|
|
|
|
|
|
bool cpus_are_stuck_in_kernel(void)
|
|
|
|
{
|
|
|
|
bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
|
|
|
|
|
2021-10-08 13:58:35 +00:00
|
|
|
return !!cpus_stuck_in_kernel || smp_spin_tables ||
|
|
|
|
is_protected_kvm_enabled();
|
2016-06-22 09:06:12 +00:00
|
|
|
}
|