From 6f8b79683dfb37ee0661cf4c13a72f024c29f65c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Nov 2024 12:42:34 +0200 Subject: [PATCH 1/4] genirq: Move irq_thread_fn() further up in the code In a preparation to reuse irq_thread_fn() move it further up in the code. No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241119104339.2112455-2-andriy.shevchenko@linux.intel.com --- kernel/irq/manage.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f0803d6bd296..230f4701f18e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1181,14 +1181,29 @@ out_unlock: chip_bus_sync_unlock(desc); } +/* + * Interrupts explicitly requested as threaded interrupts want to be + * preemptible - many of them need to sleep and wait for slow busses to + * complete. + */ +static irqreturn_t irq_thread_fn(struct irq_desc *desc, struct irqaction *action) +{ + irqreturn_t ret = action->thread_fn(action->irq, action->dev_id); + + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + + irq_finalize_oneshot(desc, action); + return ret; +} + /* * Interrupts which are not explicitly requested as threaded * interrupts rely on the implicit bh/preempt disable of the hard irq * context. So we need to disable bh here to avoid deadlocks and other * side effects. */ -static irqreturn_t -irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { irqreturn_t ret; @@ -1206,24 +1221,6 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) return ret; } -/* - * Interrupts explicitly requested as threaded interrupts want to be - * preemptible - many of them need to sleep and wait for slow busses to - * complete. - */ -static irqreturn_t irq_thread_fn(struct irq_desc *desc, - struct irqaction *action) -{ - irqreturn_t ret; - - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); - return ret; -} - void wake_threads_waitq(struct irq_desc *desc) { if (atomic_dec_and_test(&desc->threads_active)) From 429f49ad361cd999ca221d8b562ae2552b7c3e2c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Nov 2024 12:42:35 +0200 Subject: [PATCH 2/4] genirq: Reuse irq_thread_fn() for forced thread case rq_forced_thread_fn() uses the same action callback as the non-forced variant but with different locking decorations. Reuse irq_thread_fn() here to make that clear. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241119104339.2112455-3-andriy.shevchenko@linux.intel.com --- kernel/irq/manage.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 230f4701f18e..f300bb6be3bd 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1210,11 +1210,7 @@ static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction local_bh_disable(); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_disable(); - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); + ret = irq_thread_fn(desc, action); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_enable(); local_bh_enable(); From bad6722e478f5b17a5ceb039dfb4c680cf2c0b48 Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Wed, 4 Dec 2024 14:20:02 +0000 Subject: [PATCH 3/4] kexec: Consolidate machine_kexec_mask_interrupts() implementation Consolidate the machine_kexec_mask_interrupts implementation into a common function located in a new file: kernel/irq/kexec.c. This removes duplicate implementations from architecture-specific files in arch/arm, arch/arm64, arch/powerpc, and arch/riscv, reducing code duplication and improving maintainability. The new implementation retains architecture-specific behavior for CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD, which was previously implemented for ARM64. When enabled (currently for ARM64), it clears the active state of interrupts forwarded to virtual machines (VMs) before handling other interrupt masking operations. Signed-off-by: Eliav Farber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241204142003.32859-2-farbere@amazon.com --- arch/arm/kernel/machine_kexec.c | 23 ------------------ arch/arm64/Kconfig | 1 + arch/arm64/kernel/machine_kexec.c | 31 ------------------------ arch/powerpc/include/asm/kexec.h | 1 - arch/powerpc/kexec/core.c | 22 ----------------- arch/powerpc/kexec/core_32.c | 1 + arch/riscv/kernel/machine_kexec.c | 23 ------------------ include/linux/irq.h | 3 +++ kernel/irq/Kconfig | 6 +++++ kernel/irq/Makefile | 2 +- kernel/irq/kexec.c | 40 +++++++++++++++++++++++++++++++ 11 files changed, 52 insertions(+), 101 deletions(-) create mode 100644 kernel/irq/kexec.c diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 80ceb5bd2680..dd430477e7c1 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -127,29 +127,6 @@ void crash_smp_send_stop(void) cpus_stopped = 1; } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - void machine_crash_shutdown(struct pt_regs *regs) { local_irq_disable(); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 100570a048c5..dcc3551cf6c2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -149,6 +149,7 @@ config ARM64 select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP select GENERIC_IRQ_IPI + select GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 82e2203d86a3..6f121a0164a4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -207,37 +207,6 @@ void machine_kexec(struct kimage *kimage) BUG(); /* Should never get here. */ } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - int ret; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /** * machine_crash_shutdown - shutdown non-crashing cpus and save registers */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 270ee93a0f7d..601e569303e1 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -61,7 +61,6 @@ struct pt_regs; extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ extern void default_machine_kexec(struct kimage *image); -extern void machine_kexec_mask_interrupts(void); void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index b8333a49ea5d..58a930a47422 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -22,28 +22,6 @@ #include #include -void machine_kexec_mask_interrupts(void) { - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - #ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) { diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c index c95f96850c9e..deb28eb44f30 100644 --- a/arch/powerpc/kexec/core_32.c +++ b/arch/powerpc/kexec/core_32.c @@ -7,6 +7,7 @@ * Copyright (C) 2005 IBM Corporation. */ +#include #include #include #include diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index 3c830a6f7ef4..2306ce3e5f22 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -114,29 +114,6 @@ void machine_shutdown(void) #endif } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * diff --git a/include/linux/irq.h b/include/linux/irq.h index fa711f80957b..25f51bf3c351 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -694,6 +694,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); #endif +/* Disable or mask interrupts during a kernel kexec */ +extern void machine_kexec_mask_interrupts(void); + /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 529adb1f5859..875f25ed6f71 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -141,6 +141,12 @@ config GENERIC_IRQ_DEBUGFS If you don't know what to do here, say N. +# Clear forwarded VM interrupts during kexec. +# This option ensures the kernel clears active states for interrupts +# forwarded to virtual machines (VMs) during a machine kexec. +config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD + bool + endmenu config GENERIC_IRQ_MULTI_HANDLER diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index f19d3080bf11..c0f44c06d69d 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o kexec.o obj-$(CONFIG_IRQ_TIMINGS) += timings.o ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) CFLAGS_timings.o += -DDEBUG diff --git a/kernel/irq/kexec.c b/kernel/irq/kexec.c new file mode 100644 index 000000000000..0f9548c1708d --- /dev/null +++ b/kernel/irq/kexec.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include "internals.h" + +void machine_kexec_mask_interrupts(void) +{ + struct irq_desc *desc; + unsigned int i; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int check_eoi = 1; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (IS_ENABLED(CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD)) { + /* + * First try to remove the active state from an interrupt which is forwarded + * to a VM. If the interrupt is not forwarded, try to EOI the interrupt. + */ + check_eoi = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + } + + if (check_eoi && chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} From b4706d814921cc2df7bb59ad8f9ee84855a4f0c4 Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Wed, 4 Dec 2024 14:20:03 +0000 Subject: [PATCH 4/4] genirq/kexec: Prevent redundant IRQ masking by checking state before shutdown During machine kexec, machine_kexec_mask_interrupts() is responsible for disabling or masking all interrupts. While the irq_disable() is only invoked when the interrupt is not yet disabled, it unconditionally invokes the irq_mask() callback for every interrupt descriptor, even when the interrupt is already masked or not even started up yet. A specific issue was observed in the crash kernel flow after unbinding a device (prior to kexec) that used a GPIO as an IRQ source. The warning was triggered by the gpiochip_disable_irq() function, which attempts to clear the FLAG_IRQ_IS_ENABLED flag when FLAG_USED_AS_IRQ was not set. This issue surfaced after commit a8173820f441 ("gpio: gpiolib: Allow GPIO IRQs to lazy disable") introduced lazy disablement for GPIO IRQs. It replaced disable/enable hooks with mask/unmask hooks. Unlike the disable hook, the mask hook doesn't handle already-masked IRQs. When a GPIO-IRQ driver is unbound, the IRQ is released, triggering __irq_disable() and irq_state_set_masked(). A subsequent call to machine_kexec_mask_interrupts() re-invokes chip->irq_mask(). This results in a call chain, including gpiochip_irq_mask() and gpiochip_disable_irq(). Since FLAG_USED_AS_IRQ was cleared earlier, the warning is triggered. Replace the direct invocation of the irq_mask() and irq_disable() callbacks invoking to irq_shutdown(), which handles the cases correct and avoid it all together when the interrupt has never been started up. Signed-off-by: Eliav Farber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241204142003.32859-3-farbere@amazon.com --- kernel/irq/kexec.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/irq/kexec.c b/kernel/irq/kexec.c index 0f9548c1708d..1a3deffe6b5b 100644 --- a/kernel/irq/kexec.c +++ b/kernel/irq/kexec.c @@ -17,7 +17,7 @@ void machine_kexec_mask_interrupts(void) int check_eoi = 1; chip = irq_desc_get_chip(desc); - if (!chip) + if (!chip || !irqd_is_started(&desc->irq_data)) continue; if (IS_ENABLED(CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD)) { @@ -31,10 +31,6 @@ void machine_kexec_mask_interrupts(void) if (check_eoi && chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) chip->irq_eoi(&desc->irq_data); - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); + irq_shutdown(desc); } }