From 625056b65ee9a8b4caf42136e0c844c15e6ec54f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 15 Mar 2012 17:47:20 -0400 Subject: [PATCH 1/5] hung task debugging: Inject NMI when hung and going to panic Send an NMI to all CPUs when a hung task is detected and the hung task code is configured to panic. This gives us a fairly uptodate snapshot of all CPUs in the system. This lets us get stack trace of all CPUs which makes life easier trying to debug a deadlock, and the NMI doesn't change anything since the next step is a kernel panic. Signed-off-by: Sasha Levin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1331848040-1676-1-git-send-email-levinsasha928@gmail.com [ extended the changelog a bit ] Signed-off-by: Ingo Molnar --- kernel/hung_task.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index c21449f85a2a..6df614912b9d 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -108,8 +108,10 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) touch_nmi_watchdog(); - if (sysctl_hung_task_panic) + if (sysctl_hung_task_panic) { + trigger_all_cpu_backtrace(); panic("hung_task: blocked tasks"); + } } /* From 09ee10143658cd021d879ead61ead72a196302b6 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 29 Mar 2012 16:11:15 -0400 Subject: [PATCH 2/5] watchdog, hpwdt: Remove priority option for NMI callback The NMI_UNKNOWN bucket only allows for one function to register to it. The reason for that is because only functions which can not determine if the NMI belongs to them or not should register and would like to assume/swallow any NMI they see. As a result it doesn't make sense to let more than one function like this register. In fact, letting a second function fail allows us to know that more than one function is going to swallow NMIs on the current system. This is better than silently being ignored. Therefore hpwdt's priority mechanism doesn't make sense any more. They will be always first on the NMI_UNKNOWN queue, if they register. Removing this parameter cleans up the code and simplifies things for the next patch which changes how nmis are registered. Signed-off-by: Don Zickus Cc: Thomas Mingarelli Cc: Wim Van Sebroeck Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1333051877-15755-2-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- drivers/watchdog/hpwdt.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index cbc7ceef2786..4000b8038cac 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -147,7 +147,6 @@ struct cmn_registers { static unsigned int hpwdt_nmi_decoding; static unsigned int allow_kdump; -static unsigned int priority; /* hpwdt at end of die_notify list */ static unsigned int is_icru; static DEFINE_SPINLOCK(rom_lock); static void *cru_rom_addr; @@ -723,13 +722,9 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) } /* - * If the priority is set to 1, then we will be put first on the - * die notify list to handle a critical NMI. The default is to - * be last so other users of the NMI signal can function. + * Only one function can register for NMI_UNKNOWN */ - retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, - (priority) ? NMI_FLAG_FIRST : 0, - "hpwdt"); + retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, 0, "hpwdt"); if (retval != 0) { dev_warn(&dev->dev, "Unable to register a die notifier (err=%d).\n", @@ -740,10 +735,8 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) dev_info(&dev->dev, "HP Watchdog Timer Driver: NMI decoding initialized" - ", allow kernel dump: %s (default = 0/OFF)" - ", priority: %s (default = 0/LAST).\n", - (allow_kdump == 0) ? "OFF" : "ON", - (priority == 0) ? "LAST" : "FIRST"); + ", allow kernel dump: %s (default = 0/OFF)\n", + (allow_kdump == 0) ? "OFF" : "ON"); return 0; } @@ -881,10 +874,6 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" #ifdef CONFIG_HPWDT_NMI_DECODING module_param(allow_kdump, int, 0); MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs"); - -module_param(priority, int, 0); -MODULE_PARM_DESC(priority, "The hpwdt driver handles NMIs first or last" - " (default = 0/Last)\n"); #endif /* !CONFIG_HPWDT_NMI_DECODING */ module_init(hpwdt_init); From 553222f3e81f18da31b2552e18dc519715198590 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 29 Mar 2012 16:11:16 -0400 Subject: [PATCH 3/5] x86/nmi: Add new NMI queues to deal with IO_CHK and SERR In discussions with Thomas Mingarelli about hpwdt, he explained to me some issues they were some when using their virtual NMI button to test the hpwdt driver. It turns out the virtual NMI button used on HP's machines do no send unknown NMIs but instead send IO_CHK NMIs. The way the kernel code is written, the hpwdt driver can not register itself against that type of NMI and therefore can not successfully capture system information before panic'ing. To solve this I created two new NMI queues to allow driver to register against the IO_CHK and SERR NMIs. Or in the hpwdt all three (if you include unknown NMIs too). The change is straightforward and just mimics what the unknown NMI does. Reported-and-tested-by: Thomas Mingarelli Signed-off-by: Don Zickus Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1333051877-15755-3-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 2 ++ arch/x86/kernel/nmi.c | 18 ++++++++++++++++++ drivers/watchdog/hpwdt.c | 27 ++++++++++++++++++++------- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index fd3f9f18cf3f..07162dfbff84 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void); enum { NMI_LOCAL=0, NMI_UNKNOWN, + NMI_SERR, + NMI_IO_CHECK, NMI_MAX }; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 47acaf319165..ac9c1b76df96 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -54,6 +54,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] = .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), .head = LIST_HEAD_INIT(nmi_desc[1].head), }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock), + .head = LIST_HEAD_INIT(nmi_desc[2].head), + }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock), + .head = LIST_HEAD_INIT(nmi_desc[3].head), + }, }; @@ -120,6 +128,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) * to manage expectations */ WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); /* * some handlers need to be executed first otherwise a fake @@ -212,6 +222,10 @@ EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_SERR, regs, false)) + return; + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", reason, smp_processor_id()); @@ -241,6 +255,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_IO_CHECK, regs, false)) + return; + pr_emerg( "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", reason, smp_processor_id()); diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 4000b8038cac..6e414b501d58 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -725,19 +725,32 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) * Only one function can register for NMI_UNKNOWN */ retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, 0, "hpwdt"); - if (retval != 0) { - dev_warn(&dev->dev, - "Unable to register a die notifier (err=%d).\n", - retval); - if (cru_rom_addr) - iounmap(cru_rom_addr); - } + if (retval) + goto error; + retval = register_nmi_handler(NMI_SERR, hpwdt_pretimeout, 0, "hpwdt"); + if (retval) + goto error1; + retval = register_nmi_handler(NMI_IO_CHECK, hpwdt_pretimeout, 0, "hpwdt"); + if (retval) + goto error2; dev_info(&dev->dev, "HP Watchdog Timer Driver: NMI decoding initialized" ", allow kernel dump: %s (default = 0/OFF)\n", (allow_kdump == 0) ? "OFF" : "ON"); return 0; + +error2: + unregister_nmi_handler(NMI_SERR, "hpwdt"); +error1: + unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); +error: + dev_warn(&dev->dev, + "Unable to register a die notifier (err=%d).\n", + retval); + if (cru_rom_addr) + iounmap(cru_rom_addr); + return retval; } static void hpwdt_exit_nmi_decoding(void) From 72b3fb24713755cf9740b403e95aa67ceedf3509 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Thu, 29 Mar 2012 16:11:17 -0400 Subject: [PATCH 4/5] x86/nmi: Fix page faults by nmiaction if kmemcheck is enabled This patch tries to fix the problem of page fault exception caused by accessing nmiaction structure in nmi if kmemcheck is enabled. If kmemcheck is enabled, the memory allocated through slab are in pages that are marked non-present, so that some checks could be done in the page fault handling code ( e.g. whether the memory is read before written to ). As nmiaction is allocated in this way, so it resides in a non-present page. Then there is a page fault while the nmi code accessing the nmiaction structure, which would then cause a warning by WARN_ON_ONCE(in_nmi()) in kmemcheck_fault(), called by do_page_fault(). This significantly simplifies the code as well, as the whole dynamic allocation dance goes away. v2: as Peter suggested, changed the nmiaction to use static storage. v3: as Peter suggested, use macro to shorten the codes. Also keep the original usage of register_nmi_handler, so users of this call doesn't need change. Tested-by: Seiji Aguchi Fixes: https://lkml.org/lkml/2012/3/2/356 Signed-off-by: Li Zhong [ simplified the wrappers ] Signed-off-by: Don Zickus Cc: Peter Zijlstra Cc: thomas.mingarelli@hp.com Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1333051877-15755-4-git-send-email-dzickus@redhat.com [ tidied the patch a bit ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 20 ++++++++++-- arch/x86/kernel/nmi.c | 65 ++++---------------------------------- 2 files changed, 24 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 07162dfbff84..a1a836c8131c 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -37,8 +37,24 @@ enum { typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); -int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, - const char *); +struct nmiaction { + struct list_head list; + nmi_handler_t handler; + unsigned int flags; + const char *name; +}; + +#define register_nmi_handler(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + +int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ac9c1b76df96..585be4bd71a5 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -31,14 +31,6 @@ #include #include -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - struct nmi_desc { spinlock_t lock; struct list_head head; @@ -115,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, return handled; } -static int __setup_nmi(unsigned int type, struct nmiaction *action) +int __register_nmi_handler(unsigned int type, struct nmiaction *action) { struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags; + if (!action->handler) + return -EINVAL; + spin_lock_irqsave(&desc->lock, flags); /* @@ -143,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) spin_unlock_irqrestore(&desc->lock, flags); return 0; } +EXPORT_SYMBOL(__register_nmi_handler); -static struct nmiaction *__free_nmi(unsigned int type, const char *name) +void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *n; @@ -167,56 +163,7 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name) spin_unlock_irqrestore(&desc->lock, flags); synchronize_rcu(); - return (n); } - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void From 6ff968cca1dfebd4b6fcade87c11658dbfc96932 Mon Sep 17 00:00:00 2001 From: Betty Dall Date: Fri, 27 Apr 2012 14:40:55 -0600 Subject: [PATCH 5/5] x86/nmi: Fix the type of the nmiaction.flags field This patch changes the type of the struct nmiaction flags field to unsigned long from unsigned int. All the usages of the flags field are unsigned long already. There is only one flag used currently, NMI_FLAG_FIRST, but having the wrong size could cause a truncation bug in the future on 64 bit architectures. Signed-off-by: Betty Dall Acked-by: Don Zickus Link: http://lkml.kernel.org/r/1335559255-13454-1-git-send-email-betty.dall@hp.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index a1a836c8131c..0e3793b821ef 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -40,7 +40,7 @@ typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); struct nmiaction { struct list_head list; nmi_handler_t handler; - unsigned int flags; + unsigned long flags; const char *name; };