kernel/watchdog: Prevent false positives with turbo modes
The hardlockup detector on x86 uses a performance counter based on unhalted
CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
performance counter period, so the hrtimer should fire 2-3 times before the
performance counter NMI fires. The NMI code checks whether the hrtimer
fired since the last invocation. If not, it assumes a hard lockup.
The calculation of those periods is based on the nominal CPU
frequency. Turbo modes increase the CPU clock frequency and therefore
shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x
nominal frequency) the perf/NMI period is shorter than the hrtimer period
which leads to false positives.
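
To put numbers on it: with the default watchdog_thresh of 10 seconds the
hrtimer period is 4 seconds, while at a 3x turbo factor the NMI period
shrinks to roughly 3.3 seconds of wall time. A minimal userspace sketch of
that arithmetic, assuming those illustrative values (they are not part of
the patch):

	/* Illustrative arithmetic only, not kernel code. */
	#include <stdio.h>

	int main(void)
	{
		double watchdog_thresh = 10.0;	/* seconds */
		double hrtimer_period = watchdog_thresh * 2.0 / 5.0;
		double turbo_factor = 3.0;
		/* 10s worth of nominal cycles pass in ~3.3s of wall time */
		double real_nmi_period = watchdog_thresh / turbo_factor;

		printf("hrtimer period:  %.1fs\n", hrtimer_period);
		printf("real NMI period: %.1fs\n", real_nmi_period);
		if (real_nmi_period < hrtimer_period)
			printf("NMI beats the hrtimer -> false positive\n");
		return 0;
	}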
A simple fix would be to shorten the hrtimer period, but that comes with
the side effect of more frequent hrtimer and softlockup thread wakeups,
which is not desired.
Implement a low pass filter, which checks the perf/NMI period against
kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
elapsed then the event is ignored and postponed to the next perf/NMI.
That solves the problem and avoids the overhead of shorter hrtimer periods
and more frequent softlockup thread wakeups.
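
For illustration, a self-contained userspace model of that filter, reusing
the assumed 10 second threshold from above. The names mirror the patch
below, but this sketch only models the behaviour; the real implementation
is in the kernel/watchdog_hld.c hunk further down:

	/* Userspace model of the low pass filter (illustrative only). */
	#include <stdbool.h>
	#include <stdio.h>

	static double last_timestamp;		/* per-CPU in the kernel */
	static unsigned int nmi_rearmed;	/* per-CPU in the kernel */
	static const double sample_threshold = 8.0;	/* 4/5 of 10s */

	static bool watchdog_check_timestamp(double now)
	{
		if (now - last_timestamp < sample_threshold) {
			/* Bound the postponement in case time stalls. */
			if (++nmi_rearmed < 10)
				return false;	/* too early: ignore NMI */
		}
		nmi_rearmed = 0;
		last_timestamp = now;
		return true;
	}

	int main(void)
	{
		/* NMIs every ~3.3s (3x turbo): most get postponed. */
		for (double t = 3.3; t < 20.0; t += 3.3)
			printf("NMI at %4.1fs: %s\n", t,
			       watchdog_check_timestamp(t) ?
			       "checked" : "ignored");
		return 0;
	}

Only samples arriving at least 4/5 of the watchdog period after the last
accepted one proceed to the hardlockup check; the rearm counter bounds the
postponement so a genuinely stalled clock cannot suppress detection forever.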
Fixes: 58687acba5 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
Reported-and-tested-by: Kan Liang <Kan.liang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: dzickus@redhat.com
Cc: prarit@redhat.com
Cc: ak@linux.intel.com
Cc: babu.moger@oracle.com
Cc: peterz@infradead.org
Cc: eranian@google.com
Cc: acme@redhat.com
Cc: stable@vger.kernel.org
Cc: atomlin@redhat.com
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
parent ef954844c7
commit 7edaeb6841
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
+	select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
 	select HAVE_ACPI_APEI if ACPI
 	select HAVE_ACPI_APEI_NMI if ACPI
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_softlockup_all_cpu_backtrace 0
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
+
+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
+    defined(CONFIG_HARDLOCKUP_DETECTOR)
+void watchdog_update_hrtimer_threshold(u64 period);
+#else
+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
+#endif
+
 extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -240,6 +240,7 @@ static void set_sample_period(void)
 	 * hardlockup detector generates a warning
 	 */
 	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+	watchdog_update_hrtimer_threshold(sample_period);
 }
 
 /* Commands for resetting the watchdog */
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
+static DEFINE_PER_CPU(ktime_t, last_timestamp);
+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
+
+void watchdog_update_hrtimer_threshold(u64 period)
+{
+	/*
+	 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
+	 *
+	 * So it runs effectively with 2.5 times the rate of the NMI
+	 * watchdog. That means the hrtimer should fire 2-3 times before
+	 * the NMI watchdog expires. The NMI watchdog on x86 is based on
+	 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
+	 * might run way faster than expected and the NMI fires in a
+	 * smaller period than the one deduced from the nominal CPU
+	 * frequency. Depending on the Turbo-Mode factor this might be fast
+	 * enough to get the NMI period smaller than the hrtimer watchdog
+	 * period and trigger false positives.
+	 *
+	 * The sample threshold is used to check in the NMI handler whether
+	 * the minimum time between two NMI samples has elapsed. That
+	 * prevents false positives.
+	 *
+	 * Set this to 4/5 of the actual watchdog threshold period so the
+	 * hrtimer is guaranteed to fire at least once within the real
+	 * watchdog threshold.
+	 */
+	watchdog_hrtimer_sample_threshold = period * 2;
+}
+
+static bool watchdog_check_timestamp(void)
+{
+	ktime_t delta, now = ktime_get_mono_fast_ns();
+
+	delta = now - __this_cpu_read(last_timestamp);
+	if (delta < watchdog_hrtimer_sample_threshold) {
+		/*
+		 * If ktime is jiffies based, a stalled timer would prevent
+		 * jiffies from being incremented and the filter would look
+		 * at a stale timestamp and never trigger.
+		 */
+		if (__this_cpu_inc_return(nmi_rearmed) < 10)
+			return false;
+	}
+	__this_cpu_write(nmi_rearmed, 0);
+	__this_cpu_write(last_timestamp, now);
+	return true;
+}
+#else
+static inline bool watchdog_check_timestamp(void)
+{
+	return true;
+}
+#endif
+
 static struct perf_event_attr wd_hw_attr = {
 	.type = PERF_TYPE_HARDWARE,
 	.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
 		return;
 	}
 
+	if (!watchdog_check_timestamp())
+		return;
+
 	/* check for a hardlockup
 	 * This is done by making sure our timer interrupt
 	 * is incrementing. The timer interrupt should have
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF
 	bool
 	select SOFTLOCKUP_DETECTOR
 
+#
+# Enables a timestamp based low pass filter to compensate for perf based
+# hard lockup detection which runs too fast due to turbo modes.
+#
+config HARDLOCKUP_CHECK_TIMESTAMP
+	bool
+
 #
 # arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
 # lockup detector rather than the perf based detector.