clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0+
|
|
|
|
/*
|
|
|
|
* Unit test for the clocksource watchdog.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2021 Facebook, Inc.
|
|
|
|
*
|
|
|
|
* Author: Paul E. McKenney <paulmck@kernel.org>
|
|
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/clocksource.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
|
|
|
|
#include <linux/tick.h>
|
|
|
|
#include <linux/kthread.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/prandom.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
|
2021-08-12 09:31:28 -07:00
|
|
|
#include "tick-internal.h"
|
|
|
|
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
MODULE_LICENSE("GPL");
|
2024-05-10 17:24:25 -07:00
|
|
|
MODULE_DESCRIPTION("Clocksource watchdog unit test");
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
|
|
|
|
|
|
|
|
static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0;
|
|
|
|
module_param(holdoff, int, 0444);
|
|
|
|
MODULE_PARM_DESC(holdoff, "Time to wait to start test (s).");
|
|
|
|
|
|
|
|
/* Watchdog kthread's task_struct pointer for debug purposes. */
|
|
|
|
static struct task_struct *wdtest_task;
|
|
|
|
|
|
|
|
static u64 wdtest_jiffies_read(struct clocksource *cs)
|
|
|
|
{
|
|
|
|
return (u64)jiffies;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct clocksource clocksource_wdtest_jiffies = {
|
|
|
|
.name = "wdtest-jiffies",
|
|
|
|
.rating = 1, /* lowest valid rating*/
|
|
|
|
.uncertainty_margin = TICK_NSEC,
|
|
|
|
.read = wdtest_jiffies_read,
|
|
|
|
.mask = CLOCKSOURCE_MASK(32),
|
|
|
|
.flags = CLOCK_SOURCE_MUST_VERIFY,
|
|
|
|
.mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */
|
|
|
|
.shift = JIFFIES_SHIFT,
|
|
|
|
.max_cycles = 10,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int wdtest_ktime_read_ndelays;
|
|
|
|
static bool wdtest_ktime_read_fuzz;
|
|
|
|
|
|
|
|
static u64 wdtest_ktime_read(struct clocksource *cs)
|
|
|
|
{
|
|
|
|
int wkrn = READ_ONCE(wdtest_ktime_read_ndelays);
|
|
|
|
static int sign = 1;
|
|
|
|
u64 ret;
|
|
|
|
|
|
|
|
if (wkrn) {
|
|
|
|
udelay(cs->uncertainty_margin / 250);
|
|
|
|
WRITE_ONCE(wdtest_ktime_read_ndelays, wkrn - 1);
|
|
|
|
}
|
|
|
|
ret = ktime_get_real_fast_ns();
|
|
|
|
if (READ_ONCE(wdtest_ktime_read_fuzz)) {
|
|
|
|
sign = -sign;
|
|
|
|
ret = ret + sign * 100 * NSEC_PER_MSEC;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void wdtest_ktime_cs_mark_unstable(struct clocksource *cs)
|
|
|
|
{
|
|
|
|
pr_info("--- Marking %s unstable due to clocksource watchdog.\n", cs->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define KTIME_FLAGS (CLOCK_SOURCE_IS_CONTINUOUS | \
|
|
|
|
CLOCK_SOURCE_VALID_FOR_HRES | \
|
|
|
|
CLOCK_SOURCE_MUST_VERIFY | \
|
|
|
|
CLOCK_SOURCE_VERIFY_PERCPU)
|
|
|
|
|
|
|
|
static struct clocksource clocksource_wdtest_ktime = {
|
|
|
|
.name = "wdtest-ktime",
|
|
|
|
.rating = 300,
|
|
|
|
.read = wdtest_ktime_read,
|
|
|
|
.mask = CLOCKSOURCE_MASK(64),
|
|
|
|
.flags = KTIME_FLAGS,
|
|
|
|
.mark_unstable = wdtest_ktime_cs_mark_unstable,
|
|
|
|
.list = LIST_HEAD_INIT(clocksource_wdtest_ktime.list),
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Reset the clocksource if needed. */
|
|
|
|
static void wdtest_ktime_clocksource_reset(void)
|
|
|
|
{
|
|
|
|
if (clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE) {
|
|
|
|
clocksource_unregister(&clocksource_wdtest_ktime);
|
|
|
|
clocksource_wdtest_ktime.flags = KTIME_FLAGS;
|
|
|
|
schedule_timeout_uninterruptible(HZ / 10);
|
|
|
|
clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Run the specified series of watchdog tests. */
|
|
|
|
static int wdtest_func(void *arg)
|
|
|
|
{
|
|
|
|
unsigned long j1, j2;
|
clocksource: Scale the watchdog read retries automatically
On a 8-socket server the TSC is wrongly marked as 'unstable' and disabled
during boot time on about one out of 120 boot attempts:
clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns,
wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable
tsc: Marking TSC unstable due to clocksource watchdog
TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152)
clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896.
clocksource: Switched to clocksource hpet
The reason is that for platform with a large number of CPUs, there are
sporadic big or huge read latencies while reading the watchog/clocksource
during boot or when system is under stress work load, and the frequency and
maximum value of the latency goes up with the number of online CPUs.
The cCurrent code already has logic to detect and filter such high latency
case by reading the watchdog twice and checking the two deltas. Due to the
randomness of the latency, there is a low probabilty that the first delta
(latency) is big, but the second delta is small and looks valid. The
watchdog code retries the readouts by default twice, which is not
necessarily sufficient for systems with a large number of CPUs.
There is a command line parameter 'max_cswd_read_retries' which allows to
increase the number of retries, but that's not user friendly as it needs to
be tweaked per system. As the number of required retries is proportional to
the number of online CPUs, this parameter can be calculated at runtime.
Scale and enlarge the number of retries according to the number of online
CPUs and remove the command line parameter completely.
[ tglx: Massaged change log and comments ]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jin Wang <jin1.wang@intel.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com
2024-02-21 14:08:59 +08:00
|
|
|
int i, max_retries;
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
char *s;
|
|
|
|
|
|
|
|
schedule_timeout_uninterruptible(holdoff * HZ);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify that jiffies-like clocksources get the manually
|
|
|
|
* specified uncertainty margin.
|
|
|
|
*/
|
|
|
|
pr_info("--- Verify jiffies-like uncertainty margin.\n");
|
|
|
|
__clocksource_register(&clocksource_wdtest_jiffies);
|
|
|
|
WARN_ON_ONCE(clocksource_wdtest_jiffies.uncertainty_margin != TICK_NSEC);
|
|
|
|
|
|
|
|
j1 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
|
|
|
|
schedule_timeout_uninterruptible(HZ);
|
|
|
|
j2 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
|
|
|
|
WARN_ON_ONCE(j1 == j2);
|
|
|
|
|
|
|
|
clocksource_unregister(&clocksource_wdtest_jiffies);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify that tsc-like clocksources are assigned a reasonable
|
|
|
|
* uncertainty margin.
|
|
|
|
*/
|
|
|
|
pr_info("--- Verify tsc-like uncertainty margin.\n");
|
|
|
|
clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
|
|
|
|
WARN_ON_ONCE(clocksource_wdtest_ktime.uncertainty_margin < NSEC_PER_USEC);
|
|
|
|
|
|
|
|
j1 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
|
|
|
|
udelay(1);
|
|
|
|
j2 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
|
|
|
|
pr_info("--- tsc-like times: %lu - %lu = %lu.\n", j2, j1, j2 - j1);
|
|
|
|
WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC));
|
|
|
|
|
|
|
|
/* Verify tsc-like stability with various numbers of errors injected. */
|
clocksource: Scale the watchdog read retries automatically
On a 8-socket server the TSC is wrongly marked as 'unstable' and disabled
during boot time on about one out of 120 boot attempts:
clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns,
wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable
tsc: Marking TSC unstable due to clocksource watchdog
TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152)
clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896.
clocksource: Switched to clocksource hpet
The reason is that for platform with a large number of CPUs, there are
sporadic big or huge read latencies while reading the watchog/clocksource
during boot or when system is under stress work load, and the frequency and
maximum value of the latency goes up with the number of online CPUs.
The cCurrent code already has logic to detect and filter such high latency
case by reading the watchdog twice and checking the two deltas. Due to the
randomness of the latency, there is a low probabilty that the first delta
(latency) is big, but the second delta is small and looks valid. The
watchdog code retries the readouts by default twice, which is not
necessarily sufficient for systems with a large number of CPUs.
There is a command line parameter 'max_cswd_read_retries' which allows to
increase the number of retries, but that's not user friendly as it needs to
be tweaked per system. As the number of required retries is proportional to
the number of online CPUs, this parameter can be calculated at runtime.
Scale and enlarge the number of retries according to the number of online
CPUs and remove the command line parameter completely.
[ tglx: Massaged change log and comments ]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jin Wang <jin1.wang@intel.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com
2024-02-21 14:08:59 +08:00
|
|
|
max_retries = clocksource_get_max_watchdog_retry();
|
|
|
|
for (i = 0; i <= max_retries + 1; i++) {
|
|
|
|
if (i <= 1 && i < max_retries)
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
s = "";
|
clocksource: Scale the watchdog read retries automatically
On a 8-socket server the TSC is wrongly marked as 'unstable' and disabled
during boot time on about one out of 120 boot attempts:
clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns,
wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable
tsc: Marking TSC unstable due to clocksource watchdog
TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152)
clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896.
clocksource: Switched to clocksource hpet
The reason is that for platform with a large number of CPUs, there are
sporadic big or huge read latencies while reading the watchog/clocksource
during boot or when system is under stress work load, and the frequency and
maximum value of the latency goes up with the number of online CPUs.
The cCurrent code already has logic to detect and filter such high latency
case by reading the watchdog twice and checking the two deltas. Due to the
randomness of the latency, there is a low probabilty that the first delta
(latency) is big, but the second delta is small and looks valid. The
watchdog code retries the readouts by default twice, which is not
necessarily sufficient for systems with a large number of CPUs.
There is a command line parameter 'max_cswd_read_retries' which allows to
increase the number of retries, but that's not user friendly as it needs to
be tweaked per system. As the number of required retries is proportional to
the number of online CPUs, this parameter can be calculated at runtime.
Scale and enlarge the number of retries according to the number of online
CPUs and remove the command line parameter completely.
[ tglx: Massaged change log and comments ]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jin Wang <jin1.wang@intel.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com
2024-02-21 14:08:59 +08:00
|
|
|
else if (i <= max_retries)
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
s = ", expect message";
|
|
|
|
else
|
|
|
|
s = ", expect clock skew";
|
clocksource: Scale the watchdog read retries automatically
On a 8-socket server the TSC is wrongly marked as 'unstable' and disabled
during boot time on about one out of 120 boot attempts:
clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns,
wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable
tsc: Marking TSC unstable due to clocksource watchdog
TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152)
clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896.
clocksource: Switched to clocksource hpet
The reason is that for platform with a large number of CPUs, there are
sporadic big or huge read latencies while reading the watchog/clocksource
during boot or when system is under stress work load, and the frequency and
maximum value of the latency goes up with the number of online CPUs.
The cCurrent code already has logic to detect and filter such high latency
case by reading the watchdog twice and checking the two deltas. Due to the
randomness of the latency, there is a low probabilty that the first delta
(latency) is big, but the second delta is small and looks valid. The
watchdog code retries the readouts by default twice, which is not
necessarily sufficient for systems with a large number of CPUs.
There is a command line parameter 'max_cswd_read_retries' which allows to
increase the number of retries, but that's not user friendly as it needs to
be tweaked per system. As the number of required retries is proportional to
the number of online CPUs, this parameter can be calculated at runtime.
Scale and enlarge the number of retries according to the number of online
CPUs and remove the command line parameter completely.
[ tglx: Massaged change log and comments ]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jin Wang <jin1.wang@intel.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com
2024-02-21 14:08:59 +08:00
|
|
|
pr_info("--- Watchdog with %dx error injection, %d retries%s.\n", i, max_retries, s);
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
WRITE_ONCE(wdtest_ktime_read_ndelays, i);
|
|
|
|
schedule_timeout_uninterruptible(2 * HZ);
|
|
|
|
WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
|
clocksource: Scale the watchdog read retries automatically
On a 8-socket server the TSC is wrongly marked as 'unstable' and disabled
during boot time on about one out of 120 boot attempts:
clocksource: timekeeping watchdog on CPU227: wd-tsc-wd excessive read-back delay of 153560ns vs. limit of 125000ns,
wd-wd read-back delay only 11440ns, attempt 3, marking tsc unstable
tsc: Marking TSC unstable due to clocksource watchdog
TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
sched_clock: Marking unstable (119294969739, 159204297)<-(125446229205, -5992055152)
clocksource: Checking clocksource tsc synchronization from CPU 319 to CPUs 0,99,136,180,210,542,601,896.
clocksource: Switched to clocksource hpet
The reason is that for platform with a large number of CPUs, there are
sporadic big or huge read latencies while reading the watchog/clocksource
during boot or when system is under stress work load, and the frequency and
maximum value of the latency goes up with the number of online CPUs.
The cCurrent code already has logic to detect and filter such high latency
case by reading the watchdog twice and checking the two deltas. Due to the
randomness of the latency, there is a low probabilty that the first delta
(latency) is big, but the second delta is small and looks valid. The
watchdog code retries the readouts by default twice, which is not
necessarily sufficient for systems with a large number of CPUs.
There is a command line parameter 'max_cswd_read_retries' which allows to
increase the number of retries, but that's not user friendly as it needs to
be tweaked per system. As the number of required retries is proportional to
the number of online CPUs, this parameter can be calculated at runtime.
Scale and enlarge the number of retries according to the number of online
CPUs and remove the command line parameter completely.
[ tglx: Massaged change log and comments ]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jin Wang <jin1.wang@intel.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20240221060859.1027450-1-feng.tang@intel.com
2024-02-21 14:08:59 +08:00
|
|
|
WARN_ON_ONCE((i <= max_retries) !=
|
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.
Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel. Very large systems
that boot slowly may need to increase the value of this module parameter.
This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications. This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely. It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed. Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.
This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.
This facility is intended for diagnostic use only, and should be avoided
on production systems.
Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
2021-05-27 12:01:23 -07:00
|
|
|
!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
|
|
|
|
wdtest_ktime_clocksource_reset();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Verify tsc-like stability with clock-value-fuzz error injection. */
|
|
|
|
pr_info("--- Watchdog clock-value-fuzz error injection, expect clock skew and per-CPU mismatches.\n");
|
|
|
|
WRITE_ONCE(wdtest_ktime_read_fuzz, true);
|
|
|
|
schedule_timeout_uninterruptible(2 * HZ);
|
|
|
|
WARN_ON_ONCE(!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
|
|
|
|
clocksource_verify_percpu(&clocksource_wdtest_ktime);
|
|
|
|
WRITE_ONCE(wdtest_ktime_read_fuzz, false);
|
|
|
|
|
|
|
|
clocksource_unregister(&clocksource_wdtest_ktime);
|
|
|
|
|
|
|
|
pr_info("--- Done with test.\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void wdtest_print_module_parms(void)
|
|
|
|
{
|
|
|
|
pr_alert("--- holdoff=%d\n", holdoff);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Cleanup function. */
|
|
|
|
static void clocksource_wdtest_cleanup(void)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __init clocksource_wdtest_init(void)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
wdtest_print_module_parms();
|
|
|
|
|
|
|
|
/* Create watchdog-test task. */
|
|
|
|
wdtest_task = kthread_run(wdtest_func, NULL, "wdtest");
|
|
|
|
if (IS_ERR(wdtest_task)) {
|
|
|
|
ret = PTR_ERR(wdtest_task);
|
|
|
|
pr_warn("%s: Failed to create wdtest kthread.\n", __func__);
|
|
|
|
wdtest_task = NULL;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
module_init(clocksource_wdtest_init);
|
|
|
|
module_exit(clocksource_wdtest_cleanup);
|