linux-stable/kernel/rcu/rcutorture.c

3956 lines
123 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0+
/*
* Read-Copy Update module-based torture test facility
*
* Copyright (C) IBM Corporation, 2005, 2006
*
* Authors: Paul E. McKenney <paulmck@linux.ibm.com>
* Josh Triplett <josh@joshtriplett.org>
*
* See also: Documentation/RCU/torture.rst
*/
#define pr_fmt(fmt) fmt
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate_wait.h>
#include <linux/rcu_notifier.h>
#include <linux/interrupt.h>
#include <linux/sched/signal.h>
#include <uapi/linux/sched/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/trace_clock.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
#include <linux/vmalloc.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/oom.h>
#include <linux/tick.h>
#include <linux/rcupdate_trace.h>
#include <linux/nmi.h>
#include "rcu.h"
/* Module metadata: description, license, and authors. */
MODULE_DESCRIPTION("Read-Copy Update module-based torture test facility");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
/*
 * Bits for ->extendables field, extendables param, and related definitions.
 * The low RCUTORTURE_RDR_NBITS bits (0x01 through 0x40) each name one way
 * of extending a reader; the two _MASK_ values occupy bits 8 and 9, above
 * those flag bits.
 */
#define RCUTORTURE_RDR_SHIFT_1 8 /* Put SRCU index in upper bits. */
#define RCUTORTURE_RDR_MASK_1 (1 << RCUTORTURE_RDR_SHIFT_1)
#define RCUTORTURE_RDR_SHIFT_2 9 /* Put SRCU index in upper bits. */
#define RCUTORTURE_RDR_MASK_2 (1 << RCUTORTURE_RDR_SHIFT_2)
#define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */
#define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */
#define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */
#define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */
#define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */
#define RCUTORTURE_RDR_RCU_1 0x20 /* ... entering another RCU reader. */
#define RCUTORTURE_RDR_RCU_2 0x40 /* ... entering another RCU reader. */
#define RCUTORTURE_RDR_NBITS 7 /* Number of bits defined above. */
/* Default extension set; the RCU_1 and RCU_2 bits are deliberately not part of it. */
#define RCUTORTURE_MAX_EXTEND \
(RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \
RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED)
#define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */
/* Must be power of two minus one. */
/* Capacity of a reader-segment log such as err_segs[]: MAX_LOOPS plus three. */
#define RCUTORTURE_RDR_MAX_SEGS (RCUTORTURE_RDR_MAX_LOOPS + 3)
/*
 * Module parameters.  Each torture_param() invocation below is written as
 * (type, name, default value, description string); the macro itself is
 * presumably provided by <linux/torture.h> -- confirm there for the exact
 * expansion and permissions.
 */
torture_param(int, extendables, RCUTORTURE_MAX_EXTEND,
"Extend readers by disabling bh (1), irqs (2), or preempt (4)");
/* Force-quiescent-state burst controls. */
torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable");
torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
/* Grace-period forward-progress testing controls. */
torture_param(int, fwd_progress, 1, "Number of grace-period forward progress tasks (0 to disable)");
torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress tests (s)");
torture_param(bool, fwd_progress_need_resched, 1, "Hide cond_resched() behind need_resched()");
/* Selection of grace-period wait primitives; all default to off. */
torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait primitives");
torture_param(bool, gp_cond_full, false, "Use conditional/async full-state GP wait primitives");
/* Boolean knob, off by default, for the conditional/async full-state expedited GP waits. */
torture_param(bool, gp_cond_exp_full, false,
	      "Use conditional/async full-state expedited GP wait primitives");
/* Remaining grace-period wait-primitive selectors; all default to off. */
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives");
torture_param(bool, gp_poll, false, "Use polling GP wait primitives");
torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives");
torture_param(bool, gp_poll_full, false, "Use polling full-state GP wait primitives");
torture_param(bool, gp_poll_exp_full, false, "Use polling full-state expedited GP wait primitives");
torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
/* Reader, writer, and callback-barrier thread configuration. */
torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers");
torture_param(int, n_barrier_cbs, 0, "# of callbacks/kthreads for barrier testing");
torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
/* CPU-hotplug and callback-offload (NOCB) toggling. */
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
/* Read-then-exit episodes, shuffling, and test shutdown. */
torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)");
torture_param(int, read_exit_burst, 16, "# of read-then-exit bursts per episode, zero to disable");
torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles");
torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
/* Deliberate CPU and grace-period-kthread stalls. */
torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
torture_param(int, stall_cpu_holdoff, 10, "Time to wait before starting stall (s).");
torture_param(bool, stall_no_softlockup, false, "Avoid softlockup warning during cpu stall.");
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
torture_param(int, stall_gp_kthread, 0, "Grace-period kthread stall duration (s).");
/* Statistics, stutter, priority boosting, and miscellaneous tests. */
torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of seconds to run/halt test");
torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
torture_param(int, test_boost_duration, 4, "Duration of each boost test, seconds.");
torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds.");
torture_param(int, test_nmis, 0, "End-test NMI tests, 0 to disable.");
torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs");
torture_param(int, test_srcu_lockdep, 0, "Test specified SRCU deadlock scenario.");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
/*
 * Name of the flavor to test.  Exposed as a string module parameter with
 * mode 0444 and defaulting to "rcu".
 */
static char *torture_type = "rcu";
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, srcu, ...)");
/*
 * Thread counts and task handles for the test.  The tasks themselves are
 * created outside this part of the file (presumably as kthreads -- confirm
 * at the creation sites).
 */
static int nrealnocbers;
static int nrealreaders; /* Also scales the random-delay odds in the read_delay hooks. */
static struct task_struct *writer_task;
static struct task_struct **fakewriter_tasks;
static struct task_struct **reader_tasks;
static struct task_struct **nocb_tasks;
static struct task_struct *stats_task;
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS]; /* One slot per possible CPU index. */
static struct task_struct *stall_task;
static struct task_struct **fwd_prog_tasks;
static struct task_struct **barrier_cbs_tasks;
static struct task_struct *barrier_task;
static struct task_struct *read_exit_task;
/*
 * Number of pipe stages an element passes through before being freed; also
 * sizes the per-stage counter arrays (PIPE_LEN + 1 entries each).
 */
#define RCU_TORTURE_PIPE_LEN 10
// Mailbox-like structure to check RCU global memory ordering.
// Only ->rtc_ready is touched in the nearby update-side code, where
// rcu_torture_pipe_update_one() sets it to 1 with a store-release; the
// remaining fields are used elsewhere in the file.
struct rcu_torture_reader_check {
unsigned long rtc_myloops;
int rtc_chkrdr;
unsigned long rtc_chkloops;
int rtc_ready;
struct rcu_torture_reader_check *rtc_assigner;
} ____cacheline_internodealigned_in_smp;
// Update-side data structure used to check RCU readers.
struct rcu_torture {
struct rcu_head rtort_rcu; // Callback linkage handed to the flavor's call_rcu()-style hook.
int rtort_pipe_count; // Pipe stage; bumped by rcu_torture_pipe_update_one().
struct list_head rtort_free; // Links into rcu_torture_freelist or rcu_torture_removed.
int rtort_mbtest; // Cleared when the element finishes its trip through the pipe.
struct rcu_torture_reader_check *rtort_chkp; // Pending reader-check mailbox, or NULL.
};
/* Pool of free elements; list operations on it are done under rcu_torture_lock. */
static LIST_HEAD(rcu_torture_freelist);
static struct rcu_torture __rcu *rcu_torture_current;
static unsigned long rcu_torture_current_version;
/* Backing storage for the element pool. */
static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
static DEFINE_SPINLOCK(rcu_torture_lock);
/* Per-CPU histograms, one bucket per pipe stage plus one overflow bucket. */
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
/* Update-side histogram, indexed by an element's pipe count before each advance. */
static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
static struct rcu_torture_reader_check *rcu_torture_reader_mbchk;
/* Allocation statistics maintained by rcu_torture_alloc() and rcu_torture_free(). */
static atomic_t n_rcu_torture_alloc;
static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
/* Error and event counters updated elsewhere in the file. */
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_mbchk_fail;
static atomic_t n_rcu_torture_mbchk_tries;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_barrier_error;
static long n_rcu_torture_boost_ktrerror;
static long n_rcu_torture_boost_failure;
static long n_rcu_torture_boosts;
static atomic_long_t n_rcu_torture_timers;
static long n_barrier_attempts;
static long n_barrier_successes; /* did rcu_barrier test succeed? */
static unsigned long n_read_exits;
/* Elements removed from readers' view and still advancing through the pipe;
 * set up by rcu_sync_torture_init() and walked by rcu_torture_pipe_update(). */
static struct list_head rcu_torture_removed;
/* Compared against jiffies by shutdown_time_arrived(). */
static unsigned long shutdown_jiffies;
static unsigned long start_gp_seq;
static atomic_long_t n_nocb_offload;
static atomic_long_t n_nocb_deoffload;
/* One of the RTWS_ values; translated to text by rcu_torture_writer_state_getname(). */
static int rcu_torture_writer_state;
/*
 * Values stored in rcu_torture_writer_state.  They are dense and start at
 * zero because they index rcu_torture_writer_state_names[] directly.
 */
#define RTWS_FIXED_DELAY 0
#define RTWS_DELAY 1
#define RTWS_REPLACE 2
#define RTWS_DEF_FREE 3
#define RTWS_EXP_SYNC 4
#define RTWS_COND_GET 5
#define RTWS_COND_GET_FULL 6
#define RTWS_COND_GET_EXP 7
#define RTWS_COND_GET_EXP_FULL 8
#define RTWS_COND_SYNC 9
#define RTWS_COND_SYNC_FULL 10
#define RTWS_COND_SYNC_EXP 11
#define RTWS_COND_SYNC_EXP_FULL 12
#define RTWS_POLL_GET 13
#define RTWS_POLL_GET_FULL 14
#define RTWS_POLL_GET_EXP 15
#define RTWS_POLL_GET_EXP_FULL 16
#define RTWS_POLL_WAIT 17
#define RTWS_POLL_WAIT_FULL 18
#define RTWS_POLL_WAIT_EXP 19
#define RTWS_POLL_WAIT_EXP_FULL 20
#define RTWS_SYNC 21
#define RTWS_STUTTER 22
#define RTWS_STOPPING 23
/*
 * Printable names, in exactly the same order as the RTWS_ values above.
 * Adding a state requires adding its name at the matching position.
 */
static const char * const rcu_torture_writer_state_names[] = {
"RTWS_FIXED_DELAY",
"RTWS_DELAY",
"RTWS_REPLACE",
"RTWS_DEF_FREE",
"RTWS_EXP_SYNC",
"RTWS_COND_GET",
"RTWS_COND_GET_FULL",
"RTWS_COND_GET_EXP",
"RTWS_COND_GET_EXP_FULL",
"RTWS_COND_SYNC",
"RTWS_COND_SYNC_FULL",
"RTWS_COND_SYNC_EXP",
"RTWS_COND_SYNC_EXP_FULL",
"RTWS_POLL_GET",
"RTWS_POLL_GET_FULL",
"RTWS_POLL_GET_EXP",
"RTWS_POLL_GET_EXP_FULL",
"RTWS_POLL_WAIT",
"RTWS_POLL_WAIT_FULL",
"RTWS_POLL_WAIT_EXP",
"RTWS_POLL_WAIT_EXP_FULL",
"RTWS_SYNC",
"RTWS_STUTTER",
"RTWS_STOPPING",
};
/* Record reader segment types and duration for first failing read. */
struct rt_read_seg {
int rt_readstate;
unsigned long rt_delay_jiffies; /* Set by srcu_read_delay() when it sleeps. */
unsigned long rt_delay_ms; /* Set by rcu_read_delay() for its long delay. */
unsigned long rt_delay_us; /* Set by rcu_read_delay() for its short delay. */
bool rt_preempted; /* Set by rcu_read_delay() when it invokes the scheduler. */
};
/* Storage for the recorded segments; filled in outside this part of the file. */
static int err_segs_recorded;
static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS];
static int rt_read_nsegs;
/*
 * Translate the current rcu_torture_writer_state value into its printable
 * name.  A value with no entry in the name table yields "???".
 */
static const char *rcu_torture_writer_state_getname(void)
{
	unsigned int state = READ_ONCE(rcu_torture_writer_state);

	return state < ARRAY_SIZE(rcu_torture_writer_state_names) ?
	       rcu_torture_writer_state_names[state] : "???";
}
#ifdef CONFIG_RCU_TRACE
/* Timestamp from trace_clock_local(), scaled down by NSEC_PER_USEC. */
static u64 notrace rcu_trace_clock_local(void)
{
	u64 usecs = trace_clock_local();

	/* do_div() divides in place; its remainder result is not needed. */
	(void)do_div(usecs, NSEC_PER_USEC);
	return usecs;
}
#else /* #ifdef CONFIG_RCU_TRACE */
/* Without CONFIG_RCU_TRACE no timestamp is taken; callers always get zero. */
static u64 notrace rcu_trace_clock_local(void)
{
	return 0;
}
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/*
* Stop aggressive CPU-hog tests a bit before the end of the test in order
* to avoid interfering with test shutdown.
*/
static bool shutdown_time_arrived(void)
{
return shutdown_secs && time_after(jiffies, shutdown_jiffies - 30 * HZ);
}
/* State for priority-boost testing. */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
/* State for callback-barrier testing. */
static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
static bool barrier_phase; /* Test phase. */
static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
/* While nonzero, rcu_read_delay() skips its long delay. */
static atomic_t rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */
/*
 * Take the first element off the rcu_tortures free pool.
 *
 * Returns the element, or NULL if the pool is empty.  Either outcome is
 * counted: n_rcu_torture_alloc on success, n_rcu_torture_alloc_fail on an
 * empty pool.  The free list is manipulated under rcu_torture_lock with
 * bottom halves disabled.
 */
static struct rcu_torture *
rcu_torture_alloc(void)
{
	struct list_head *entry;
	struct rcu_torture *rp = NULL;

	spin_lock_bh(&rcu_torture_lock);
	if (list_empty(&rcu_torture_freelist)) {
		atomic_inc(&n_rcu_torture_alloc_fail);
	} else {
		atomic_inc(&n_rcu_torture_alloc);
		entry = rcu_torture_freelist.next;
		list_del_init(entry);
		rp = container_of(entry, struct rcu_torture, rtort_free);
	}
	spin_unlock_bh(&rcu_torture_lock);
	return rp;
}
/*
 * Return an element to the tail of the rcu_tortures free pool and count
 * the event in n_rcu_torture_free.  The list insertion is done under
 * rcu_torture_lock with bottom halves disabled.
 */
static void
rcu_torture_free(struct rcu_torture *p)
{
	struct list_head *entry = &p->rtort_free;

	atomic_inc(&n_rcu_torture_free);
	spin_lock_bh(&rcu_torture_lock);
	list_add_tail(entry, &rcu_torture_freelist);
	spin_unlock_bh(&rcu_torture_lock);
}
/*
 * Operations vector for selecting different types of tests.
 *
 * Each flavor under test (for example rcu_ops, rcu_busted_ops, srcu_ops)
 * supplies one of these.  A flavor may leave hooks it does not support
 * unset; callers outside this part of the file are presumably expected to
 * check for NULL before calling optional hooks -- confirm at the call sites.
 */
struct rcu_torture_ops {
int ttype;
/* Per-run setup and teardown. */
void (*init)(void);
void (*cleanup)(void);
/* Reader side: readlock() returns an index that is handed back to readunlock(). */
int (*readlock)(void);
void (*read_delay)(struct torture_random_state *rrsp,
struct rt_read_seg *rtrsp);
void (*readunlock)(int idx);
int (*readlock_held)(void);
/* Grace-period sequence sampling and differencing. */
unsigned long (*get_gp_seq)(void);
unsigned long (*gp_diff)(unsigned long new, unsigned long old);
/* Update side: deferred free plus normal and expedited synchronous waits. */
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*exp_sync)(void);
/* Expedited polled and conditional grace-period interfaces. */
unsigned long (*get_gp_state_exp)(void);
unsigned long (*start_gp_poll_exp)(void);
void (*start_gp_poll_exp_full)(struct rcu_gp_oldstate *rgosp);
bool (*poll_gp_state_exp)(unsigned long oldstate);
void (*cond_sync_exp)(unsigned long oldstate);
void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp);
/* Normal polled and conditional grace-period interfaces. */
unsigned long (*get_comp_state)(void);
void (*get_comp_state_full)(struct rcu_gp_oldstate *rgosp);
bool (*same_gp_state)(unsigned long oldstate1, unsigned long oldstate2);
bool (*same_gp_state_full)(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2);
unsigned long (*get_gp_state)(void);
void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp);
unsigned long (*start_gp_poll)(void);
void (*start_gp_poll_full)(struct rcu_gp_oldstate *rgosp);
bool (*poll_gp_state)(unsigned long oldstate);
bool (*poll_gp_state_full)(struct rcu_gp_oldstate *rgosp);
bool (*poll_need_2gp)(bool poll, bool poll_full);
void (*cond_sync)(unsigned long oldstate);
void (*cond_sync_full)(struct rcu_gp_oldstate *rgosp);
/* Callback posting and callback barrier. */
call_rcu_func_t call;
void (*cb_barrier)(void);
/* Forcing quiescent states, statistics, and diagnostics. */
void (*fqs)(void);
void (*stats)(void);
void (*gp_kthread_dbg)(void);
bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
int (*stall_dur)(void);
void (*get_gp_data)(int *flags, unsigned long *gp_seq);
void (*gp_slow_register)(atomic_t *rgssp);
void (*gp_slow_unregister)(atomic_t *rgssp);
/* Per-flavor limits and capability flags. */
long cbflood_max;
int irq_capable;
int can_boost;
int extendables; /* RCUTORTURE_RDR_ bits this flavor supports. */
int slow_gps;
int no_pi_lock;
int debug_objects;
const char *name; /* Flavor name, e.g. "rcu", "busted", "srcu". */
};
/* Operations vector of the flavor currently under test. */
static struct rcu_torture_ops *cur_ops;
/*
 * Definitions for rcu torture testing.
 */

/*
 * ->readlock_held hook for the "rcu" and "busted" flavors: returns 1 if
 * either rcu_read_lock_bh_held() or rcu_read_lock_sched_held() reports a
 * held reader, otherwise 0.
 */
static int torture_readlock_not_held(void)
{
	if (rcu_read_lock_bh_held())
		return 1;
	return !!rcu_read_lock_sched_held();
}
/*
 * ->readlock hook for the "rcu" and "busted" flavors: enter an RCU
 * read-side critical section.  The returned index is always 0.
 */
static int rcu_torture_read_lock(void)
{
rcu_read_lock();
return 0;
}
/*
 * ->read_delay hook for the "rcu" flavor, also reused by "busted" and
 * called by srcu_read_delay().
 *
 * @rrsp:  random-number state used for every probabilistic choice below.
 * @rtrsp: reader-segment record in which any delay taken is logged.
 *
 * Makes up to three independent random draws, each scaled by nrealreaders
 * so that the per-reader odds shrink as the reader count grows:
 *  - a long mdelay(), skipped entirely while rcu_fwd_cb_nodelay is nonzero;
 *  - a short udelay();
 *  - a call to torture_preempt_schedule(), only when preempt_count() is 0.
 * The number of torture_random() calls depends on short-circuit evaluation,
 * so reordering the conditions would change the random sequence.
 */
static void
rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
{
unsigned long started;
unsigned long completed;
const unsigned long shortdelay_us = 200;
unsigned long longdelay_ms = 300;
unsigned long long ts;
/* We want a short delay sometimes to make a reader delay the grace
 * period, and we want a long delay occasionally to trigger
 * force_quiescent_state. */
if (!atomic_read(&rcu_fwd_cb_nodelay) &&
!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
/* Sample the grace-period sequence and clock around the delay for tracing. */
started = cur_ops->get_gp_seq();
ts = rcu_trace_clock_local();
if (preempt_count() & (SOFTIRQ_MASK | HARDIRQ_MASK))
longdelay_ms = 5; /* Avoid triggering BH limits. */
mdelay(longdelay_ms);
rtrsp->rt_delay_ms = longdelay_ms;
completed = cur_ops->get_gp_seq();
do_trace_rcu_torture_read(cur_ops->name, NULL, ts,
started, completed);
}
if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) {
udelay(shortdelay_us);
rtrsp->rt_delay_us = shortdelay_us;
}
if (!preempt_count() &&
!(torture_random(rrsp) % (nrealreaders * 500))) {
torture_preempt_schedule(); /* QS only if preemptible. */
rtrsp->rt_preempted = true;
}
}
/*
 * ->readunlock hook for the "rcu" and "busted" flavors: leave the RCU
 * read-side critical section.  @idx is ignored.
 */
static void rcu_torture_read_unlock(int idx)
{
rcu_read_unlock();
}
/*
 * Advance one element by one stage of the pipe.  This should be invoked
 * after a grace period.
 *
 * Returns true once the element's pipe count has reached
 * RCU_TORTURE_PIPE_LEN, in which case the caller is expected to free it;
 * returns false while the element still has stages to go.
 */
static bool
rcu_torture_pipe_update_one(struct rcu_torture *rp)
{
int i;
struct rcu_torture_reader_check *rtrcp = READ_ONCE(rp->rtort_chkp);
// Detach any pending reader-check mailbox and mark it ready.
if (rtrcp) {
WRITE_ONCE(rp->rtort_chkp, NULL);
smp_store_release(&rtrcp->rtc_ready, 1); // Pair with smp_load_acquire().
}
i = rp->rtort_pipe_count;
// Clamp so that the index below stays within rcu_torture_wcount[].
if (i > RCU_TORTURE_PIPE_LEN)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
WRITE_ONCE(rp->rtort_pipe_count, i + 1);
ASSERT_EXCLUSIVE_WRITER(rp->rtort_pipe_count);
if (i + 1 >= RCU_TORTURE_PIPE_LEN) {
rp->rtort_mbtest = 0;
return true;
}
return false;
}
/*
 * Advance every element on rcu_torture_removed by one pipe stage, first
 * adding @old_rp to that list if it is non-NULL.  Elements that complete
 * the pipe are unlinked and returned to the free pool.  Suitable for
 * synchronous grace-period primitives.
 */
static void
rcu_torture_pipe_update(struct rcu_torture *old_rp)
{
	struct rcu_torture *cur;
	struct rcu_torture *next;

	if (old_rp)
		list_add(&old_rp->rtort_free, &rcu_torture_removed);
	list_for_each_entry_safe(cur, next, &rcu_torture_removed, rtort_free) {
		if (!rcu_torture_pipe_update_one(cur))
			continue;	/* Still has pipe stages to go. */
		list_del(&cur->rtort_free);
		rcu_torture_free(cur);
	}
}
/*
 * Grace-period callback for an element posted through ->deferred_free.
 * Advances the element one pipe stage, then either frees it (pipe
 * complete) or re-posts it for another grace period.
 */
static void
rcu_torture_cb(struct rcu_head *p)
{
	struct rcu_torture *rp;

	/*
	 * Test is ending, just drop callbacks on the floor.
	 * The next initialization will pick up the pieces.
	 */
	if (torture_must_stop_irq())
		return;

	rp = container_of(p, struct rcu_torture, rtort_rcu);
	if (!rcu_torture_pipe_update_one(rp)) {
		cur_ops->deferred_free(rp);
		return;
	}
	rcu_torture_free(rp);
}
/*
 * ->get_gp_seq hook for flavors with no grace-period sequence to report
 * (used by rcu_busted_ops): always returns 0.
 */
static unsigned long rcu_no_completed(void)
{
return 0;
}
/*
 * ->deferred_free hook for the "rcu" flavor: post rcu_torture_cb() for @p
 * via call_rcu_hurry().
 */
static void rcu_torture_deferred_free(struct rcu_torture *p)
{
call_rcu_hurry(&p->rtort_rcu, rcu_torture_cb);
}
/*
 * Common ->init hook: reset rcu_torture_removed to an empty list before a
 * test run.
 */
static void rcu_sync_torture_init(void)
{
INIT_LIST_HEAD(&rcu_torture_removed);
}
/*
 * ->poll_need_2gp hook for the "rcu" flavor: returns @poll unchanged and
 * ignores @poll_full.
 */
static bool rcu_poll_need_2gp(bool poll, bool poll_full)
{
return poll;
}
/*
 * Operations vector for the "rcu" flavor.
 *
 * Notes on this table as written:
 *  - .get_gp_state_exp and .poll_gp_state_exp reuse the non-expedited
 *    get_state_synchronize_rcu() and poll_state_synchronize_rcu().
 *  - .cleanup, .stats, .cond_sync_exp_full, and .cbflood_max are not set
 *    and therefore remain zero/NULL.
 */
static struct rcu_torture_ops rcu_ops = {
.ttype = RCU_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
.readlock_held = torture_readlock_not_held,
.get_gp_seq = rcu_get_gp_seq,
.gp_diff = rcu_seq_diff,
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
.same_gp_state = same_state_synchronize_rcu,
.same_gp_state_full = same_state_synchronize_rcu_full,
.get_comp_state = get_completed_synchronize_rcu,
.get_comp_state_full = get_completed_synchronize_rcu_full,
.get_gp_state = get_state_synchronize_rcu,
.get_gp_state_full = get_state_synchronize_rcu_full,
.start_gp_poll = start_poll_synchronize_rcu,
.start_gp_poll_full = start_poll_synchronize_rcu_full,
.poll_gp_state = poll_state_synchronize_rcu,
.poll_gp_state_full = poll_state_synchronize_rcu_full,
.poll_need_2gp = rcu_poll_need_2gp,
.cond_sync = cond_synchronize_rcu,
.cond_sync_full = cond_synchronize_rcu_full,
.get_gp_state_exp = get_state_synchronize_rcu,
.start_gp_poll_exp = start_poll_synchronize_rcu_expedited,
.start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full,
.poll_gp_state_exp = poll_state_synchronize_rcu,
.cond_sync_exp = cond_synchronize_rcu_expedited,
.call = call_rcu_hurry,
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
.gp_kthread_dbg = show_rcu_gp_kthreads,
.check_boost_failed = rcu_check_boost_fail,
.stall_dur = rcu_jiffies_till_stall_check,
.get_gp_data = rcutorture_get_gp_data,
.gp_slow_register = rcu_gp_slow_register,
.gp_slow_unregister = rcu_gp_slow_unregister,
.irq_capable = 1,
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
.extendables = RCUTORTURE_MAX_EXTEND,
.debug_objects = 1,
.name = "rcu"
};
/*
 * Don't even think about trying any of these in real life!!!
 * The names include "busted", and they really mean it!
 * The only purpose of these functions is to provide a buggy RCU
 * implementation to make sure that rcutorture correctly emits
 * buggy-RCU error messages.
 */
/*
 * ->deferred_free hook for the "busted" flavor: invokes rcu_torture_cb()
 * immediately instead of waiting for a grace period.
 */
static void rcu_busted_torture_deferred_free(struct rcu_torture *p)
{
/* This is a deliberate bug for testing purposes only! */
rcu_torture_cb(&p->rtort_rcu);
}
/*
 * ->sync and ->exp_sync hook for the "busted" flavor: returns at once
 * without waiting for anything.
 */
static void synchronize_rcu_busted(void)
{
/* This is a deliberate bug for testing purposes only! */
}
/*
 * ->call hook for the "busted" flavor: runs @func on @head synchronously,
 * with no intervening grace period.
 */
static void
call_rcu_busted(struct rcu_head *head, rcu_callback_t func)
{
/* This is a deliberate bug for testing purposes only! */
func(head);
}
/*
 * Operations vector for the deliberately broken "busted" flavor.  It keeps
 * the real RCU reader hooks but replaces every update-side hook with a
 * variant that does not wait for a grace period.  Hooks not listed here
 * remain NULL.
 */
static struct rcu_torture_ops rcu_busted_ops = {
.ttype = INVALID_RCU_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_torture_read_unlock,
.readlock_held = torture_readlock_not_held,
.get_gp_seq = rcu_no_completed,
.deferred_free = rcu_busted_torture_deferred_free,
.sync = synchronize_rcu_busted,
.exp_sync = synchronize_rcu_busted,
.call = call_rcu_busted,
.irq_capable = 1,
.name = "busted"
};
/*
* Definitions for srcu torture testing.
*/
DEFINE_STATIC_SRCU(srcu_ctl);
static struct srcu_struct srcu_ctld;
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() On strict load-store architectures, the use of this_cpu_inc() by srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU. To see this suppose that an NMI arrives in the middle of srcu_read_lock(), just after it has read ->srcu_lock_count, but before it has written the incremented value back to memory. If that NMI handler also does srcu_read_lock() and srcu_read_lock() on that same srcu_struct structure, then upon return from that NMI handler, the interrupted srcu_read_lock() will overwrite the NMI handler's update to ->srcu_lock_count, but leave unchanged the NMI handler's update by srcu_read_unlock() to ->srcu_unlock_count. This can result in a too-short SRCU grace period, which can in turn result in arbitrary memory corruption. If the NMI handler instead interrupts the srcu_read_unlock(), this can result in eternal SRCU grace periods, which is not much better. This commit therefore creates a pair of new srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in both NMI handlers and in process and IRQ context. It is bad practice to mix the existing and the new _nmisafe() primitives on the same srcu_struct structure. Use one set or the other, not both. Just to underline that "bad practice" point, using srcu_read_lock() at process level and srcu_read_lock_nmisafe() in your NMI handler will not, repeat NOT, work. If you do not immediately understand why this is the case, please review the earlier paragraphs in this commit log. [ paulmck: Apply kernel test robot feedback. ] [ paulmck: Apply feedback from Randy Dunlap. ] [ paulmck: Apply feedback from John Ogness. ] [ paulmck: Apply feedback from Frederic Weisbecker. ] Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org> Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: John Ogness <john.ogness@linutronix.de> Cc: Petr Mladek <pmladek@suse.com>
2022-09-15 21:29:07 +00:00
static struct rcu_torture_ops srcud_ops;
/*
 * ->get_gp_data hook for SRCU: forward to srcutorture_get_gp_data() for
 * the srcu_struct currently under test.
 */
static void srcu_get_gp_data(int *flags, unsigned long *gp_seq)
{
srcutorture_get_gp_data(srcu_ctlp, flags, gp_seq);
}
static int srcu_torture_read_lock(void)
{
srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() On strict load-store architectures, the use of this_cpu_inc() by srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU. To see this suppose that an NMI arrives in the middle of srcu_read_lock(), just after it has read ->srcu_lock_count, but before it has written the incremented value back to memory. If that NMI handler also does srcu_read_lock() and srcu_read_lock() on that same srcu_struct structure, then upon return from that NMI handler, the interrupted srcu_read_lock() will overwrite the NMI handler's update to ->srcu_lock_count, but leave unchanged the NMI handler's update by srcu_read_unlock() to ->srcu_unlock_count. This can result in a too-short SRCU grace period, which can in turn result in arbitrary memory corruption. If the NMI handler instead interrupts the srcu_read_unlock(), this can result in eternal SRCU grace periods, which is not much better. This commit therefore creates a pair of new srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in both NMI handlers and in process and IRQ context. It is bad practice to mix the existing and the new _nmisafe() primitives on the same srcu_struct structure. Use one set or the other, not both. Just to underline that "bad practice" point, using srcu_read_lock() at process level and srcu_read_lock_nmisafe() in your NMI handler will not, repeat NOT, work. If you do not immediately understand why this is the case, please review the earlier paragraphs in this commit log. [ paulmck: Apply kernel test robot feedback. ] [ paulmck: Apply feedback from Randy Dunlap. ] [ paulmck: Apply feedback from John Ogness. ] [ paulmck: Apply feedback from Frederic Weisbecker. ] Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org> Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: John Ogness <john.ogness@linutronix.de> Cc: Petr Mladek <pmladek@suse.com>
2022-09-15 21:29:07 +00:00
if (cur_ops == &srcud_ops)
return srcu_read_lock_nmisafe(srcu_ctlp);
else
return srcu_read_lock(srcu_ctlp);
}
/*
 * ->read_delay hook for SRCU.
 *
 * We want there to be long-running readers, but not all the time: when the
 * random draw comes up zero and we are running in task context, sleep for
 * ten jiffies and log that in @rtrsp.  In every other case fall back to
 * rcu_read_delay().
 */
static void
srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
{
	const long sleep_jiffies = 10;
	const long us_per_jiffy = 1000000 / HZ;
	long draw;

	draw = torture_random(rrsp) %
	       (nrealreaders * 2 * sleep_jiffies * us_per_jiffy);
	if (draw || !in_task()) {
		rcu_read_delay(rrsp, rtrsp);
		return;
	}
	schedule_timeout_interruptible(sleep_jiffies);
	rtrsp->rt_delay_jiffies = sleep_jiffies;
}
static void srcu_torture_read_unlock(int idx)
{
srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() On strict load-store architectures, the use of this_cpu_inc() by srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU. To see this suppose that an NMI arrives in the middle of srcu_read_lock(), just after it has read ->srcu_lock_count, but before it has written the incremented value back to memory. If that NMI handler also does srcu_read_lock() and srcu_read_lock() on that same srcu_struct structure, then upon return from that NMI handler, the interrupted srcu_read_lock() will overwrite the NMI handler's update to ->srcu_lock_count, but leave unchanged the NMI handler's update by srcu_read_unlock() to ->srcu_unlock_count. This can result in a too-short SRCU grace period, which can in turn result in arbitrary memory corruption. If the NMI handler instead interrupts the srcu_read_unlock(), this can result in eternal SRCU grace periods, which is not much better. This commit therefore creates a pair of new srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in both NMI handlers and in process and IRQ context. It is bad practice to mix the existing and the new _nmisafe() primitives on the same srcu_struct structure. Use one set or the other, not both. Just to underline that "bad practice" point, using srcu_read_lock() at process level and srcu_read_lock_nmisafe() in your NMI handler will not, repeat NOT, work. If you do not immediately understand why this is the case, please review the earlier paragraphs in this commit log. [ paulmck: Apply kernel test robot feedback. ] [ paulmck: Apply feedback from Randy Dunlap. ] [ paulmck: Apply feedback from John Ogness. ] [ paulmck: Apply feedback from Frederic Weisbecker. ] Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org> Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: John Ogness <john.ogness@linutronix.de> Cc: Petr Mladek <pmladek@suse.com>
2022-09-15 21:29:07 +00:00
if (cur_ops == &srcud_ops)
srcu_read_unlock_nmisafe(srcu_ctlp, idx);
else
srcu_read_unlock(srcu_ctlp, idx);
}
/* ->readlock_held hook for SRCU: result of srcu_read_lock_held() on the srcu_struct under test. */
static int torture_srcu_read_lock_held(void)
{
return srcu_read_lock_held(srcu_ctlp);
}
/* ->get_gp_seq hook for SRCU: result of srcu_batches_completed() on the srcu_struct under test. */
static unsigned long srcu_torture_completed(void)
{
return srcu_batches_completed(srcu_ctlp);
}
/* ->deferred_free hook for SRCU: post rcu_torture_cb() for @rp via call_srcu(). */
static void srcu_torture_deferred_free(struct rcu_torture *rp)
{
call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb);
}
/* ->sync hook for SRCU: synchronize_srcu() on the srcu_struct under test. */
static void srcu_torture_synchronize(void)
{
synchronize_srcu(srcu_ctlp);
}
/* ->get_gp_state hook for SRCU: result of get_state_synchronize_srcu() on the srcu_struct under test. */
static unsigned long srcu_torture_get_gp_state(void)
{
return get_state_synchronize_srcu(srcu_ctlp);
}
/* ->start_gp_poll hook for SRCU: result of start_poll_synchronize_srcu() on the srcu_struct under test. */
static unsigned long srcu_torture_start_gp_poll(void)
{
return start_poll_synchronize_srcu(srcu_ctlp);
}
/* ->poll_gp_state hook for SRCU: result of poll_state_synchronize_srcu() for @oldstate. */
static bool srcu_torture_poll_gp_state(unsigned long oldstate)
{
return poll_state_synchronize_srcu(srcu_ctlp, oldstate);
}
/* ->call hook for SRCU: post @func on @head via call_srcu() on the srcu_struct under test. */
static void srcu_torture_call(struct rcu_head *head,
rcu_callback_t func)
{
call_srcu(srcu_ctlp, head, func);
}
/* ->cb_barrier hook for SRCU: srcu_barrier() on the srcu_struct under test. */
static void srcu_torture_barrier(void)
{
srcu_barrier(srcu_ctlp);
}
/*
 * ->stats hook for SRCU: have srcu_torture_stats_print() report on the
 * srcu_struct under test, passing torture_type and TORTURE_FLAG through.
 */
static void srcu_torture_stats(void)
{
srcu_torture_stats_print(srcu_ctlp, torture_type, TORTURE_FLAG);
}
/* ->exp_sync hook for SRCU: synchronize_srcu_expedited() on the srcu_struct under test. */
static void srcu_torture_synchronize_expedited(void)
{
synchronize_srcu_expedited(srcu_ctlp);
}
/*
 * Operations vector for the "srcu" flavor.  Its .init hook is the common
 * rcu_sync_torture_init() and no .cleanup hook is set, so srcu_ctlp keeps
 * pointing at the statically defined srcu_ctl.  Hooks not listed here
 * remain NULL.
 */
static struct rcu_torture_ops srcu_ops = {
.ttype = SRCU_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
.get_gp_state = srcu_torture_get_gp_state,
.start_gp_poll = srcu_torture_start_gp_poll,
.poll_gp_state = srcu_torture_poll_gp_state,
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.get_gp_data = srcu_get_gp_data,
.cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.debug_objects = 1,
.name = "srcu"
};
/*
 * ->init hook that tests a run-time initialized srcu_struct: do the common
 * list setup, initialize srcu_ctld (warning if that fails), and make it
 * the srcu_struct under test.
 */
static void srcu_torture_init(void)
{
	int ret;

	rcu_sync_torture_init();
	ret = init_srcu_struct(&srcu_ctld);
	WARN_ON(ret);
	srcu_ctlp = &srcu_ctld;
}
static void srcu_torture_cleanup(void)
{
srcu: Remove cleanup_srcu_struct_quiesced() The cleanup_srcu_struct_quiesced() function was added because NVME used WQ_MEM_RECLAIM workqueues and SRCU did not, which meant that NVME workqueues waiting on SRCU workqueues could result in deadlocks during low-memory conditions. However, SRCU now also has WQ_MEM_RECLAIM workqueues, so there is no longer a potential for deadlock. Furthermore, it turns out to be extremely hard to use cleanup_srcu_struct_quiesced() correctly due to the fact that SRCU callback invocation accesses the srcu_struct structure's per-CPU data area just after callbacks are invoked. Therefore, the usual practice of using srcu_barrier() to wait for callbacks to be invoked before invoking cleanup_srcu_struct_quiesced() fails because SRCU's callback-invocation workqueue handler might be delayed, which can result in cleanup_srcu_struct_quiesced() being invoked (and thus freeing the per-CPU data) before the SRCU's callback-invocation workqueue handler is finished using that per-CPU data. Nor is this a theoretical problem: KASAN emitted use-after-free warnings because of this problem on actual runs. In short, NVME can now safely invoke cleanup_srcu_struct(), which avoids the use-after-free scenario. And cleanup_srcu_struct_quiesced() is quite difficult to use safely. This commit therefore removes cleanup_srcu_struct_quiesced(), switching its sole user back to cleanup_srcu_struct(). This effectively reverts the following pair of commits: f7194ac32ca2 ("srcu: Add cleanup_srcu_struct_quiesced()") 4317228ad9b8 ("nvme: Avoid flush dependency in delete controller flow") Reported-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Tested-by: Bart Van Assche <bvanassche@acm.org>
2019-02-13 21:54:37 +00:00
cleanup_srcu_struct(&srcu_ctld);
srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
}
/* As above, but dynamically allocated. */
static struct rcu_torture_ops srcud_ops = {
.ttype = SRCU_FLAVOR,
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
.get_gp_state = srcu_torture_get_gp_state,
.start_gp_poll = srcu_torture_start_gp_poll,
.poll_gp_state = srcu_torture_poll_gp_state,
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.get_gp_data = srcu_get_gp_data,
.cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.debug_objects = 1,
.name = "srcud"
};
/* As above, but broken due to inappropriate reader extension. */
static struct rcu_torture_ops busted_srcud_ops = {
.ttype = SRCU_FLAVOR,
.init = srcu_torture_init,
.cleanup = srcu_torture_cleanup,
.readlock = srcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = srcu_torture_read_unlock,
.readlock_held = torture_srcu_read_lock_held,
.get_gp_seq = srcu_torture_completed,
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.extendables = RCUTORTURE_MAX_EXTEND,
.name = "busted_srcud"
};
/*
* Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
* This implementation does not necessarily work well with CPU hotplug.
*/
static void synchronize_rcu_trivial(void)
{
int cpu;
for_each_online_cpu(cpu) {
torture_sched_setaffinity(current->pid, cpumask_of(cpu));
WARN_ON_ONCE(raw_smp_processor_id() != cpu);
}
}
/*
 * Reader entry for the trivial flavor: disable preemption.  The returned
 * index is always 0; the matching unlock does not look at it.
 */
static int rcu_torture_read_lock_trivial(void)
{
	preempt_disable();

	return 0;
}
/*
 * Reader exit for the trivial flavor: re-enable preemption.
 * @idx is accepted for signature compatibility and is not used.
 */
static void rcu_torture_read_unlock_trivial(int idx)
{
	preempt_enable();
}
/*
 * Torture operations for the "trivial" flavor.  Both ->sync and
 * ->exp_sync map to synchronize_rcu_trivial().
 */
static struct rcu_torture_ops trivial_ops = {
	.name		= "trivial",
	.ttype		= RCU_TRIVIAL_FLAVOR,

	/* Setup. */
	.init		= rcu_sync_torture_init,

	/* Read side. */
	.readlock	= rcu_torture_read_lock_trivial,
	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
	.readunlock	= rcu_torture_read_unlock_trivial,
	.readlock_held	= torture_readlock_not_held,

	/* Update side. */
	.sync		= synchronize_rcu_trivial,
	.exp_sync	= synchronize_rcu_trivial,

	/* Progress reporting and capability flags. */
	.get_gp_seq	= rcu_no_completed,
	.irq_capable	= 1,
};
#ifdef CONFIG_TASKS_RCU
/*
 * Definitions for RCU-tasks torture testing.
 */

/* Reader entry for the "tasks" flavor: does nothing and returns index 0. */
static int tasks_torture_read_lock(void)
{
	return 0;
}
/* Reader exit for the "tasks" flavor: intentionally empty, @idx is unused. */
static void tasks_torture_read_unlock(int idx)
{
}
/*
 * Queue @p's embedded ->rtort_rcu with call_rcu_tasks() so that
 * rcu_torture_cb() is invoked for it later.
 */
static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
{
	struct rcu_head *rhp = &p->rtort_rcu;

	call_rcu_tasks(rhp, rcu_torture_cb);
}
/*
 * ->exp_sync hook for the "tasks" flavor: invoke synchronize_rcu_mult()
 * with both call_rcu_tasks and call_rcu_hurry.
 */
static void synchronize_rcu_mult_test(void)
{
	synchronize_rcu_mult(call_rcu_tasks, call_rcu_hurry);
}
/* Torture operations for the "tasks" flavor. */
static struct rcu_torture_ops tasks_ops = {
	.name		= "tasks",
	.ttype		= RCU_TASKS_FLAVOR,

	/* Setup. */
	.init		= rcu_sync_torture_init,

	/* Read side (lock and unlock are no-ops for this flavor). */
	.readlock	= tasks_torture_read_lock,
	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
	.readunlock	= tasks_torture_read_unlock,

	/* Update side. */
	.deferred_free	= rcu_tasks_torture_deferred_free,
	.sync		= synchronize_rcu_tasks,
	.exp_sync	= synchronize_rcu_mult_test,
	.call		= call_rcu_tasks,
	.cb_barrier	= rcu_barrier_tasks,

	/* Progress reporting and debugging. */
	.get_gp_seq	= rcu_no_completed,
	.gp_kthread_dbg	= show_rcu_tasks_classic_gp_kthread,
	.get_gp_data	= rcu_tasks_get_gp_data,

	/* Capability flags. */
	.irq_capable	= 1,
	.slow_gps	= 1,
};
#define TASKS_OPS &tasks_ops,
rcutorture: Add trivial RCU implementation I have been showing off a trivial RCU implementation for non-preemptive environments for some time now: #define rcu_read_lock() #define rcu_read_unlock() #define rcu_dereference(p) READ_ONCE(p) #define rcu_assign_pointer(p, v) smp_store_release(&(p), (v)) void synchronize_rcu(void) { int cpu; for_each_online_cpu(cpu) sched_setaffinity(current->pid, cpumask_of(cpu)); } Trivial or not, as the old saying goes, "if it ain't tested, it don't work!". This commit therefore adds a "trivial" flavor to rcutorture and a corresponding TRIVIAL test scenario. This variant does not handle CPU hotplug, which is unconditionally enabled on x86 for post-v5.1-rc3 kernels, which is why the TRIVIAL.boot says "rcutorture.onoff_interval=0". This commit actually does handle CONFIG_PREEMPT=y kernels, but only because it turns back the Linux-kernel clock in order to provide these alternative definitions (or the moral equivalent thereof): #define rcu_read_lock() preempt_disable() #define rcu_read_unlock() preempt_enable() In CONFIG_PREEMPT=n kernels without debugging, these are equivalent to empty macros give or take a compiler barrier. However, the have been successfully tested with actual empty macros as well. Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> [ paulmck: Fix symbol issue reported by kbuild test robot <lkp@intel.com>. ] [ paulmck: Work around sched_setaffinity() issue noted by Andrea Parri. ] [ paulmck: Add rcutorture.shuffle_interval=0 to TRIVIAL.boot to fix interaction with shuffler task noted by Peter Zijlstra. ] Tested-by: Andrea Parri <andrea.parri@amarulasolutions.com>
2019-04-19 14:38:27 +00:00
#else // #ifdef CONFIG_TASKS_RCU
rcutorture: Add trivial RCU implementation I have been showing off a trivial RCU implementation for non-preemptive environments for some time now: #define rcu_read_lock() #define rcu_read_unlock() #define rcu_dereference(p) READ_ONCE(p) #define rcu_assign_pointer(p, v) smp_store_release(&(p), (v)) void synchronize_rcu(void) { int cpu; for_each_online_cpu(cpu) sched_setaffinity(current->pid, cpumask_of(cpu)); } Trivial or not, as the old saying goes, "if it ain't tested, it don't work!". This commit therefore adds a "trivial" flavor to rcutorture and a corresponding TRIVIAL test scenario. This variant does not handle CPU hotplug, which is unconditionally enabled on x86 for post-v5.1-rc3 kernels, which is why the TRIVIAL.boot says "rcutorture.onoff_interval=0". This commit actually does handle CONFIG_PREEMPT=y kernels, but only because it turns back the Linux-kernel clock in order to provide these alternative definitions (or the moral equivalent thereof): #define rcu_read_lock() preempt_disable() #define rcu_read_unlock() preempt_enable() In CONFIG_PREEMPT=n kernels without debugging, these are equivalent to empty macros give or take a compiler barrier. However, the have been successfully tested with actual empty macros as well. Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> [ paulmck: Fix symbol issue reported by kbuild test robot <lkp@intel.com>. ] [ paulmck: Work around sched_setaffinity() issue noted by Andrea Parri. ] [ paulmck: Add rcutorture.shuffle_interval=0 to TRIVIAL.boot to fix interaction with shuffler task noted by Peter Zijlstra. ] Tested-by: Andrea Parri <andrea.parri@amarulasolutions.com>
2019-04-19 14:38:27 +00:00
#define TASKS_OPS
rcutorture: Add trivial RCU implementation I have been showing off a trivial RCU implementation for non-preemptive environments for some time now: #define rcu_read_lock() #define rcu_read_unlock() #define rcu_dereference(p) READ_ONCE(p) #define rcu_assign_pointer(p, v) smp_store_release(&(p), (v)) void synchronize_rcu(void) { int cpu; for_each_online_cpu(cpu) sched_setaffinity(current->pid, cpumask_of(cpu)); } Trivial or not, as the old saying goes, "if it ain't tested, it don't work!". This commit therefore adds a "trivial" flavor to rcutorture and a corresponding TRIVIAL test scenario. This variant does not handle CPU hotplug, which is unconditionally enabled on x86 for post-v5.1-rc3 kernels, which is why the TRIVIAL.boot says "rcutorture.onoff_interval=0". This commit actually does handle CONFIG_PREEMPT=y kernels, but only because it turns back the Linux-kernel clock in order to provide these alternative definitions (or the moral equivalent thereof): #define rcu_read_lock() preempt_disable() #define rcu_read_unlock() preempt_enable() In CONFIG_PREEMPT=n kernels without debugging, these are equivalent to empty macros give or take a compiler barrier. However, the have been successfully tested with actual empty macros as well. Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> [ paulmck: Fix symbol issue reported by kbuild test robot <lkp@intel.com>. ] [ paulmck: Work around sched_setaffinity() issue noted by Andrea Parri. ] [ paulmck: Add rcutorture.shuffle_interval=0 to TRIVIAL.boot to fix interaction with shuffler task noted by Peter Zijlstra. ] Tested-by: Andrea Parri <andrea.parri@amarulasolutions.com>
2019-04-19 14:38:27 +00:00
#endif // #else #ifdef CONFIG_TASKS_RCU
rcutorture: Add trivial RCU implementation I have been showing off a trivial RCU implementation for non-preemptive environments for some time now: #define rcu_read_lock() #define rcu_read_unlock() #define rcu_dereference(p) READ_ONCE(p) #define rcu_assign_pointer(p, v) smp_store_release(&(p), (v)) void synchronize_rcu(void) { int cpu; for_each_online_cpu(cpu) sched_setaffinity(current->pid, cpumask_of(cpu)); } Trivial or not, as the old saying goes, "if it ain't tested, it don't work!". This commit therefore adds a "trivial" flavor to rcutorture and a corresponding TRIVIAL test scenario. This variant does not handle CPU hotplug, which is unconditionally enabled on x86 for post-v5.1-rc3 kernels, which is why the TRIVIAL.boot says "rcutorture.onoff_interval=0". This commit actually does handle CONFIG_PREEMPT=y kernels, but only because it turns back the Linux-kernel clock in order to provide these alternative definitions (or the moral equivalent thereof): #define rcu_read_lock() preempt_disable() #define rcu_read_unlock() preempt_enable() In CONFIG_PREEMPT=n kernels without debugging, these are equivalent to empty macros give or take a compiler barrier. However, the have been successfully tested with actual empty macros as well. Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> [ paulmck: Fix symbol issue reported by kbuild test robot <lkp@intel.com>. ] [ paulmck: Work around sched_setaffinity() issue noted by Andrea Parri. ] [ paulmck: Add rcutorture.shuffle_interval=0 to TRIVIAL.boot to fix interaction with shuffler task noted by Peter Zijlstra. ] Tested-by: Andrea Parri <andrea.parri@amarulasolutions.com>
2019-04-19 14:38:27 +00:00
#ifdef CONFIG_TASKS_RUDE_RCU
rcutorture: Add trivial RCU implementation I have been showing off a trivial RCU implementation for non-preemptive environments for some time now: #define rcu_read_lock() #define rcu_read_unlock() #define rcu_dereference(p) READ_ONCE(p) #define rcu_assign_pointer(p, v) smp_store_release(&(p), (v)) void synchronize_rcu(void) { int cpu; for_each_online_cpu(cpu) sched_setaffinity(current->pid, cpumask_of(cpu)); } Trivial or not, as the old saying goes, "if it ain't tested, it don't work!". This commit therefore adds a "trivial" flavor to rcutorture and a corresponding TRIVIAL test scenario. This variant does not handle CPU hotplug, which is unconditionally enabled on x86 for post-v5.1-rc3 kernels, which is why the TRIVIAL.boot says "rcutorture.onoff_interval=0". This commit actually does handle CONFIG_PREEMPT=y kernels, but only because it turns back the Linux-kernel clock in order to provide these alternative definitions (or the moral equivalent thereof): #define rcu_read_lock() preempt_disable() #define rcu_read_unlock() preempt_enable() In CONFIG_PREEMPT=n kernels without debugging, these are equivalent to empty macros give or take a compiler barrier. However, the have been successfully tested with actual empty macros as well. Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> [ paulmck: Fix symbol issue reported by kbuild test robot <lkp@intel.com>. ] [ paulmck: Work around sched_setaffinity() issue noted by Andrea Parri. ] [ paulmck: Add rcutorture.shuffle_interval=0 to TRIVIAL.boot to fix interaction with shuffler task noted by Peter Zijlstra. ] Tested-by: Andrea Parri <andrea.parri@amarulasolutions.com>
2019-04-19 14:38:27 +00:00
/*
 * Definitions for rude RCU-tasks torture testing.
 */

/*
 * Queue @p's embedded ->rtort_rcu with call_rcu_tasks_rude() so that
 * rcu_torture_cb() is invoked for it later.
 */
static void rcu_tasks_rude_torture_deferred_free(struct rcu_torture *p)
{
	struct rcu_head *rhp = &p->rtort_rcu;

	call_rcu_tasks_rude(rhp, rcu_torture_cb);
}
/*
 * Torture operations for the "tasks-rude" flavor.  Readers reuse the
 * trivial flavor's preempt-disable lock/unlock pair, and ->exp_sync is
 * the same function as ->sync.
 */
static struct rcu_torture_ops tasks_rude_ops = {
	.name		= "tasks-rude",
	.ttype		= RCU_TASKS_RUDE_FLAVOR,

	/* Setup. */
	.init		= rcu_sync_torture_init,

	/* Read side. */
	.readlock	= rcu_torture_read_lock_trivial,
	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
	.readunlock	= rcu_torture_read_unlock_trivial,

	/* Update side. */
	.deferred_free	= rcu_tasks_rude_torture_deferred_free,
	.sync		= synchronize_rcu_tasks_rude,
	.exp_sync	= synchronize_rcu_tasks_rude,
	.call		= call_rcu_tasks_rude,
	.cb_barrier	= rcu_barrier_tasks_rude,

	/* Progress reporting and debugging. */
	.get_gp_seq	= rcu_no_completed,
	.gp_kthread_dbg	= show_rcu_tasks_rude_gp_kthread,
	.get_gp_data	= rcu_tasks_rude_get_gp_data,

	/* Limits and capability flags. */
	.cbflood_max	= 50000,
	.irq_capable	= 1,
};
/*
 * TASKS_RUDE_OPS expands to "&tasks_rude_ops," (trailing comma included)
 * when CONFIG_TASKS_RUDE_RCU is enabled and to nothing otherwise.
 */
#define TASKS_RUDE_OPS &tasks_rude_ops,
#else // #ifdef CONFIG_TASKS_RUDE_RCU
#define TASKS_RUDE_OPS
#endif // #else #ifdef CONFIG_TASKS_RUDE_RCU
#ifdef CONFIG_TASKS_TRACE_RCU
/*
 * Definitions for tracing RCU-tasks torture testing.
 */

/*
 * Reader entry for the "tasks-tracing" flavor: rcu_read_lock_trace().
 * The returned index is always 0; the matching unlock ignores it.
 */
static int tasks_tracing_torture_read_lock(void)
{
	rcu_read_lock_trace();

	return 0;
}
/*
 * Reader exit for the "tasks-tracing" flavor: rcu_read_unlock_trace().
 * @idx is accepted for signature compatibility and is not used.
 */
static void tasks_tracing_torture_read_unlock(int idx)
{
	rcu_read_unlock_trace();
}
/*
 * Queue @p's embedded ->rtort_rcu with call_rcu_tasks_trace() so that
 * rcu_torture_cb() is invoked for it later.
 */
static void rcu_tasks_tracing_torture_deferred_free(struct rcu_torture *p)
{
	struct rcu_head *rhp = &p->rtort_rcu;

	call_rcu_tasks_trace(rhp, rcu_torture_cb);
}
/*
 * Torture operations for the "tasks-tracing" flavor.  ->exp_sync is the
 * same function as ->sync.
 */
static struct rcu_torture_ops tasks_tracing_ops = {
	.name		= "tasks-tracing",
	.ttype		= RCU_TASKS_TRACING_FLAVOR,

	/* Setup. */
	.init		= rcu_sync_torture_init,

	/* Read side. */
	.readlock	= tasks_tracing_torture_read_lock,
	.read_delay	= srcu_read_delay,  /* just reuse srcu's version. */
	.readunlock	= tasks_tracing_torture_read_unlock,
	.readlock_held	= rcu_read_lock_trace_held,

	/* Update side. */
	.deferred_free	= rcu_tasks_tracing_torture_deferred_free,
	.sync		= synchronize_rcu_tasks_trace,
	.exp_sync	= synchronize_rcu_tasks_trace,
	.call		= call_rcu_tasks_trace,
	.cb_barrier	= rcu_barrier_tasks_trace,

	/* Progress reporting and debugging. */
	.get_gp_seq	= rcu_no_completed,
	.gp_kthread_dbg	= show_rcu_tasks_trace_gp_kthread,
	.get_gp_data	= rcu_tasks_trace_get_gp_data,

	/* Limits and capability flags. */
	.cbflood_max	= 50000,
	.irq_capable	= 1,
	.slow_gps	= 1,
};
/*
 * TASKS_TRACING_OPS expands to "&tasks_tracing_ops," (trailing comma
 * included) when CONFIG_TASKS_TRACE_RCU is enabled and to nothing otherwise.
 */
#define TASKS_TRACING_OPS &tasks_tracing_ops,
#else // #ifdef CONFIG_TASKS_TRACE_RCU
#define TASKS_TRACING_OPS
#endif // #else #ifdef CONFIG_TASKS_TRACE_RCU
/*
 * Difference between two grace-period sequence values.  Delegates to the
 * current flavor's ->gp_diff() when one is supplied; otherwise falls back
 * to plain unsigned subtraction, @new - @old.
 */
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
{
	if (cur_ops->gp_diff)
		return cur_ops->gp_diff(new, old);

	return new - old;
}
/*
 * RCU torture priority-boost testing. Runs one real-time thread per
 * CPU for moderate bursts, repeatedly starting grace periods and waiting
 * for them to complete. If a given grace period takes too long, we assume
 * that priority inversion has occurred.
 */
/*
 * Value of sysctl_sched_rt_runtime saved by
 * rcu_torture_disable_rt_throttle() and put back by
 * rcu_torture_enable_rt_throttle(); -1 means that nothing is saved.
 */
static int old_rt_runtime = -1;
/*
 * Turn off RT throttling so that rcutorture's boost threads are not
 * throttled, remembering the previous sysctl_sched_rt_runtime in
 * old_rt_runtime.
 *
 * This is only possible when rcutorture is built in; otherwise the user
 * should do it manually by setting the sched_rt_period_us and
 * sched_rt_runtime sysctls.  Does nothing if a value is already saved.
 */
static void rcu_torture_disable_rt_throttle(void)
{
	if (!IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
		return;
	if (old_rt_runtime != -1)
		return;	/* Already disabled: keep the value saved earlier. */

	old_rt_runtime = sysctl_sched_rt_runtime;
	sysctl_sched_rt_runtime = -1;
}
/*
 * Undo rcu_torture_disable_rt_throttle(): put the saved value back into
 * sysctl_sched_rt_runtime and mark old_rt_runtime as empty (-1).  Does
 * nothing unless rcutorture is built in and a value is currently saved.
 */
static void rcu_torture_enable_rt_throttle(void)
{
	if (!IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
		return;
	if (old_rt_runtime == -1)
		return;	/* Nothing was saved, so nothing to restore. */

	sysctl_sched_rt_runtime = old_rt_runtime;
	old_rt_runtime = -1;
}
static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *start)
{
int cpu;
static int dbg_done;
unsigned long end = jiffies;
bool gp_done;
unsigned long j;
static unsigned long last_persist;
unsigned long lp;
unsigned long mininterval = test_boost_duration * HZ - HZ / 2;
if (end - *start > mininterval) {
// Recheck after checking time to avoid false positives.
smp_mb(); // Time check before grace-period check.
if (cur_ops->poll_gp_state(gp_state))
return false; // passed, though perhaps just barely
if (cur_ops->check_boost_failed && !cur_ops->check_boost_failed(gp_state, &cpu)) {
// At most one persisted message per boost test.
j = jiffies;
lp = READ_ONCE(last_persist);
if (time_after(j, lp + mininterval) && cmpxchg(&last_persist, lp, j) == lp)
pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu);
return false; // passed on a technicality
}
VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
n_rcu_torture_boost_failure++;
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) {
pr_info("Boost inversion thread ->rt_priority %u gp_state %lu jiffies %lu\n",
current->rt_priority, gp_state, end - *start);
cur_ops->gp_kthread_dbg();
// Recheck after print to flag grace period ending during splat.
gp_done = cur_ops->poll_gp_state(gp_state);
pr_info("Boost inversion: GP %lu %s.\n", gp_state,
gp_done ? "ended already" : "still pending");
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
}
return true; // failed
} else if (cur_ops->check_boost_failed && !cur_ops->check_boost_failed(gp_state, NULL)) {
*start = jiffies;
}
return false; // passed
}
static int rcu_torture_boost(void *arg)
{
unsigned long endtime;
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
unsigned long gp_state;
unsigned long gp_state_time;
unsigned long oldstarttime;
VERBOSE_TOROUT_STRING("rcu_torture_boost started");
/* Set real-time priority. */
sched_set_fifo_low(current);
/* Each pass through the following loop does one boost-test cycle. */
do {
bool failed = false; // Test failed already in this test interval
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
bool gp_initiated = false;
if (kthread_should_stop())
goto checkwait;
/* Wait for the next test interval. */
oldstarttime = READ_ONCE(boost_starttime);
while (time_before(jiffies, oldstarttime)) {
schedule_timeout_interruptible(oldstarttime - jiffies);
if (stutter_wait("rcu_torture_boost"))
sched_set_fifo_low(current);
if (torture_must_stop())
goto checkwait;
}
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
// Do one boost-test interval.
endtime = oldstarttime + test_boost_duration * HZ;
while (time_before(jiffies, endtime)) {
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
// Has current GP gone too long?
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
failed = rcu_torture_boost_failed(gp_state, &gp_state_time);
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
// If we don't have a grace period in flight, start one.
if (!gp_initiated || cur_ops->poll_gp_state(gp_state)) {
gp_state = cur_ops->start_gp_poll();
gp_initiated = true;
gp_state_time = jiffies;
}
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
if (stutter_wait("rcu_torture_boost")) {
sched_set_fifo_low(current);
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
// If the grace period already ended,
// we don't know when that happened, so
// start over.
if (cur_ops->poll_gp_state(gp_state))
gp_initiated = false;
}
if (torture_must_stop())
goto checkwait;
}
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
// In case the grace period extended beyond the end of the loop.
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
rcu_torture_boost_failed(gp_state, &gp_state_time);
/*
* Set the start time of the next test interval.
* Yes, this is vulnerable to long delays, but such
* delays simply cause a false negative for the next
* interval. Besides, we are running at RT priority,
* so delays should be relatively rare.
*/
while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) {
if (mutex_trylock(&boost_mutex)) {
if (oldstarttime == boost_starttime) {
WRITE_ONCE(boost_starttime,
jiffies + test_boost_interval * HZ);
n_rcu_torture_boosts++;
}
mutex_unlock(&boost_mutex);
break;
}
schedule_timeout_uninterruptible(HZ / 20);
}
/* Go do the stutter. */
checkwait: if (stutter_wait("rcu_torture_boost"))
sched_set_fifo_low(current);
} while (!torture_must_stop());
/* Clean up and exit. */
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
while (!kthread_should_stop()) {
torture_shutdown_absorb("rcu_torture_boost");
schedule_timeout_uninterruptible(HZ / 20);
}
torture_kthread_stopping("rcu_torture_boost");
return 0;
}
/*
 * RCU torture force-quiescent-state kthread.
 *
 * Each pass of the outer loop has three phases:
 *   1. Stay quiet for fqs_stutter seconds (fqs_stutter * HZ jiffies),
 *      sleeping HZ / 20 jiffies at a time.
 *   2. Issue a burst of cur_ops->fqs() calls, with udelay(fqs_holdoff)
 *      after each one, until fqs_duration has been consumed in
 *      fqs_holdoff-sized steps.
 *   3. Take part in the torture-test stutter; if that actually waited,
 *      restore the nice value this task had on entry.
 *
 * Both inner loops bail out early once kthread_should_stop() is true.
 * Always returns 0; @arg is unused.
 */
static int
rcu_torture_fqs(void *arg)
{
	const int nice_at_entry = task_nice(current);
	unsigned long quiet_until;
	int burst_left;

	VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
	do {
		/* Phase 1: hold off before the next burst. */
		quiet_until = jiffies + fqs_stutter * HZ;
		for (;;) {
			if (!time_before(jiffies, quiet_until))
				break;
			if (kthread_should_stop())
				break;
			schedule_timeout_interruptible(HZ / 20);
		}

		/* Phase 2: the burst itself. */
		for (burst_left = fqs_duration;
		     burst_left > 0 && !kthread_should_stop();
		     burst_left -= fqs_holdoff) {
			cur_ops->fqs();
			udelay(fqs_holdoff);
		}

		/* Phase 3: stutter, then undo any priority change it made. */
		if (stutter_wait("rcu_torture_fqs"))
			sched_set_normal(current, nice_at_entry);
	} while (!torture_must_stop());

	torture_kthread_stopping("rcu_torture_fqs");
	return 0;
}
// Used by writers to randomly choose from the available grace-period primitives.
// Each used slot holds one RTWS_* value; rcu_torture_write_types() appends the
// entries and the writer kthreads index the array with "random % nsynctypes".
static int synctype[ARRAY_SIZE(rcu_torture_writer_state_names)] = { };
// Number of valid entries at the front of synctype[].  Zero means that no
// update-side primitive is usable, which the writer kthreads check for
// before taking the modulus.
static int nsynctypes;
/*
 * Determine which grace-period primitives are available.
 *
 * The gp_* module parameters say which primitives the user asked for.
 * If none of them is set, every primitive is treated as requested.
 * Each requested primitive is appended to synctype[] (and nsynctypes is
 * bumped) only when the current torture type supplies every cur_ops
 * function pointer that the writer kthreads dereference for that
 * primitive, so that a writer can never call through a NULL pointer.
 *
 * A pr_alert() is issued only when the user explicitly set a parameter
 * (as opposed to getting it by default) whose primitives are missing.
 */
static void rcu_torture_write_types(void)
{
	bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_cond_full1 = gp_cond_full;
	bool gp_cond_exp_full1 = gp_cond_exp_full, gp_exp1 = gp_exp, gp_poll_exp1 = gp_poll_exp;
	bool gp_poll_exp_full1 = gp_poll_exp_full, gp_normal1 = gp_normal, gp_poll1 = gp_poll;
	bool gp_poll_full1 = gp_poll_full, gp_sync1 = gp_sync;
	bool have;	/* Are all primitives for the current type present? */

	/* Initialize synctype[] array.  If none set, take default. */
	if (!gp_cond1 &&
	    !gp_cond_exp1 &&
	    !gp_cond_full1 &&
	    !gp_cond_exp_full1 &&
	    !gp_exp1 &&
	    !gp_poll_exp1 &&
	    !gp_poll_exp_full1 &&
	    !gp_normal1 &&
	    !gp_poll1 &&
	    !gp_poll_full1 &&
	    !gp_sync1) {
		gp_cond1 = true;
		gp_cond_exp1 = true;
		gp_cond_full1 = true;
		gp_cond_exp_full1 = true;
		gp_exp1 = true;
		gp_poll_exp1 = true;
		gp_poll_exp_full1 = true;
		gp_normal1 = true;
		gp_poll1 = true;
		gp_poll_full1 = true;
		gp_sync1 = true;
	}

	/* RTWS_COND_GET: ->get_gp_state() followed by ->cond_sync(). */
	have = cur_ops->get_gp_state && cur_ops->cond_sync;
	if (gp_cond1 && have) {
		synctype[nsynctypes++] = RTWS_COND_GET;
		pr_info("%s: Testing conditional GPs.\n", __func__);
	} else if (gp_cond && !have) {
		pr_alert("%s: gp_cond without primitives.\n", __func__);
	}

	/* RTWS_COND_GET_EXP: ->get_gp_state_exp() then ->cond_sync_exp(). */
	have = cur_ops->get_gp_state_exp && cur_ops->cond_sync_exp;
	if (gp_cond_exp1 && have) {
		synctype[nsynctypes++] = RTWS_COND_GET_EXP;
		pr_info("%s: Testing conditional expedited GPs.\n", __func__);
	} else if (gp_cond_exp && !have) {
		pr_alert("%s: gp_cond_exp without primitives.\n", __func__);
	}

	/*
	 * RTWS_COND_GET_FULL: the writers call ->get_gp_state_full() and
	 * ->cond_sync_full(), so those are the pointers to check.
	 */
	have = cur_ops->get_gp_state_full && cur_ops->cond_sync_full;
	if (gp_cond_full1 && have) {
		synctype[nsynctypes++] = RTWS_COND_GET_FULL;
		pr_info("%s: Testing conditional full-state GPs.\n", __func__);
	} else if (gp_cond_full && !have) {
		pr_alert("%s: gp_cond_full without primitives.\n", __func__);
	}

	/*
	 * RTWS_COND_GET_EXP_FULL: the writers call ->get_gp_state_full()
	 * and ->cond_sync_exp_full().
	 */
	have = cur_ops->get_gp_state_full && cur_ops->cond_sync_exp_full;
	if (gp_cond_exp_full1 && have) {
		synctype[nsynctypes++] = RTWS_COND_GET_EXP_FULL;
		pr_info("%s: Testing conditional full-state expedited GPs.\n", __func__);
	} else if (gp_cond_exp_full && !have) {
		pr_alert("%s: gp_cond_exp_full without primitives.\n", __func__);
	}

	/* RTWS_EXP_SYNC: ->exp_sync(). */
	have = cur_ops->exp_sync;
	if (gp_exp1 && have) {
		synctype[nsynctypes++] = RTWS_EXP_SYNC;
		pr_info("%s: Testing expedited GPs.\n", __func__);
	} else if (gp_exp && !have) {
		pr_alert("%s: gp_exp without primitives.\n", __func__);
	}

	/* RTWS_DEF_FREE: ->deferred_free(). */
	have = cur_ops->deferred_free;
	if (gp_normal1 && have) {
		synctype[nsynctypes++] = RTWS_DEF_FREE;
		pr_info("%s: Testing asynchronous GPs.\n", __func__);
	} else if (gp_normal && !have) {
		pr_alert("%s: gp_normal without primitives.\n", __func__);
	}

	/*
	 * RTWS_POLL_GET: rcu_torture_writer() uses ->get_comp_state(),
	 * ->start_gp_poll(), ->poll_gp_state(), ->get_gp_state() and
	 * ->same_gp_state() for this type.
	 */
	have = cur_ops->get_comp_state && cur_ops->same_gp_state &&
	       cur_ops->start_gp_poll && cur_ops->poll_gp_state &&
	       cur_ops->get_gp_state;
	if (gp_poll1 && have) {
		synctype[nsynctypes++] = RTWS_POLL_GET;
		pr_info("%s: Testing polling GPs.\n", __func__);
	} else if (gp_poll && !have) {
		pr_alert("%s: gp_poll without primitives.\n", __func__);
	}

	/*
	 * RTWS_POLL_GET_FULL: rcu_torture_writer() uses
	 * ->get_comp_state_full(), ->start_gp_poll_full(),
	 * ->poll_gp_state_full(), ->get_gp_state_full() and
	 * ->same_gp_state_full() for this type.
	 */
	have = cur_ops->get_comp_state_full && cur_ops->same_gp_state_full &&
	       cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full &&
	       cur_ops->get_gp_state_full;
	if (gp_poll_full1 && have) {
		synctype[nsynctypes++] = RTWS_POLL_GET_FULL;
		pr_info("%s: Testing polling full-state GPs.\n", __func__);
	} else if (gp_poll_full && !have) {
		pr_alert("%s: gp_poll_full without primitives.\n", __func__);
	}

	/* RTWS_POLL_GET_EXP: ->start_gp_poll_exp(), ->poll_gp_state_exp(). */
	have = cur_ops->start_gp_poll_exp && cur_ops->poll_gp_state_exp;
	if (gp_poll_exp1 && have) {
		synctype[nsynctypes++] = RTWS_POLL_GET_EXP;
		pr_info("%s: Testing polling expedited GPs.\n", __func__);
	} else if (gp_poll_exp && !have) {
		pr_alert("%s: gp_poll_exp without primitives.\n", __func__);
	}

	/*
	 * RTWS_POLL_GET_EXP_FULL: ->start_gp_poll_exp_full() and
	 * ->poll_gp_state_full().
	 */
	have = cur_ops->start_gp_poll_exp_full && cur_ops->poll_gp_state_full;
	if (gp_poll_exp_full1 && have) {
		synctype[nsynctypes++] = RTWS_POLL_GET_EXP_FULL;
		pr_info("%s: Testing polling full-state expedited GPs.\n", __func__);
	} else if (gp_poll_exp_full && !have) {
		pr_alert("%s: gp_poll_exp_full without primitives.\n", __func__);
	}

	/* RTWS_SYNC: ->sync(). */
	have = cur_ops->sync;
	if (gp_sync1 && have) {
		synctype[nsynctypes++] = RTWS_SYNC;
		pr_info("%s: Testing normal GPs.\n", __func__);
	} else if (gp_sync && !have) {
		pr_alert("%s: gp_sync without primitives.\n", __func__);
	}
}
/*
* Do the specified rcu_torture_writer() synchronous grace period,
* while also testing out the polled APIs. Note well that the single-CPU
* grace-period optimizations must be accounted for.
*/
static void do_rtws_sync(struct torture_random_state *trsp, void (*sync)(void))
{
unsigned long cookie;
struct rcu_gp_oldstate cookie_full;
bool dopoll;
bool dopoll_full;
unsigned long r = torture_random(trsp);
dopoll = cur_ops->get_gp_state && cur_ops->poll_gp_state && !(r & 0x300);
dopoll_full = cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full && !(r & 0xc00);
if (dopoll || dopoll_full)
cpus_read_lock();
if (dopoll)
cookie = cur_ops->get_gp_state();
if (dopoll_full)
cur_ops->get_gp_state_full(&cookie_full);
if (cur_ops->poll_need_2gp && cur_ops->poll_need_2gp(dopoll, dopoll_full))
sync();
sync();
WARN_ONCE(dopoll && !cur_ops->poll_gp_state(cookie),
"%s: Cookie check 3 failed %pS() online %*pbl.",
__func__, sync, cpumask_pr_args(cpu_online_mask));
WARN_ONCE(dopoll_full && !cur_ops->poll_gp_state_full(&cookie_full),
"%s: Cookie check 4 failed %pS() online %*pbl",
__func__, sync, cpumask_pr_args(cpu_online_mask));
if (dopoll || dopoll_full)
cpus_read_unlock();
}
/*
 * RCU torture writer kthread. Repeatedly substitutes a new structure
 * for that pointed to by rcu_torture_current, freeing the old structure
 * after a series of grace periods (the "pipeline").
 *
 * Each pass of the main loop:
 *  - delays, allocates a fresh element and publishes it with
 *    rcu_assign_pointer();
 *  - if there was a previous element, advances its pipe count, checks
 *    that a reader blocks polled grace periods, and then retires the old
 *    element using one grace-period primitive picked at random from
 *    synctype[];
 *  - bumps rcu_torture_current_version;
 *  - cycles through rcu_expedite_gp()/rcu_unexpedite_gp() nesting levels
 *    when that is permitted;
 *  - takes part in the torture-test stutter and, after a stutter pause,
 *    complains about elements that are neither free nor current.
 *
 * rcu_torture_writer_state is updated along the way to record which
 * step the writer is in.  Always returns 0; @arg is unused.
 */
static int
rcu_torture_writer(void *arg)
{
bool boot_ended;
bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
unsigned long cookie;
struct rcu_gp_oldstate cookie_full;
// Nesting position in the expedite/unexpedite cycle: positive while
// stacking up rcu_expedite_gp() calls, negative while unwinding them.
int expediting = 0;
unsigned long gp_snap;
unsigned long gp_snap1;
struct rcu_gp_oldstate gp_snap_full;
struct rcu_gp_oldstate gp_snap1_full;
int i;
int idx;
int oldnice = task_nice(current);
struct rcu_gp_oldstate rgo[NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE];
struct rcu_torture *rp;
struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand);
// Jiffies value after which the post-stutter leak check below is allowed.
unsigned long stallsdone = jiffies;
bool stutter_waited;
unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];
// If a new stall test is added, this must be adjusted.
if (stall_cpu_holdoff + stall_gp_kthread + stall_cpu)
stallsdone += (stall_cpu_holdoff + stall_gp_kthread + stall_cpu + 60) * HZ;
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite)
pr_alert("%s" TORTURE_FLAG
" GP expediting controlled from boot/sysfs for %s.\n",
torture_type, cur_ops->name);
if (WARN_ONCE(nsynctypes == 0,
"%s: No update-side primitives.\n", __func__)) {
/*
* No updates primitives, so don't try updating.
* The resulting test won't be testing much, hence the
* above WARN_ONCE().
*/
rcu_torture_writer_state = RTWS_STOPPING;
torture_kthread_stopping("rcu_torture_writer");
return 0;
}
do {
rcu_torture_writer_state = RTWS_FIXED_DELAY;
torture_hrtimeout_us(500, 1000, &rand);
rp = rcu_torture_alloc();
// Nothing to publish this time around; "continue" goes straight to
// the torture_must_stop() test at the bottom of the loop.
if (rp == NULL)
continue;
rp->rtort_pipe_count = 0;
ASSERT_EXCLUSIVE_WRITER(rp->rtort_pipe_count);
rcu_torture_writer_state = RTWS_DELAY;
udelay(torture_random(&rand) & 0x3ff);
rcu_torture_writer_state = RTWS_REPLACE;
// Fetch the outgoing element, then publish the new one in its place.
old_rp = rcu_dereference_check(rcu_torture_current,
current == writer_task);
rp->rtort_mbtest = 1;
rcu_assign_pointer(rcu_torture_current, rp);
smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
if (old_rp) {
// Account for the old element's pipeline stage (clamped to
// RCU_TORTURE_PIPE_LEN) and advance it by one.
i = old_rp->rtort_pipe_count;
if (i > RCU_TORTURE_PIPE_LEN)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
WRITE_ONCE(old_rp->rtort_pipe_count,
old_rp->rtort_pipe_count + 1);
ASSERT_EXCLUSIVE_WRITER(old_rp->rtort_pipe_count);
// Make sure readers block polled grace periods.
if (cur_ops->get_gp_state && cur_ops->poll_gp_state) {
idx = cur_ops->readlock();
cookie = cur_ops->get_gp_state();
// A cookie taken inside a read-side critical section must not
// yet be reported as completed.
WARN_ONCE(cur_ops->poll_gp_state(cookie),
"%s: Cookie check 1 failed %s(%d) %lu->%lu\n",
__func__,
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
cookie, cur_ops->get_gp_state());
// Conversely, an "already completed" cookie must poll as done.
if (cur_ops->get_comp_state) {
cookie = cur_ops->get_comp_state();
WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie));
}
cur_ops->readunlock(idx);
}
// Same pair of checks for the full-state polled API.
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) {
idx = cur_ops->readlock();
cur_ops->get_gp_state_full(&cookie_full);
WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full),
"%s: Cookie check 5 failed %s(%d) online %*pbl\n",
__func__,
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
cpumask_pr_args(cpu_online_mask));
if (cur_ops->get_comp_state_full) {
cur_ops->get_comp_state_full(&cookie_full);
WARN_ON_ONCE(!cur_ops->poll_gp_state_full(&cookie_full));
}
cur_ops->readunlock(idx);
}
// Retire old_rp using a randomly chosen entry of synctype[].  Every
// case except RTWS_DEF_FREE waits for a grace period itself and then
// calls rcu_torture_pipe_update(); RTWS_DEF_FREE hands the element
// to ->deferred_free() instead.
switch (synctype[torture_random(&rand) % nsynctypes]) {
case RTWS_DEF_FREE:
rcu_torture_writer_state = RTWS_DEF_FREE;
cur_ops->deferred_free(old_rp);
break;
case RTWS_EXP_SYNC:
rcu_torture_writer_state = RTWS_EXP_SYNC;
do_rtws_sync(&rand, cur_ops->exp_sync);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_COND_GET:
// Conditional variants: snapshot, random delay of up to 15
// jiffies, then a conditional wait on the snapshot.
rcu_torture_writer_state = RTWS_COND_GET;
gp_snap = cur_ops->get_gp_state();
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
rcu_torture_writer_state = RTWS_COND_SYNC;
cur_ops->cond_sync(gp_snap);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_COND_GET_EXP:
rcu_torture_writer_state = RTWS_COND_GET_EXP;
gp_snap = cur_ops->get_gp_state_exp();
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
rcu_torture_writer_state = RTWS_COND_SYNC_EXP;
cur_ops->cond_sync_exp(gp_snap);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_COND_GET_FULL:
rcu_torture_writer_state = RTWS_COND_GET_FULL;
cur_ops->get_gp_state_full(&gp_snap_full);
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
rcu_torture_writer_state = RTWS_COND_SYNC_FULL;
cur_ops->cond_sync_full(&gp_snap_full);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_COND_GET_EXP_FULL:
rcu_torture_writer_state = RTWS_COND_GET_EXP_FULL;
cur_ops->get_gp_state_full(&gp_snap_full);
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
rcu_torture_writer_state = RTWS_COND_SYNC_EXP_FULL;
cur_ops->cond_sync_exp_full(&gp_snap_full);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_POLL_GET:
rcu_torture_writer_state = RTWS_POLL_GET;
// Seed ulo[] with already-completed cookies, then start a polled
// grace period and wait for it by polling.
for (i = 0; i < ARRAY_SIZE(ulo); i++)
ulo[i] = cur_ops->get_comp_state();
gp_snap = cur_ops->start_gp_poll();
rcu_torture_writer_state = RTWS_POLL_WAIT;
while (!cur_ops->poll_gp_state(gp_snap)) {
// While waiting, each freshly taken cookie must fit into a ulo[]
// slot that is either completed or holds the same state; running
// out of slots triggers the WARN_ON_ONCE() below.
gp_snap1 = cur_ops->get_gp_state();
for (i = 0; i < ARRAY_SIZE(ulo); i++)
if (cur_ops->poll_gp_state(ulo[i]) ||
cur_ops->same_gp_state(ulo[i], gp_snap1)) {
ulo[i] = gp_snap1;
break;
}
WARN_ON_ONCE(i >= ARRAY_SIZE(ulo));
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
}
rcu_torture_pipe_update(old_rp);
break;
case RTWS_POLL_GET_FULL:
// Full-state counterpart of RTWS_POLL_GET, using rgo[] for slots.
rcu_torture_writer_state = RTWS_POLL_GET_FULL;
for (i = 0; i < ARRAY_SIZE(rgo); i++)
cur_ops->get_comp_state_full(&rgo[i]);
cur_ops->start_gp_poll_full(&gp_snap_full);
rcu_torture_writer_state = RTWS_POLL_WAIT_FULL;
while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
cur_ops->get_gp_state_full(&gp_snap1_full);
for (i = 0; i < ARRAY_SIZE(rgo); i++)
if (cur_ops->poll_gp_state_full(&rgo[i]) ||
cur_ops->same_gp_state_full(&rgo[i],
&gp_snap1_full)) {
rgo[i] = gp_snap1_full;
break;
}
WARN_ON_ONCE(i >= ARRAY_SIZE(rgo));
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
}
rcu_torture_pipe_update(old_rp);
break;
case RTWS_POLL_GET_EXP:
rcu_torture_writer_state = RTWS_POLL_GET_EXP;
gp_snap = cur_ops->start_gp_poll_exp();
rcu_torture_writer_state = RTWS_POLL_WAIT_EXP;
while (!cur_ops->poll_gp_state_exp(gp_snap))
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_POLL_GET_EXP_FULL:
rcu_torture_writer_state = RTWS_POLL_GET_EXP_FULL;
cur_ops->start_gp_poll_exp_full(&gp_snap_full);
rcu_torture_writer_state = RTWS_POLL_WAIT_EXP_FULL;
while (!cur_ops->poll_gp_state_full(&gp_snap_full))
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_SYNC:
rcu_torture_writer_state = RTWS_SYNC;
do_rtws_sync(&rand, cur_ops->sync);
rcu_torture_pipe_update(old_rp);
break;
default:
// synctype[] held a value with no handler above.
WARN_ON_ONCE(1);
break;
}
}
WRITE_ONCE(rcu_torture_current_version,
rcu_torture_current_version + 1);
/* Cycle through nesting levels of rcu_expedite_gp() calls. */
// (!!expediting - 1) is 0 when expediting is nonzero and all-ones when
// it is zero.  So a cycle in progress advances on every pass, whereas a
// new cycle starts only when the low 8 random bits are all zero.
if (can_expedite &&
!(torture_random(&rand) & 0xff & (!!expediting - 1))) {
WARN_ON_ONCE(expediting == 0 && rcu_gp_is_expedited());
if (expediting >= 0)
rcu_expedite_gp();
else
rcu_unexpedite_gp();
// After the fourth nested rcu_expedite_gp(), flip negative so the
// following passes unwind with rcu_unexpedite_gp() back up to zero.
if (++expediting > 3)
expediting = -expediting;
} else if (!can_expedite) { /* Disabled during boot, recheck. */
can_expedite = !rcu_gp_is_expedited() &&
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
// Sample boot status before stuttering so that the check below uses the
// value from before the pause.
boot_ended = rcu_inkernel_boot_has_ended();
stutter_waited = stutter_wait("rcu_torture_writer");
// After a real stutter pause (and when nothing is expected to be slowing
// grace periods or callbacks), any rcu_tortures[] element that is not on
// a free list and is not the current element is reported, with tracing
// stopped and the ftrace buffer dumped.
if (stutter_waited &&
!atomic_read(&rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop() &&
boot_ended &&
time_after(jiffies, stallsdone))
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
if (list_empty(&rcu_tortures[i].rtort_free) &&
rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) {
tracing_off();
if (cur_ops->gp_kthread_dbg)
cur_ops->gp_kthread_dbg();
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
rcu_ftrace_dump(DUMP_ALL);
}
// Restore the nice value this task had on entry after a stutter pause.
if (stutter_waited)
sched_set_normal(current, oldnice);
} while (!torture_must_stop());
rcu_torture_current = NULL; // Let stats task know that we are done.
/* Reset expediting back to unexpedited. */
// Make the count negative, then issue one rcu_unexpedite_gp() per
// outstanding nesting level.
if (expediting > 0)
expediting = -expediting;
while (can_expedite && expediting++ < 0)
rcu_unexpedite_gp();
WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
if (!can_expedite)
pr_alert("%s" TORTURE_FLAG
" Dynamic grace-period expediting was disabled.\n",
torture_type);
rcu_torture_writer_state = RTWS_STOPPING;
torture_kthread_stopping("rcu_torture_writer");
return 0;
}
/*
* RCU torture fake writer kthread. Repeatedly calls sync, with a random
* delay between calls.
*/
static int
rcu_torture_fakewriter(void *arg)
{
unsigned long gp_snap;
struct rcu_gp_oldstate gp_snap_full;
DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
set_user_nice(current, MAX_NICE);
rcutorture: Avoid corner-case #DE with nsynctypes check The rcutorture module is used to run torture tests that validate RCU. rcutorture takes a variety of module parameters that configure the functionality of the test. Amongst these parameters are the types of synchronization mechanisms that the rcu_torture_writer and rcu_torture_fakewriter tasks may use, and the torture_type of the run which determines what read and sync operations are used by the various writer and reader tasks that run throughout the test. When the module is configured to only use sync types for which the specified torture_type does not implement the necessary operations, we can end up in a state where nsynctypes is 0. This is not an erroneous state, but it currently crashes the kernel with a #DE due to nsynctypes being used with a modulo operator in rcu_torture_fakewriter(). Here is an example of such a #DE: $ insmod ./rcutorture.ko gp_cond=1 gp_cond_exp=0 gp_exp=0 gp_poll_exp=0 gp_normal=0 gp_poll=0 gp_poll_exp=0 verbose=9999 torture_type=trivial ... 
[ 8536.525096] divide error: 0000 [#1] PREEMPT SMP PTI [ 8536.525101] CPU: 30 PID: 392138 Comm: rcu_torture_fak Kdump: loaded Tainted: G S 5.17.0-rc1-00179-gc8c42c80febd #24 [ 8536.525105] Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A23 12/08/2020 [ 8536.525106] RIP: 0010:rcu_torture_fakewriter+0xf1/0x2d0 [rcutorture] [ 8536.525121] Code: 00 31 d2 8d 0c f5 00 00 00 00 48 63 c9 48 f7 f1 48 85 d2 0f 84 79 ff ff ff 48 89 e7 e8 78 78 01 00 48 63 0d 29 ca 00 00 31 d2 <48> f7 f1 8b 04 95 00 05 4e a0 83 f8 06 0f 84 ad 00 00 00 7f 1f 83 [ 8536.525124] RSP: 0018:ffffc9000777fef0 EFLAGS: 00010246 [ 8536.525127] RAX: 00000000223d006e RBX: cccccccccccccccd RCX: 0000000000000000 [ 8536.525130] RDX: 0000000000000000 RSI: ffffffff824315b9 RDI: ffffc9000777fef0 [ 8536.525132] RBP: ffffc9000487bb30 R08: 0000000000000002 R09: 000000000002a580 [ 8536.525134] R10: ffffffff82c5f920 R11: 0000000000000000 R12: ffff8881a2c35d00 [ 8536.525136] R13: ffff8881540c8d00 R14: ffffffffa04d39d0 R15: 0000000000000000 [ 8536.525137] FS: 0000000000000000(0000) GS:ffff88903ff80000(0000) knlGS:0000000000000000 [ 8536.525140] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8536.525142] CR2: 00007f839f022000 CR3: 0000000002c0a006 CR4: 00000000007706e0 [ 8536.525144] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8536.525145] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8536.525147] PKRU: 55555554 [ 8536.525148] Call Trace: [ 8536.525150] <TASK> [ 8536.525153] kthread+0xe8/0x110 [ 8536.525161] ? kthread_complete_and_exit+0x20/0x20 [ 8536.525167] ret_from_fork+0x22/0x30 [ 8536.525174] </TASK> The solution is to gracefully handle the case of nsynctypes being 0 in rcu_torture_fakewriter() by not performing any work. This is already being done in rcu_torture_writer(), though there is a missing return on that path which will be fixed in a subsequent patch. Signed-off-by: David Vernet <void@manifault.com> Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org>
2022-03-07 22:46:55 +00:00
if (WARN_ONCE(nsynctypes == 0,
"%s: No update-side primitives.\n", __func__)) {
/*
* No updates primitives, so don't try updating.
* The resulting test won't be testing much, hence the
* above WARN_ONCE().
*/
torture_kthread_stopping("rcu_torture_fakewriter");
return 0;
}
do {
torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand);
if (cur_ops->cb_barrier != NULL &&
torture_random(&rand) % (nfakewriters * 8) == 0) {
cur_ops->cb_barrier();
} else {
switch (synctype[torture_random(&rand) % nsynctypes]) {
case RTWS_DEF_FREE:
break;
case RTWS_EXP_SYNC:
cur_ops->exp_sync();
break;
case RTWS_COND_GET:
gp_snap = cur_ops->get_gp_state();
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
cur_ops->cond_sync(gp_snap);
break;
case RTWS_COND_GET_EXP:
gp_snap = cur_ops->get_gp_state_exp();
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
cur_ops->cond_sync_exp(gp_snap);
break;
case RTWS_COND_GET_FULL:
cur_ops->get_gp_state_full(&gp_snap_full);
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
cur_ops->cond_sync_full(&gp_snap_full);
break;
case RTWS_COND_GET_EXP_FULL:
cur_ops->get_gp_state_full(&gp_snap_full);
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
cur_ops->cond_sync_exp_full(&gp_snap_full);
break;
case RTWS_POLL_GET:
gp_snap = cur_ops->start_gp_poll();
while (!cur_ops->poll_gp_state(gp_snap)) {
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
}
break;
case RTWS_POLL_GET_FULL:
cur_ops->start_gp_poll_full(&gp_snap_full);
while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
}
break;
case RTWS_POLL_GET_EXP:
gp_snap = cur_ops->start_gp_poll_exp();
while (!cur_ops->poll_gp_state_exp(gp_snap)) {
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
}
break;
case RTWS_POLL_GET_EXP_FULL:
cur_ops->start_gp_poll_exp_full(&gp_snap_full);
while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
}
break;
case RTWS_SYNC:
cur_ops->sync();
break;
default:
WARN_ON_ONCE(1);
break;
}
}
stutter_wait("rcu_torture_fakewriter");
} while (!torture_must_stop());
torture_kthread_stopping("rcu_torture_fakewriter");
return 0;
}
// RCU callback that releases its own rcu_head.  The pointer is handed to
// kfree() as-is, so @rhp must be the address that was originally allocated
// (presumably a bare kmalloc()ed rcu_head queued by the timer-based reader;
// TODO confirm at the call site).
static void rcu_torture_timer_cb(struct rcu_head *rhp)
{
kfree(rhp);
}
// Set up and carry out testing of RCU's global memory ordering
//
// @myid: index of the calling reader in rcu_torture_reader_mbchk[];
//        a negative value makes this function a no-op.
// @rtp:  element whose ->rtort_chkp slot is claimed when handing out work.
// @trsp: random-number state used to pick the readers involved.
//
// Each call (1) bumps the caller's own loop counter, (2) possibly assigns
// some other reader ("chker") the job of checking a third reader ("chked")
// against a snapshot of that reader's loop counter, and (3) if the caller
// has itself been assigned work that is marked ready, performs the check
// and releases both the caller and its assigner for further assignments.
static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp,
struct torture_random_state *trsp)
{
unsigned long loops;
int noc = torture_num_online_cpus();
int rdrchked;
int rdrchker;
struct rcu_torture_reader_check *rtrcp; // Me.
struct rcu_torture_reader_check *rtrcp_assigner; // Assigned us to do checking.
struct rcu_torture_reader_check *rtrcp_chked; // Reader being checked.
struct rcu_torture_reader_check *rtrcp_chker; // Reader doing checking when not me.
if (myid < 0)
return; // Don't try this from timer handlers.
// Increment my counter.
rtrcp = &rcu_torture_reader_mbchk[myid];
WRITE_ONCE(rtrcp->rtc_myloops, rtrcp->rtc_myloops + 1);
// Attempt to assign someone else some checking work.
rdrchked = torture_random(trsp) % nrealreaders;
rtrcp_chked = &rcu_torture_reader_mbchk[rdrchked];
rdrchker = torture_random(trsp) % nrealreaders;
rtrcp_chker = &rcu_torture_reader_mbchk[rdrchker];
// Only assign when the checked reader is neither me nor the checker, both
// indices are no larger than the online-CPU count, I have no assignment
// outstanding (rtc_chkrdr < 0), this element has no check attached yet,
// and the chosen checker is not already working for someone.
if (rdrchked != myid && rdrchked != rdrchker && noc >= rdrchked && noc >= rdrchker &&
smp_load_acquire(&rtrcp->rtc_chkrdr) < 0 && // Pairs with smp_store_release below.
!READ_ONCE(rtp->rtort_chkp) &&
!smp_load_acquire(&rtrcp_chker->rtc_assigner)) { // Pairs with smp_store_release below.
// Record the checked reader's loop count as of assignment time.
rtrcp->rtc_chkloops = READ_ONCE(rtrcp_chked->rtc_myloops);
WARN_ON_ONCE(rtrcp->rtc_chkrdr >= 0);
rtrcp->rtc_chkrdr = rdrchked;
WARN_ON_ONCE(rtrcp->rtc_ready); // This gets set after the grace period ends.
// Claim the checker and then the element; if either claim loses a race,
// release the checker again.
if (cmpxchg_relaxed(&rtrcp_chker->rtc_assigner, NULL, rtrcp) ||
cmpxchg_relaxed(&rtp->rtort_chkp, NULL, rtrcp))
(void)cmpxchg_relaxed(&rtrcp_chker->rtc_assigner, rtrcp, NULL); // Back out.
}
// If assigned some completed work, do it!
rtrcp_assigner = READ_ONCE(rtrcp->rtc_assigner);
if (!rtrcp_assigner || !smp_load_acquire(&rtrcp_assigner->rtc_ready))
return; // No work or work not yet ready.
rdrchked = rtrcp_assigner->rtc_chkrdr;
if (WARN_ON_ONCE(rdrchked < 0))
return;
rtrcp_chked = &rcu_torture_reader_mbchk[rdrchked];
loops = READ_ONCE(rtrcp_chked->rtc_myloops);
atomic_inc(&n_rcu_torture_mbchk_tries);
// Failure: the checked reader's counter, as seen from here, is behind the
// value the assigner snapshotted earlier.
if (ULONG_CMP_LT(loops, rtrcp_assigner->rtc_chkloops))
atomic_inc(&n_rcu_torture_mbchk_fail);
// Reset the assigner's bookkeeping before making it available again.
rtrcp_assigner->rtc_chkloops = loops + ULONG_MAX / 2;
rtrcp_assigner->rtc_ready = 0;
smp_store_release(&rtrcp->rtc_assigner, NULL); // Someone else can assign us work.
smp_store_release(&rtrcp_assigner->rtc_chkrdr, -1); // Assigner can again assign.
}
/*
 * Do one extension of an RCU read-side critical section using the
 * current reader state in readstate (set to zero for initial entry
 * to extended critical section), set the new state as specified by
 * newstate (set to zero for final exit from extended critical section),
 * and random-number-generator state in trsp.  If this is neither the
 * beginning nor the end of the critical section and if there was actually
 * a change, do a ->read_delay().
 *
 * The requested state is also recorded in rtrsp->rt_readstate.  On return,
 * *readstate holds newstate combined with the index bits returned by any
 * ->readlock() calls that are still in effect.
 */
static void rcutorture_one_extend(int *readstate, int newstate,
struct torture_random_state *trsp,
struct rt_read_seg *rtrsp)
{
unsigned long flags;
// -1 means "no ->readlock() was done by this call" for that nesting level.
int idxnew1 = -1;
int idxnew2 = -1;
// Both start out as the full old state; the per-level ->readlock() index
// is extracted from them below using the corresponding shift.
int idxold1 = *readstate;
int idxold2 = idxold1;
// Bits requested now that were not set before: protections to acquire.
int statesnew = ~*readstate & newstate;
// Bits set before that are no longer requested: protections to release.
int statesold = *readstate & ~newstate;
WARN_ON_ONCE(idxold2 < 0);
WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
rtrsp->rt_readstate = newstate;
/* First, put new protection in place to avoid critical-section gap. */
if (statesnew & RCUTORTURE_RDR_BH)
local_bh_disable();
if (statesnew & RCUTORTURE_RDR_RBH)
rcu_read_lock_bh();
if (statesnew & RCUTORTURE_RDR_IRQ)
local_irq_disable();
if (statesnew & RCUTORTURE_RDR_PREEMPT)
preempt_disable();
if (statesnew & RCUTORTURE_RDR_SCHED)
rcu_read_lock_sched();
// Keep only the low bit of the value returned by ->readlock() and park
// it at the bit position reserved for this nesting level.
if (statesnew & RCUTORTURE_RDR_RCU_1)
idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1;
if (statesnew & RCUTORTURE_RDR_RCU_2)
idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2;
/*
 * Next, remove old protection, in decreasing order of strength
 * to avoid unlock paths that aren't safe in the stronger
 * context. Namely: BH can not be enabled with disabled interrupts.
 * Additionally PREEMPT_RT requires that BH is enabled in preemptible
 * context.
 */
if (statesold & RCUTORTURE_RDR_IRQ)
local_irq_enable();
if (statesold & RCUTORTURE_RDR_PREEMPT)
preempt_enable();
if (statesold & RCUTORTURE_RDR_SCHED)
rcu_read_unlock_sched();
if (statesold & RCUTORTURE_RDR_BH)
local_bh_enable();
if (statesold & RCUTORTURE_RDR_RBH)
rcu_read_unlock_bh();
if (statesold & RCUTORTURE_RDR_RCU_2) {
// Hand back the index bit that was saved when this level was entered.
cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1);
// statesnew and statesold are disjoint, so a level being released
// cannot also have been acquired above.
WARN_ON_ONCE(idxnew2 != -1);
// Clear so that no stale index bit is carried into the new state.
idxold2 = 0;
}
if (statesold & RCUTORTURE_RDR_RCU_1) {
bool lockit;
// Occasionally (low 16 random bits all zero), when nothing new was
// acquired and the ops do not set ->no_pi_lock, do the unlock while
// holding current->pi_lock with interrupts disabled.
lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff);
if (lockit)
raw_spin_lock_irqsave(&current->pi_lock, flags);
cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1);
WARN_ON_ONCE(idxnew1 != -1);
idxold1 = 0;
if (lockit)
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
}
/* Delay if neither beginning nor end and there was a change. */
if ((statesnew || statesold) && *readstate && newstate)
cur_ops->read_delay(trsp, rtrsp);
/* Update the reader state. */
// If no new ->readlock() was done at a level, carry over the old index
// bit (which was zeroed above if that level was just released).
if (idxnew1 == -1)
idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1;
WARN_ON_ONCE(idxnew1 < 0);
if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1))
pr_info("Unexpected idxnew1 value of %#x\n", idxnew1);
if (idxnew2 == -1)
idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2;
WARN_ON_ONCE(idxnew2 < 0);
WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
// New state: requested protection bits plus the saved index bits.
*readstate = idxnew1 | idxnew2 | newstate;
WARN_ON_ONCE(*readstate < 0);
if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1))
pr_info("Unexpected idxnew2 value of %#x\n", idxnew2);
}
/*
 * Compute the widest reader-protection mask that may be used: the
 * extendables parameter, limited to RCUTORTURE_MAX_EXTEND and to what the
 * current torture ops allow, with both RCU-reader nesting bits always set.
 * Complains once if the extendables parameter has bits outside of
 * RCUTORTURE_MAX_EXTEND.
 */
static int rcutorture_extend_mask_max(void)
{
	int allowed;

	WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND);

	allowed = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables;
	allowed |= RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;

	return allowed;
}
/*
 * Pick a random set of reader-protection bits, guaranteed non-empty.
 *
 * @oldmask: protection state currently in effect (zero on initial entry).
 * @trsp: random-number-generator state.
 *
 * The random choice is then adjusted so that the transition from oldmask
 * to the returned mask never requires an illegal operation (for example,
 * enabling bh while interrupts are disabled).
 */
static int
rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
{
	int mask = rcutorture_extend_mask_max();
	unsigned long rnd = torture_random(trsp);
	unsigned long rnd_hi = rnd >> 3;
	unsigned long bh_bits = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
	unsigned long atomic_bits = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED |
				    RCUTORTURE_RDR_IRQ;

	WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1);

	/* Mostly only one bit (need preemption!), sometimes lots of bits. */
	if (rnd & 0x7)
		mask &= 1 << (rnd_hi % RCUTORTURE_RDR_NBITS);
	else
		mask &= rnd_hi;

	// Can't have nested RCU reader without outer RCU reader:  Either
	// drop the nested reader (outer one already held) or add the outer.
	if ((mask & RCUTORTURE_RDR_RCU_2) && !(mask & RCUTORTURE_RDR_RCU_1)) {
		if (oldmask & RCUTORTURE_RDR_RCU_1)
			mask &= ~RCUTORTURE_RDR_RCU_2;
		else
			mask |= RCUTORTURE_RDR_RCU_1;
	}

	/* Can't enable bh w/irq disabled, so keep whatever bh state we had. */
	if (mask & RCUTORTURE_RDR_IRQ)
		mask |= oldmask & bh_bits;

	/*
	 * Ideally these sequences would be detected in debug builds
	 * (regardless of RT), but until then don't stop testing
	 * them on non-RT.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		/* Can't modify BH in atomic context */
		if (oldmask & atomic_bits)
			mask &= ~bh_bits;
		if ((oldmask | mask) & atomic_bits)
			mask |= oldmask & bh_bits;
	}

	/* Never return an empty mask:  Fall back to a plain RCU reader. */
	if (!mask)
		return RCUTORTURE_RDR_RCU_1;
	return mask;
}
/*
* Do a randomly selected number of extensions of an existing RCU read-side
* critical section.
*/
static struct rt_read_seg *
rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
struct rt_read_seg *rtrsp)
{
int i;
int j;
int mask = rcutorture_extend_mask_max();
WARN_ON_ONCE(!*readstate); /* -Existing- RCU read-side critsect! */
if (!((mask - 1) & mask))
return rtrsp; /* Current RCU reader not extendable. */
/* Bias towards larger numbers of loops. */
i = torture_random(trsp);
i = ((i | (i >> 3)) & RCUTORTURE_RDR_MAX_LOOPS) + 1;
for (j = 0; j < i; j++) {
mask = rcutorture_extend_mask(*readstate, trsp);
rcutorture_one_extend(readstate, mask, trsp, &rtrsp[j]);
}
return &rtrsp[j];
}
/*
* Do one read-side critical section, returning false if there was
* no data to read. Can be invoked both from process context and
* from a timer handler.
*/
static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
{
bool checkpolling = !(torture_random(trsp) & 0xfff);
unsigned long cookie;
struct rcu_gp_oldstate cookie_full;
int i;
unsigned long started;
unsigned long completed;
int newstate;
struct rcu_torture *p;
int pipe_count;
int readstate = 0;
struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } };
struct rt_read_seg *rtrsp = &rtseg[0];
struct rt_read_seg *rtrsp1;
unsigned long long ts;
WARN_ON_ONCE(!rcu_is_watching());
newstate = rcutorture_extend_mask(readstate, trsp);
rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++);
if (checkpolling) {
if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
cookie = cur_ops->get_gp_state();
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
cur_ops->get_gp_state_full(&cookie_full);
}
started = cur_ops->get_gp_seq();
ts = rcu_trace_clock_local();
rcu: Introduce lockdep-based checking to RCU read-side primitives Inspection is proving insufficient to catch all RCU misuses, which is understandable given that rcu_dereference() might be protected by any of four different flavors of RCU (RCU, RCU-bh, RCU-sched, and SRCU), and might also/instead be protected by any of a number of locking primitives. It is therefore time to enlist the aid of lockdep. This set of patches is inspired by earlier work by Peter Zijlstra and Thomas Gleixner, and takes the following approach: o Set up separate lockdep classes for RCU, RCU-bh, and RCU-sched. o Set up separate lockdep classes for each instance of SRCU. o Create primitives that check for being in an RCU read-side critical section. These return exact answers if lockdep is fully enabled, but if unsure, report being in an RCU read-side critical section. (We want to avoid false positives!) The primitives are: For RCU: rcu_read_lock_held(void) For RCU-bh: rcu_read_lock_bh_held(void) For RCU-sched: rcu_read_lock_sched_held(void) For SRCU: srcu_read_lock_held(struct srcu_struct *sp) o Add rcu_dereference_check(), which takes a second argument in which one places a boolean expression based on the above primitives and/or lockdep_is_held(). o A new kernel configuration parameter, CONFIG_PROVE_RCU, enables rcu_dereference_check(). This depends on CONFIG_PROVE_LOCKING, and should be quite helpful during the transition period while CONFIG_PROVE_RCU-unaware patches are in flight. The existing rcu_dereference() primitive does no checking, but upcoming patches will change that. Signed-off-by: Paul E. 
McKenney <paulmck@linux.vnet.ibm.com> Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-23 01:04:45 +00:00
p = rcu_dereference_check(rcu_torture_current,
!cur_ops->readlock_held || cur_ops->readlock_held());
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
return false;
}
if (p->rtort_mbtest == 0)
atomic_inc(&n_rcu_torture_mberror);
rcu_torture_reader_do_mbchk(myid, p, trsp);
rtrsp = rcutorture_loop_extend(&readstate, trsp, rtrsp);
preempt_disable();
pipe_count = READ_ONCE(p->rtort_pipe_count);
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
// Should not happen in a correct RCU implementation,
// happens quite often for torture_type=busted.
pipe_count = RCU_TORTURE_PIPE_LEN;
}
completed = cur_ops->get_gp_seq();
if (pipe_count > 1) {
do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
ts, started, completed);
rcu_ftrace_dump(DUMP_ALL);
}
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = rcutorture_seq_diff(completed, started);
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
__this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
if (checkpolling) {
if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
WARN_ONCE(cur_ops->poll_gp_state(cookie),
"%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
__func__,
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
cookie, cur_ops->get_gp_state());
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full),
"%s: Cookie check 6 failed %s(%d) online %*pbl\n",
__func__,
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
cpumask_pr_args(cpu_online_mask));
}
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
WARN_ON_ONCE(readstate);
// This next splat is expected behavior if leakpointer, especially
// for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
/* If error or close call, record the sequence of reader protections. */
if ((pipe_count > 1 || completed > 1) && !xchg(&err_segs_recorded, 1)) {
i = 0;
for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++)
err_segs[i++] = *rtrsp1;
rt_read_nsegs = i;
}
return true;
}
/* Per-CPU random-number state for readers running from timer handlers. */
static DEFINE_TORTURE_RANDOM_PERCPU(rcu_torture_timer_rand);

/*
 * Timer-handler flavor of the RCU torture reader.  Counts the invocation,
 * does a single read-side critical section using this CPU's random state
 * and a reader ID of -1, and then, if the current ops provide ->call(),
 * posts a callback in order to test call_rcu() invocation from an
 * interrupt handler.  Failure to allocate the callback is silently
 * tolerated.
 */
static void rcu_torture_timer(struct timer_list *unused)
{
	struct rcu_head *rhp;

	atomic_long_inc(&n_rcu_torture_timers);
	(void)rcu_torture_one_read(this_cpu_ptr(&rcu_torture_timer_rand), -1);

	/* Test call_rcu() invocation from interrupt handler. */
	if (!cur_ops->call)
		return;
	rhp = kmalloc(sizeof(*rhp), GFP_NOWAIT);
	if (!rhp)
		return;
	cur_ops->call(rhp, rcu_torture_timer_cb);
}
/*
 * RCU torture reader kthread.  Loops doing read-side critical sections
 * via rcu_torture_one_read() until told to stop.  When irqreader is set
 * and the current ops are ->irq_capable, also keeps an on-stack timer
 * armed so that rcu_torture_timer() does reads from timer-handler context.
 *
 * The kthread argument is the reader's ID, which also serves as the
 * number of online CPUs below which this reader waits instead of reading.
 */
static int
rcu_torture_reader(void *arg)
{
	unsigned long nap_after = jiffies;
	long myid = (long)arg;
	int min_online = myid;
	DEFINE_TORTURE_RANDOM(rand);
	struct timer_list t;

	VERBOSE_TOROUT_STRING("rcu_torture_reader task started");
	set_user_nice(current, MAX_NICE);
	if (irqreader && cur_ops->irq_capable)
		timer_setup_on_stack(&t, rcu_torture_timer, 0);
	tick_dep_set_task(current, TICK_DEP_BIT_RCU);

	for (;;) {
		/* Re-arm the timer-handler reader whenever it is idle. */
		if (irqreader && cur_ops->irq_capable && !timer_pending(&t))
			mod_timer(&t, jiffies + 1);

		/* Nothing to read yet, so give the writer time to start. */
		if (!rcu_torture_one_read(&rand, myid) && !torture_must_stop())
			schedule_timeout_interruptible(HZ);

		/* Take a short randomized nap once the deadline has passed. */
		if (time_after(jiffies, nap_after) && !torture_must_stop()) {
			torture_hrtimeout_us(500, 1000, &rand);
			nap_after = jiffies + 10;
		}

		/* Hold off while too few CPUs are online for this reader. */
		while (torture_num_online_cpus() < min_online && !torture_must_stop())
			schedule_timeout_interruptible(HZ / 5);

		stutter_wait("rcu_torture_reader");
		if (torture_must_stop())
			break;
	}

	if (irqreader && cur_ops->irq_capable) {
		del_timer_sync(&t);
		destroy_timer_on_stack(&t);
	}
	tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
	torture_kthread_stopping("rcu_torture_reader");
	return 0;
}
/*
 * Randomly toggle CPUs' callback-offload state.  This uses hrtimers to
 * increase race probabilities and fuzzes the interval between toggling.
 *
 * Kthread function:  Waits for in-kernel boot to end, then repeatedly
 * picks a random possible CPU, randomly offloads or deoffloads it, and
 * sleeps for nocbs_toggle milliseconds plus a random fuzz of up to
 * one-eighth of that interval.  Returns zero once torture_must_stop().
 */
static int rcu_nocb_toggle(void *arg)
{
	int cpu;
	int maxcpu = -1;
	int oldnice = task_nice(current);
	/*
	 * Unsigned so that the CPU selection below cannot go negative:
	 * with a signed type, a random value with the top bit set would
	 * stay negative across ">> 1" and "%".
	 */
	unsigned long r;
	DEFINE_TORTURE_RANDOM(rand);
	ktime_t toggle_delay;
	unsigned long toggle_fuzz;
	ktime_t toggle_interval = ms_to_ktime(nocbs_toggle);

	VERBOSE_TOROUT_STRING("rcu_nocb_toggle task started");
	while (!rcu_inkernel_boot_has_ended())
		schedule_timeout_interruptible(HZ / 10);

	/* Find the largest possible CPU number. */
	for_each_possible_cpu(cpu)
		maxcpu = cpu;
	WARN_ON(maxcpu < 0);

	/* Fuzz is one-eighth of the interval, but never zero. */
	if (toggle_interval > ULONG_MAX)
		toggle_fuzz = ULONG_MAX >> 3;
	else
		toggle_fuzz = toggle_interval >> 3;
	if (!toggle_fuzz)
		toggle_fuzz = NSEC_PER_USEC;

	do {
		/* Low-order bit selects the operation, the rest the CPU. */
		r = torture_random(&rand);
		cpu = (r >> 1) % (maxcpu + 1);
		if (r & 0x1) {
			rcu_nocb_cpu_offload(cpu);
			atomic_long_inc(&n_nocb_offload);
		} else {
			rcu_nocb_cpu_deoffload(cpu);
			atomic_long_inc(&n_nocb_deoffload);
		}
		toggle_delay = torture_random(&rand) % toggle_fuzz + toggle_interval;
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_hrtimeout(&toggle_delay, HRTIMER_MODE_REL);
		/* Restore the original nice value after having stuttered. */
		if (stutter_wait("rcu_nocb_toggle"))
			sched_set_normal(current, oldnice);
	} while (!torture_must_stop());

	torture_kthread_stopping("rcu_nocb_toggle");
	return 0;
}
/*
 * Print torture statistics.  Caller must ensure that there is only
 * one call to this function at a given time!!!  This is normally
 * accomplished by relying on the module system to only have one copy
 * of the module loaded, and then by giving the rcu_torture_stats
 * kthread full control (or the init/cleanup functions when rcu_torture_stats
 * thread is not running).
 *
 * Besides printing, this function increments n_rcu_torture_error and
 * splats if any error counter is nonzero, and reports a writer stall if
 * rcu_torture_current_version has not changed since the previous call.
 */
static void
rcu_torture_stats_print(void)
{
int cpu;
int i;
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
struct rcu_torture *rtcp;
// Value of rcu_torture_current_version as of the previous call.
static unsigned long rtcv_snap = ULONG_MAX;
// Set once the writer task's stack has been dumped, to do so only once.
static bool splatted;
struct task_struct *wtp;
// Sum the per-CPU reader counters into the two local summary arrays.
for_each_possible_cpu(cpu) {
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
}
}
// Leave i at the highest pipe stage with a nonzero count, or at -1 if
// there is none.  This value is tested by the "i > 1" checks below.
for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
if (pipesummary[i] != 0)
break;
}
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
rtcp = rcu_access_pointer(rcu_torture_current);
pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rtcp,
rtcp && !rcu_stall_is_suppressed_at_boot() ? "ver" : "VER",
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free));
pr_cont("rtmbe: %d rtmbkf: %d/%d rtbe: %ld rtbke: %ld ",
atomic_read(&n_rcu_torture_mberror),
atomic_read(&n_rcu_torture_mbchk_fail), atomic_read(&n_rcu_torture_mbchk_tries),
n_rcu_torture_barrier_error,
n_rcu_torture_boost_ktrerror);
pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
atomic_long_read(&n_rcu_torture_timers));
torture_onoff_stats();
pr_cont("barrier: %ld/%ld:%ld ",
data_race(n_barrier_successes),
data_race(n_barrier_attempts),
data_race(n_rcu_torture_barrier_error));
pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
pr_cont("nocb-toggles: %ld:%ld\n",
atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
// Any nonzero error counter, or any read that landed beyond pipe stage 1,
// marks this line with "!!!" and counts as an error.
if (atomic_read(&n_rcu_torture_mberror) ||
atomic_read(&n_rcu_torture_mbchk_fail) ||
n_rcu_torture_barrier_error || n_rcu_torture_boost_ktrerror ||
n_rcu_torture_boost_failure || i > 1) {
pr_cont("%s", "!!! ");
atomic_inc(&n_rcu_torture_error);
// One splat per error type, so that the log says which one it was.
WARN_ON_ONCE(atomic_read(&n_rcu_torture_mberror));
WARN_ON_ONCE(atomic_read(&n_rcu_torture_mbchk_fail));
WARN_ON_ONCE(n_rcu_torture_barrier_error); // rcu_barrier()
WARN_ON_ONCE(n_rcu_torture_boost_ktrerror); // no boost kthread
WARN_ON_ONCE(n_rcu_torture_boost_failure); // boost failed (TIMER_SOFTIRQ RT prio?)
WARN_ON_ONCE(i > 1); // Too-short grace period
}
pr_cont("Reader Pipe: ");
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
pr_cont(" %ld", pipesummary[i]);
pr_cont("\n");
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
pr_cont("Reader Batch: ");
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
pr_cont(" %ld", batchsummary[i]);
pr_cont("\n");
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
pr_cont("Free-Block Circulation: ");
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
pr_cont(" %d", atomic_read(&rcu_torture_wcount[i]));
}
pr_cont("\n");
// Let the RCU flavor under test print its own statistics, if it has any.
if (cur_ops->stats)
cur_ops->stats();
// Writer-stall check:  The version did not advance since the previous
// call even though there is a current structure and stall reporting is
// not suppressed.
if (rtcv_snap == rcu_torture_current_version &&
rcu_access_pointer(rcu_torture_current) &&
!rcu_stall_is_suppressed()) {
int __maybe_unused flags = 0;
unsigned long __maybe_unused gp_seq = 0;
if (cur_ops->get_gp_data)
cur_ops->get_gp_data(&flags, &gp_seq);
wtp = READ_ONCE(writer_task);
pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#x cpu %d\n",
rcu_torture_writer_state_getname(),
rcu_torture_writer_state, gp_seq, flags,
wtp == NULL ? ~0U : wtp->__state,
wtp == NULL ? -1 : (int)task_cpu(wtp));
if (!splatted && wtp) {
sched_show_task(wtp);
splatted = true;
}
if (cur_ops->gp_kthread_dbg)
cur_ops->gp_kthread_dbg();
rcu_ftrace_dump(DUMP_ALL);
}
// Remember the version for the next call's writer-stall check.
rtcv_snap = rcu_torture_current_version;
}
/*
 * Statistics kthread:  Sleep for stat_interval seconds, print the torture
 * statistics, absorb any shutdown in progress, and repeat until the test
 * must stop.  The statistics are therefore always printed at least once.
 */
static int
rcu_torture_stats(void *arg)
{
	VERBOSE_TOROUT_STRING("rcu_torture_stats task started");

	for (;;) {
		schedule_timeout_interruptible(stat_interval * HZ);
		rcu_torture_stats_print();
		torture_shutdown_absorb("rcu_torture_stats");
		if (torture_must_stop())
			break;
	}

	torture_kthread_stopping("rcu_torture_stats");
	return 0;
}
/*
 * Test mem_dump_obj() and friends.
 *
 * Invokes mem_dump_obj() on pointers of several kinds in turn:  special
 * values, an on-stack variable, a static variable, and objects obtained
 * from a dedicated slab cache, from kmalloc(), and from vmalloc().
 * Each allocation is released before the next one is made, and the test
 * ends early (with a splat) on any allocation failure.
 */
static void rcu_torture_mem_dump_obj(void)
{
struct rcu_head *rhp;
struct kmem_cache *kcp;
// Static-storage object, used only for its address.
static int z;
// Slab test:  A private cache with SLAB_STORE_USER set.
kcp = kmem_cache_create("rcuscale", 136, 8, SLAB_STORE_USER, NULL);
if (WARN_ON_ONCE(!kcp))
return;
rhp = kmem_cache_alloc(kcp, GFP_KERNEL);
if (WARN_ON_ONCE(!rhp)) {
kmem_cache_destroy(kcp);
return;
}
pr_alert("mem_dump_obj() slab test: rcu_torture_stats = %px, &rhp = %px, rhp = %px, &z = %px\n", stats_task, &rhp, rhp, &z);
pr_alert("mem_dump_obj(ZERO_SIZE_PTR):");
mem_dump_obj(ZERO_SIZE_PTR);
pr_alert("mem_dump_obj(NULL):");
mem_dump_obj(NULL);
// Address of the on-stack pointer variable itself.
pr_alert("mem_dump_obj(%px):", &rhp);
mem_dump_obj(&rhp);
// Start of the slab object, then a field within it.
pr_alert("mem_dump_obj(%px):", rhp);
mem_dump_obj(rhp);
pr_alert("mem_dump_obj(%px):", &rhp->func);
mem_dump_obj(&rhp->func);
pr_alert("mem_dump_obj(%px):", &z);
mem_dump_obj(&z);
kmem_cache_free(kcp, rhp);
kmem_cache_destroy(kcp);
// kmalloc() test:  Start of the object, then a field within it.
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (WARN_ON_ONCE(!rhp))
return;
pr_alert("mem_dump_obj() kmalloc test: rcu_torture_stats = %px, &rhp = %px, rhp = %px\n", stats_task, &rhp, rhp);
pr_alert("mem_dump_obj(kmalloc %px):", rhp);
mem_dump_obj(rhp);
pr_alert("mem_dump_obj(kmalloc %px):", &rhp->func);
mem_dump_obj(&rhp->func);
kfree(rhp);
// vmalloc() test:  Start of the region, then an address within it.
rhp = vmalloc(4096);
if (WARN_ON_ONCE(!rhp))
return;
pr_alert("mem_dump_obj() vmalloc test: rcu_torture_stats = %px, &rhp = %px, rhp = %px\n", stats_task, &rhp, rhp);
pr_alert("mem_dump_obj(vmalloc %px):", rhp);
mem_dump_obj(rhp);
pr_alert("mem_dump_obj(vmalloc %px):", &rhp->func);
mem_dump_obj(&rhp->func);
vfree(rhp);
}
/*
 * Print the torture type, the caller-supplied tag, and the values of the
 * module parameters listed below, all on a single pr_alert() line.
 *
 * Note that the cur_ops parameter shadows the file-scope variable of the
 * same name.  It is used here only for ->can_boost, which is printed
 * after the slash in the "test_boost=%d/%d" field.
 */
static void
rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
{
pr_alert("%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
"shuffle_interval=%d stutter=%d irqreader=%d "
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d shutdown_secs=%d "
"stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
"stall_cpu_block=%d "
"n_barrier_cbs=%d "
"onoff_interval=%d onoff_holdoff=%d "
"read_exit_delay=%d read_exit_burst=%d "
"nocbs_nthreads=%d nocbs_toggle=%d "
"test_nmis=%d\n",
// The "nreaders=" field prints nrealreaders.
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration, shutdown_secs,
stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
stall_cpu_block,
n_barrier_cbs,
onoff_interval, onoff_holdoff,
read_exit_delay, read_exit_burst,
nocbs_nthreads, nocbs_toggle,
test_nmis);
}
/*
 * Stop the rcu_torture_boost kthread recorded for the specified CPU, if
 * there is one.  The boost_tasks[] entry is cleared and RT throttling is
 * re-enabled under boost_mutex, but the kthread itself is stopped only
 * after that mutex has been released.  Always returns zero.
 */
static int rcutorture_booster_cleanup(unsigned int cpu)
{
	struct task_struct *t;

	if (boost_tasks[cpu]) {
		mutex_lock(&boost_mutex);
		t = boost_tasks[cpu];
		boost_tasks[cpu] = NULL;
		rcu_torture_enable_rt_throttle();
		mutex_unlock(&boost_mutex);

		/* This must be outside of the mutex, otherwise deadlock! */
		torture_stop_kthread(rcu_torture_boost, t);
	}

	return 0;
}
static int rcutorture_booster_init(unsigned int cpu)
{
int retval;
if (boost_tasks[cpu] != NULL)
return 0; /* Already created, nothing more to do. */
rcutorture: Fix ksoftirqd boosting timing and iteration The RCU priority boosting can fail in two situations: 1) If (nr_cpus= > maxcpus=), which means if the total number of CPUs is higher than those brought online at boot, then torture_onoff() may later bring up CPUs that weren't online on boot. Now since rcutorture initialization only boosts the ksoftirqds of the CPUs that have been set online on boot, the CPUs later set online by torture_onoff won't benefit from the boost, making RCU priority boosting fail. 2) The ksoftirqd kthreads are boosted after the creation of rcu_torture_boost() kthreads, which opens a window large enough for these rcu_torture_boost() kthreads to wait (despite running at FIFO priority) for ksoftirqds that are still running at SCHED_NORMAL priority. The issues can trigger for example with: ./kvm.sh --configs TREE01 --kconfig "CONFIG_RCU_BOOST=y" [ 34.968561] rcu-torture: !!! [ 34.968627] ------------[ cut here ]------------ [ 35.014054] WARNING: CPU: 4 PID: 114 at kernel/rcu/rcutorture.c:1979 rcu_torture_stats_print+0x5ad/0x610 [ 35.052043] Modules linked in: [ 35.069138] CPU: 4 PID: 114 Comm: rcu_torture_sta Not tainted 5.18.0-rc1 #1 [ 35.096424] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 [ 35.154570] RIP: 0010:rcu_torture_stats_print+0x5ad/0x610 [ 35.198527] Code: 63 1b 02 00 74 02 0f 0b 48 83 3d 35 63 1b 02 00 74 02 0f 0b 48 83 3d 21 63 1b 02 00 74 02 0f 0b 48 83 3d 0d 63 1b 02 00 74 02 <0f> 0b 83 eb 01 0f 8e ba fc ff ff 0f 0b e9 b3 fc ff f82 [ 37.251049] RSP: 0000:ffffa92a0050bdf8 EFLAGS: 00010202 [ 37.277320] rcu: De-offloading 8 [ 37.290367] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000001 [ 37.290387] RDX: 0000000000000000 RSI: 00000000ffffbfff RDI: 00000000ffffffff [ 37.290398] RBP: 000000000000007b R08: 0000000000000000 R09: c0000000ffffbfff [ 37.290407] R10: 000000000000002a R11: ffffa92a0050bc18 R12: ffffa92a0050be20 [ 37.290417] R13: 
ffffa92a0050be78 R14: 0000000000000000 R15: 000000000001bea0 [ 37.290427] FS: 0000000000000000(0000) GS:ffff96045eb00000(0000) knlGS:0000000000000000 [ 37.290448] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 37.290460] CR2: 0000000000000000 CR3: 000000001dc0c000 CR4: 00000000000006e0 [ 37.290470] Call Trace: [ 37.295049] <TASK> [ 37.295065] ? preempt_count_add+0x63/0x90 [ 37.295095] ? _raw_spin_lock_irqsave+0x12/0x40 [ 37.295125] ? rcu_torture_stats_print+0x610/0x610 [ 37.295143] rcu_torture_stats+0x29/0x70 [ 37.295160] kthread+0xe3/0x110 [ 37.295176] ? kthread_complete_and_exit+0x20/0x20 [ 37.295193] ret_from_fork+0x22/0x30 [ 37.295218] </TASK> Fix this with boosting the ksoftirqds kthreads from the boosting hotplug callback itself and before the boosting kthreads are created. Fixes: ea6d962e80b6 ("rcutorture: Judge RCU priority boosting on grace periods, not callbacks") Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2022-06-10 13:03:57 +00:00
// Testing RCU priority boosting requires rcutorture do
// some serious abuse. Counter this by running ksoftirqd
// at higher priority.
if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
struct sched_param sp;
struct task_struct *t;
t = per_cpu(ksoftirqd, cpu);
WARN_ON_ONCE(!t);
sp.sched_priority = 2;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
/* Don't allow time recalculation while creating a new task. */
mutex_lock(&boost_mutex);
rcu_torture_disable_rt_throttle();
VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task");
boost_tasks[cpu] = kthread_run_on_cpu(rcu_torture_boost, NULL,
cpu, "rcu_torture_boost_%u");
if (IS_ERR(boost_tasks[cpu])) {
retval = PTR_ERR(boost_tasks[cpu]);
VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed");
n_rcu_torture_boost_ktrerror++;
boost_tasks[cpu] = NULL;
mutex_unlock(&boost_mutex);
return retval;
}
mutex_unlock(&boost_mutex);
return 0;
}
/*
 * Notifier callback for rcu_torture_stall_block:  Log the notifier value
 * and the pointer argument, the latter printed as an unsigned long
 * labeled "duration".  Always returns NOTIFY_OK.
 */
static int rcu_torture_stall_nf(struct notifier_block *nb, unsigned long v, void *ptr)
{
	unsigned long duration = (unsigned long)ptr;

	pr_info("%s: v=%lu, duration=%lu.\n", __func__, v, duration);
	return NOTIFY_OK;
}

/* Notifier block that routes notifications to rcu_torture_stall_nf(). */
static struct notifier_block rcu_torture_stall_block = {
	.notifier_call = rcu_torture_stall_nf,
};
/*
* CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
* induces a CPU stall for the time specified by stall_cpu. If a new
* stall test is added, stallsdone in rcu_torture_writer() must be adjusted.
*/
static int rcu_torture_stall(void *args)
{
int idx;
int ret;
unsigned long stop_at;
VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
rcu: Restrict access to RCU CPU stall notifiers Although the RCU CPU stall notifiers can be useful for dumping state when tracking down delicate forward-progress bugs where NUMA effects cause cache lines to be delivered to a given CPU regularly, but always in a state that prevents that CPU from making forward progress. These bugs can be detected by the RCU CPU stall-warning mechanism, but in some cases, the stall-warnings printk()s disrupt the forward-progress bug before any useful state can be obtained. Unfortunately, the notifier mechanism added by commit 5b404fdabacf ("rcu: Add RCU CPU stall notifier") can make matters worse if used at all carelessly. For example, if the stall warning was caused by a lock not being released, then any attempt to acquire that lock in the notifier will hang. This will prevent not only the notifier from producing any useful output, but it will also prevent the stall-warning message from ever appearing. This commit therefore hides this new RCU CPU stall notifier mechanism under a new RCU_CPU_STALL_NOTIFIER Kconfig option that depends on both DEBUG_KERNEL and RCU_EXPERT. In addition, the rcupdate.rcu_cpu_stall_notifiers=1 kernel boot parameter must also be specified. The RCU_CPU_STALL_NOTIFIER Kconfig option's help text contains a warning and explains the dangers of careless use, recommending lockless notifier code. In addition, a WARN() is triggered each time that an attempt is made to register a stall-warning notifier in kernels built with CONFIG_RCU_CPU_STALL_NOTIFIER=y. This combination of measures will keep use of this mechanism confined to debug kernels and away from routine deployments. [ paulmck: Apply Dan Carpenter feedback. ] Fixes: 5b404fdabacf ("rcu: Add RCU CPU stall notifier") Reported-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org> Signed-off-by: Neeraj Upadhyay (AMD) <neeraj.iitr10@gmail.com>
2023-11-02 01:28:38 +00:00
if (rcu_cpu_stall_notifiers) {
ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block);
if (ret)
pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n",
__func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : "");
}
if (stall_cpu_holdoff > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff");
schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff");
}
if (!kthread_should_stop() && stall_gp_kthread > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_stall begin GP stall");
rcu_gp_set_torture_wait(stall_gp_kthread * HZ);
for (idx = 0; idx < stall_gp_kthread + 2; idx++) {
if (kthread_should_stop())
break;
schedule_timeout_uninterruptible(HZ);
}
}
if (!kthread_should_stop() && stall_cpu > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_stall begin CPU stall");
stop_at = ktime_get_seconds() + stall_cpu;
/* RCU CPU stall is expected behavior in following code. */
idx = cur_ops->readlock();
if (stall_cpu_irqsoff)
local_irq_disable();
else if (!stall_cpu_block)
preempt_disable();
pr_alert("%s start on CPU %d.\n",
__func__, raw_smp_processor_id());
rcutorture: Make stall-tasks directly exit when rcutorture tests end When the rcutorture tests start to exit, the rcu_torture_cleanup() is invoked to stop kthreads and release resources, if the stall-task kthreads exist, cpu-stall has started and the rcutorture.stall_cpu is set to a larger value, the rcu_torture_cleanup() will be blocked for a long time and the hung-task may occur, this commit therefore add kthread_should_stop() to the loop of cpu-stall operation, when rcutorture tests ends, no need to wait for cpu-stall to end, exit directly. Use the following command to test: insmod rcutorture.ko torture_type=srcu fwd_progress=0 stat_interval=4 stall_cpu_block=1 stall_cpu=200 stall_cpu_holdoff=10 read_exit_burst=0 object_debug=1 rmmod rcutorture [15361.918610] INFO: task rmmod:878 blocked for more than 122 seconds. [15361.918613] Tainted: G W 6.8.0-rc2-yoctodev-standard+ #25 [15361.918615] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [15361.918616] task:rmmod state:D stack:0 pid:878 tgid:878 ppid:773 flags:0x00004002 [15361.918621] Call Trace: [15361.918623] <TASK> [15361.918626] __schedule+0xc0d/0x28f0 [15361.918631] ? __pfx___schedule+0x10/0x10 [15361.918635] ? rcu_is_watching+0x19/0xb0 [15361.918638] ? schedule+0x1f6/0x290 [15361.918642] ? __pfx_lock_release+0x10/0x10 [15361.918645] ? schedule+0xc9/0x290 [15361.918648] ? schedule+0xc9/0x290 [15361.918653] ? trace_preempt_off+0x54/0x100 [15361.918657] ? schedule+0xc9/0x290 [15361.918661] schedule+0xd0/0x290 [15361.918665] schedule_timeout+0x56d/0x7d0 [15361.918669] ? debug_smp_processor_id+0x1b/0x30 [15361.918672] ? rcu_is_watching+0x19/0xb0 [15361.918676] ? __pfx_schedule_timeout+0x10/0x10 [15361.918679] ? debug_smp_processor_id+0x1b/0x30 [15361.918683] ? rcu_is_watching+0x19/0xb0 [15361.918686] ? wait_for_completion+0x179/0x4c0 [15361.918690] ? __pfx_lock_release+0x10/0x10 [15361.918693] ? __kasan_check_write+0x18/0x20 [15361.918696] ? 
wait_for_completion+0x9d/0x4c0 [15361.918700] ? _raw_spin_unlock_irq+0x36/0x50 [15361.918703] ? wait_for_completion+0x179/0x4c0 [15361.918707] ? _raw_spin_unlock_irq+0x36/0x50 [15361.918710] ? wait_for_completion+0x179/0x4c0 [15361.918714] ? trace_preempt_on+0x54/0x100 [15361.918718] ? wait_for_completion+0x179/0x4c0 [15361.918723] wait_for_completion+0x181/0x4c0 [15361.918728] ? __pfx_wait_for_completion+0x10/0x10 [15361.918738] kthread_stop+0x152/0x470 [15361.918742] _torture_stop_kthread+0x44/0xc0 [torture 7af7f9cbba28271a10503b653f9e05d518fbc8c3] [15361.918752] rcu_torture_cleanup+0x2ac/0xe90 [rcutorture f2cb1f556ee7956270927183c4c2c7749a336529] [15361.918766] ? __pfx_rcu_torture_cleanup+0x10/0x10 [rcutorture f2cb1f556ee7956270927183c4c2c7749a336529] [15361.918777] ? __kasan_check_write+0x18/0x20 [15361.918781] ? __mutex_unlock_slowpath+0x17c/0x670 [15361.918789] ? __might_fault+0xcd/0x180 [15361.918793] ? find_module_all+0x104/0x1d0 [15361.918799] __x64_sys_delete_module+0x2a4/0x3f0 [15361.918803] ? __pfx___x64_sys_delete_module+0x10/0x10 [15361.918807] ? syscall_exit_to_user_mode+0x149/0x280 Signed-off-by: Zqiang <qiang.zhang1211@gmail.com> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
2024-03-21 08:28:50 +00:00
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(), stop_at) &&
!kthread_should_stop())
if (stall_cpu_block) {
#ifdef CONFIG_PREEMPTION
preempt_schedule();
#else
schedule_timeout_uninterruptible(HZ);
#endif
} else if (stall_no_softlockup) {
touch_softlockup_watchdog();
}
if (stall_cpu_irqsoff)
local_irq_enable();
else if (!stall_cpu_block)
preempt_enable();
cur_ops->readunlock(idx);
}
pr_alert("%s end.\n", __func__);
rcu: Restrict access to RCU CPU stall notifiers Although the RCU CPU stall notifiers can be useful for dumping state when tracking down delicate forward-progress bugs where NUMA effects cause cache lines to be delivered to a given CPU regularly, but always in a state that prevents that CPU from making forward progress. These bugs can be detected by the RCU CPU stall-warning mechanism, but in some cases, the stall-warnings printk()s disrupt the forward-progress bug before any useful state can be obtained. Unfortunately, the notifier mechanism added by commit 5b404fdabacf ("rcu: Add RCU CPU stall notifier") can make matters worse if used at all carelessly. For example, if the stall warning was caused by a lock not being released, then any attempt to acquire that lock in the notifier will hang. This will prevent not only the notifier from producing any useful output, but it will also prevent the stall-warning message from ever appearing. This commit therefore hides this new RCU CPU stall notifier mechanism under a new RCU_CPU_STALL_NOTIFIER Kconfig option that depends on both DEBUG_KERNEL and RCU_EXPERT. In addition, the rcupdate.rcu_cpu_stall_notifiers=1 kernel boot parameter must also be specified. The RCU_CPU_STALL_NOTIFIER Kconfig option's help text contains a warning and explains the dangers of careless use, recommending lockless notifier code. In addition, a WARN() is triggered each time that an attempt is made to register a stall-warning notifier in kernels built with CONFIG_RCU_CPU_STALL_NOTIFIER=y. This combination of measures will keep use of this mechanism confined to debug kernels and away from routine deployments. [ paulmck: Apply Dan Carpenter feedback. ] Fixes: 5b404fdabacf ("rcu: Add RCU CPU stall notifier") Reported-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org> Signed-off-by: Neeraj Upadhyay (AMD) <neeraj.iitr10@gmail.com>
2023-11-02 01:28:38 +00:00
if (rcu_cpu_stall_notifiers && !ret) {
ret = rcu_stall_chain_notifier_unregister(&rcu_torture_stall_block);
if (ret)
pr_info("%s: rcu_stall_chain_notifier_unregister() returned %d.\n", __func__, ret);
}
torture_shutdown_absorb("rcu_torture_stall");
while (!kthread_should_stop())
schedule_timeout_interruptible(10 * HZ);
return 0;
}
/*
 * Create the CPU-stall kthread, but only when at least one of the stall_cpu
 * and stall_gp_kthread parameters is positive.  Returns zero when no stall
 * testing was requested, otherwise the torture_create_kthread() result.
 */
static int __init rcu_torture_stall_init(void)
{
	if (stall_cpu > 0 || stall_gp_kthread > 0)
		return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
	return 0;
}
/*
 * State structure for forward-progress self-propagating RCU callback.
 * rcu_torture_fwd_prog_nr() keeps one of these on its stack for the
 * duration of a test.
 */
struct fwd_cb_state {
	struct rcu_head rh;	/* Callback header handed to cur_ops->call(). */
	int stop;		/* 0: keep re-posting, 1: stop requested, 2: callback saw the request. */
};
/*
 * Self-propagating callback used by the need_resched() forward-progress
 * test.  As long as the owner has not asked for a stop, the callback simply
 * re-posts itself; once ->stop is nonzero it instead stores 2 there to
 * acknowledge the request and lets the chain end.  Because callbacks run
 * from softirq, this function is an implicit RCU read-side critical section.
 */
static void rcu_torture_fwd_prog_cb(struct rcu_head *rhp)
{
	struct fwd_cb_state *state = container_of(rhp, struct fwd_cb_state, rh);

	if (!READ_ONCE(state->stop)) {
		/* No stop request yet, so keep the chain going. */
		cur_ops->call(&state->rh, rcu_torture_fwd_prog_cb);
		return;
	}
	WRITE_ONCE(state->stop, 2);
}
/*
 * State for continuous-flood RCU callbacks.  Each element is one callback
 * that rcu_torture_fwd_prog_cr() allocates and posts, and that is queued
 * on its owner's list each time it is invoked so that it can be re-posted.
 */
struct rcu_fwd_cb {
	struct rcu_head rh;		/* Callback header handed to cur_ops->call(). */
	struct rcu_fwd_cb *rfc_next;	/* Next element on the owner's list of invoked callbacks. */
	struct rcu_fwd *rfc_rfp;	/* Forward-progress test instance owning this callback. */
	int rfc_gps;			/* Number of times this callback has been invoked. */
};
/* Tunables for the call_rcu() flood test, rcu_torture_fwd_prog_cr(). */
#define MAX_FWD_CB_JIFFIES (8 * HZ) /* Maximum CB test duration. */
#define MIN_FWD_CB_LAUNDERS 3 /* This many CB invocations to count. */
#define MIN_FWD_CBS_LAUNDERED 100 /* Number of counted CBs. */
#define FWD_CBS_HIST_DIV 10 /* Histogram buckets/second. */
/* Number of histogram buckets: enough to cover twice the maximum test duration. */
#define N_LAUNDERS_HIST (2 * MAX_FWD_CB_JIFFIES / (HZ / FWD_CBS_HIST_DIV))
/* One time bucket of the callback-invocation histogram. */
struct rcu_launder_hist {
	long n_launders;		/* Callbacks invoked while this bucket was current. */
	unsigned long launder_gp_seq;	/* cur_ops->get_gp_seq() value at the most recent such invocation. */
};
/*
 * Per-kthread forward-progress test state.  One element per forward-progress
 * kthread is allocated by rcu_torture_fwd_prog_init().
 */
struct rcu_fwd {
	spinlock_t rcu_fwd_lock;			/* Guards list updates and histogram updates. */
	struct rcu_fwd_cb *rcu_fwd_cb_head;		/* First invoked callback awaiting re-posting or freeing. */
	struct rcu_fwd_cb **rcu_fwd_cb_tail;		/* Where to link the next invoked callback; &rcu_fwd_cb_head when empty. */
	long n_launders_cb;				/* Callback invocations since the current test started. */
	unsigned long rcu_fwd_startat;			/* Jiffies at which the current test started. */
	struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST]; /* Invocation histogram, one bucket per 1/FWD_CBS_HIST_DIV second. */
	unsigned long rcu_launder_gp_seq_start;		/* cur_ops->get_gp_seq() value at the start of the flood test. */
	int rcu_fwd_id;					/* Index of this element within the rcu_fwds array. */
};
/* Protects the rcu_fwds pointer and serializes histogram output. */
static DEFINE_MUTEX(rcu_fwd_mutex);
/* Array of fwd_progress test-state elements, NULL when not testing. */
static struct rcu_fwd *rcu_fwds;
/* Advanced by kthread 0 once per pass; the other kthreads wait for it to change. */
static unsigned long rcu_fwd_seq;
/* Sum of callbacks allocated by all kthreads since kthread 0 last reported it. */
static atomic_long_t rcu_fwd_max_cbs;
/* Set by the OOM notifier to halt testing, cleared by kthread 0 before each pass. */
static bool rcu_fwd_emergency_stop;
/*
 * Dump the callback-invocation histogram of the specified forward-progress
 * test instance on one console line.  Trailing empty buckets are omitted,
 * but the first bucket is always shown.  Each bucket is printed as its end
 * time in 1/FWD_CBS_HIST_DIV-second units, its invocation count, and the
 * grace-period sequence delta relative to the previous bucket.
 */
static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
{
	int bucket;
	unsigned long gp_now;
	unsigned long gp_prev;
	int last = ARRAY_SIZE(rfp->n_launders_hist) - 1;

	/* Find the last bucket that recorded any invocations. */
	while (last > 0 && rfp->n_launders_hist[last].n_launders <= 0)
		last--;
	pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
		 __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
	gp_prev = rfp->rcu_launder_gp_seq_start;
	for (bucket = 0; bucket <= last; bucket++) {
		gp_now = rfp->n_launders_hist[bucket].launder_gp_seq;
		pr_cont(" %ds/%d: %ld:%ld",
			bucket + 1, FWD_CBS_HIST_DIV,
			rfp->n_launders_hist[bucket].n_launders,
			rcutorture_seq_diff(gp_now, gp_prev));
		gp_prev = gp_now;
	}
	pr_cont("\n");
}
/*
 * Callback function for continuous-flood RCU callbacks.  Each invocation
 * bumps the callback's invocation count, appends the callback to the tail
 * of its owner's list so that rcu_torture_fwd_prog_cr() can pick it up
 * again, and accounts for the invocation in the owner's histogram.
 */
static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
{
	unsigned long flags;
	int i;
	struct rcu_fwd_cb *rfcp = container_of(rhp, struct rcu_fwd_cb, rh);
	struct rcu_fwd_cb **rfcpp;
	struct rcu_fwd *rfp = rfcp->rfc_rfp;
	rfcp->rfc_next = NULL; // This element becomes the new end of the list.
	rfcp->rfc_gps++;
	spin_lock_irqsave(&rfp->rcu_fwd_lock, flags);
	rfcpp = rfp->rcu_fwd_cb_tail;
	rfp->rcu_fwd_cb_tail = &rfcp->rfc_next;
	// Release store: the element's fields are set before it becomes
	// reachable, as rcu_torture_fwd_prog_cr() reads the list locklessly.
	smp_store_release(rfcpp, rfcp);
	WRITE_ONCE(rfp->n_launders_cb, rfp->n_launders_cb + 1);
	// Select the histogram bucket from the time elapsed since test start.
	i = ((jiffies - rfp->rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV));
	// Late invocations are all accounted for in the final bucket.
	if (i >= ARRAY_SIZE(rfp->n_launders_hist))
		i = ARRAY_SIZE(rfp->n_launders_hist) - 1;
	rfp->n_launders_hist[i].n_launders++;
	rfp->n_launders_hist[i].launder_gp_seq = cur_ops->get_gp_seq();
	spin_unlock_irqrestore(&rfp->rcu_fwd_lock, flags);
}
// Let the scheduler run, even when executing on a nohz_full CPU.  The
// iter argument is the caller's running iteration count.
static void rcu_torture_fwd_prog_cond_resched(unsigned long iter)
{
	if (!IS_ENABLED(CONFIG_PREEMPTION) || !IS_ENABLED(CONFIG_NO_HZ_FULL)) {
		// No userspace emulation: CB invocation throttles call_rcu()
		cond_resched();
		return;
	}
	// Real call_rcu() floods hit userspace, so emulate that.
	// NOTE(review): as written, schedule() is skipped only when the low
	// 12 bits of iter are all zero and need_resched() is false -- confirm
	// that this (rather than the inverse test) is what is intended.
	if (need_resched() || (iter & 0xfff))
		schedule();
}
/*
 * Empty the rcu_fwd_cb_head list of the specified test instance, freeing
 * each callback found there, either because the test is over or because
 * an OOM event was hit.  Elements are unlinked one at a time under
 * ->rcu_fwd_lock and freed with the lock dropped.  Returns the number of
 * callbacks freed.
 */
static unsigned long rcu_torture_fwd_prog_cbfree(struct rcu_fwd *rfp)
{
	unsigned long irqflags;
	unsigned long nfreed = 0;
	struct rcu_fwd_cb *victim;

	while (1) {
		/* Unlink the first element, if there is one. */
		spin_lock_irqsave(&rfp->rcu_fwd_lock, irqflags);
		victim = rfp->rcu_fwd_cb_head;
		if (victim) {
			rfp->rcu_fwd_cb_head = victim->rfc_next;
			/* List now empty, so point the tail back at the head. */
			if (!rfp->rcu_fwd_cb_head)
				rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head;
		}
		spin_unlock_irqrestore(&rfp->rcu_fwd_lock, irqflags);
		if (!victim)
			break;

		kfree(victim);
		rcu_torture_fwd_prog_cond_resched(++nfreed);
		if (!tick_nohz_full_enabled())
			continue;
		local_irq_save(irqflags);
		rcu_momentary_dyntick_idle();
		local_irq_restore(irqflags);
	}
	return nfreed;
}
/*
 * Carry out need_resched()/cond_resched() forward-progress testing.
 *
 * Runs a tight loop of short read-side critical sections for a randomly
 * chosen number of jiffies and then checks that either the rcutorture
 * version number advanced or at least two units of grace-period sequence
 * elapsed.  When the flavor under test supplies both ->call and
 * ->cb_barrier, a self-propagating callback is kept in flight throughout.
 *
 * @rfp: forward-progress state for the invoking kthread.
 * @tested: incremented when the loop ran to completion and was checked.
 * @tested_tries: incremented on each attempt that got as far as the loop.
 *
 * Does nothing beyond the initial message if the flavor lacks ->sync.
 */
static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
				    int *tested, int *tested_tries)
{
	unsigned long cver;
	unsigned long dur;
	struct fwd_cb_state fcs;
	unsigned long gps;
	int idx;
	int sd;
	int sd4;
	bool selfpropcb = false;
	unsigned long stopat;
	static DEFINE_TORTURE_RANDOM(trs);

	pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
	if (!cur_ops->sync)
		return; // Cannot do need_resched() forward progress testing without ->sync.
	if (cur_ops->call && cur_ops->cb_barrier) {
		init_rcu_head_on_stack(&fcs.rh);
		selfpropcb = true;
	}

	/* Tight loop containing cond_resched(). */
	atomic_inc(&rcu_fwd_cb_nodelay);
	cur_ops->sync(); /* Later readers see above write. */
	if (selfpropcb) {
		WRITE_ONCE(fcs.stop, 0);
		cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb);
	}
	cver = READ_ONCE(rcu_torture_current_version);
	gps = cur_ops->get_gp_seq();
	sd = cur_ops->stall_dur() + 1;
	sd4 = (sd + fwd_progress_div - 1) / fwd_progress_div;
	/*
	 * Pick a duration in [sd4, sd).  That range is empty when sd4 == sd
	 * (for example, fwd_progress_div == 1), in which case just use sd4
	 * rather than taking a remainder modulo zero.
	 */
	dur = sd4;
	if (sd > sd4)
		dur += torture_random(&trs) % (sd - sd4);
	WRITE_ONCE(rfp->rcu_fwd_startat, jiffies);
	stopat = rfp->rcu_fwd_startat + dur;
	while (time_before(jiffies, stopat) &&
	       !shutdown_time_arrived() &&
	       !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
		idx = cur_ops->readlock();
		udelay(10);
		cur_ops->readunlock(idx);
		if (!fwd_progress_need_resched || need_resched())
			cond_resched();
	}
	(*tested_tries)++;
	/* Check results only if the loop ran for its full duration. */
	if (!time_before(jiffies, stopat) &&
	    !shutdown_time_arrived() &&
	    !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
		(*tested)++;
		cver = READ_ONCE(rcu_torture_current_version) - cver;
		gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
		WARN_ON(!cver && gps < 2);
		pr_alert("%s: %d Duration %ld cver %ld gps %ld\n", __func__,
			 rfp->rcu_fwd_id, dur, cver, gps);
	}
	if (selfpropcb) {
		WRITE_ONCE(fcs.stop, 1);
		cur_ops->sync(); /* Wait for running CB to complete. */
		pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
		cur_ops->cb_barrier(); /* Wait for queued callbacks. */
		/* The callback must have acknowledged the stop request by now. */
		WARN_ON(READ_ONCE(fcs.stop) != 2);
		destroy_rcu_head_on_stack(&fcs.rh);
	}
	schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
	atomic_dec(&rcu_fwd_cb_nodelay);
}
/* Carry out call_rcu() forward-progress testing. */
static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
{
unsigned long cver;
unsigned long flags;
unsigned long gps;
int i;
long n_launders;
long n_launders_cb_snap;
long n_launders_sa;
long n_max_cbs;
long n_max_gps;
struct rcu_fwd_cb *rfcp;
struct rcu_fwd_cb *rfcpn;
unsigned long stopat;
unsigned long stoppedat;
pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
if (READ_ONCE(rcu_fwd_emergency_stop))
return; /* Get out of the way quickly, no GP wait! */
rcutorture: Add trivial RCU implementation I have been showing off a trivial RCU implementation for non-preemptive environments for some time now: #define rcu_read_lock() #define rcu_read_unlock() #define rcu_dereference(p) READ_ONCE(p) #define rcu_assign_pointer(p, v) smp_store_release(&(p), (v)) void synchronize_rcu(void) { int cpu; for_each_online_cpu(cpu) sched_setaffinity(current->pid, cpumask_of(cpu)); } Trivial or not, as the old saying goes, "if it ain't tested, it don't work!". This commit therefore adds a "trivial" flavor to rcutorture and a corresponding TRIVIAL test scenario. This variant does not handle CPU hotplug, which is unconditionally enabled on x86 for post-v5.1-rc3 kernels, which is why the TRIVIAL.boot says "rcutorture.onoff_interval=0". This commit actually does handle CONFIG_PREEMPT=y kernels, but only because it turns back the Linux-kernel clock in order to provide these alternative definitions (or the moral equivalent thereof): #define rcu_read_lock() preempt_disable() #define rcu_read_unlock() preempt_enable() In CONFIG_PREEMPT=n kernels without debugging, these are equivalent to empty macros give or take a compiler barrier. However, the have been successfully tested with actual empty macros as well. Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> [ paulmck: Fix symbol issue reported by kbuild test robot <lkp@intel.com>. ] [ paulmck: Work around sched_setaffinity() issue noted by Andrea Parri. ] [ paulmck: Add rcutorture.shuffle_interval=0 to TRIVIAL.boot to fix interaction with shuffler task noted by Peter Zijlstra. ] Tested-by: Andrea Parri <andrea.parri@amarulasolutions.com>
2019-04-19 14:38:27 +00:00
if (!cur_ops->call)
return; /* Can't do call_rcu() fwd prog without ->call. */
/* Loop continuously posting RCU callbacks. */
atomic_inc(&rcu_fwd_cb_nodelay);
cur_ops->sync(); /* Later readers see above write. */
WRITE_ONCE(rfp->rcu_fwd_startat, jiffies);
stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES;
n_launders = 0;
rfp->n_launders_cb = 0; // Hoist initialization for multi-kthread
n_launders_sa = 0;
n_max_cbs = 0;
n_max_gps = 0;
for (i = 0; i < ARRAY_SIZE(rfp->n_launders_hist); i++)
rfp->n_launders_hist[i].n_launders = 0;
cver = READ_ONCE(rcu_torture_current_version);
gps = cur_ops->get_gp_seq();
rfp->rcu_launder_gp_seq_start = gps;
tick_dep_set_task(current, TICK_DEP_BIT_RCU);
while (time_before(jiffies, stopat) &&
!shutdown_time_arrived() &&
!READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
rfcp = READ_ONCE(rfp->rcu_fwd_cb_head);
rfcpn = NULL;
if (rfcp)
rfcpn = READ_ONCE(rfcp->rfc_next);
if (rfcpn) {
if (rfcp->rfc_gps >= MIN_FWD_CB_LAUNDERS &&
++n_max_gps >= MIN_FWD_CBS_LAUNDERED)
break;
rfp->rcu_fwd_cb_head = rfcpn;
n_launders++;
n_launders_sa++;
} else if (!cur_ops->cbflood_max || cur_ops->cbflood_max > n_max_cbs) {
rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL);
if (WARN_ON_ONCE(!rfcp)) {
schedule_timeout_interruptible(1);
continue;
}
n_max_cbs++;
n_launders_sa = 0;
rfcp->rfc_gps = 0;
rfcp->rfc_rfp = rfp;
} else {
rfcp = NULL;
}
if (rfcp)
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
if (tick_nohz_full_enabled()) {
local_irq_save(flags);
rcu_momentary_dyntick_idle();
local_irq_restore(flags);
}
}
stoppedat = jiffies;
n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb);
cver = READ_ONCE(rcu_torture_current_version) - cver;
gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
(void)rcu_torture_fwd_prog_cbfree(rfp);
if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop) &&
!shutdown_time_arrived()) {
if (WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED) && cur_ops->gp_kthread_dbg)
cur_ops->gp_kthread_dbg();
pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld #online %u\n",
__func__,
stoppedat - rfp->rcu_fwd_startat, jiffies - stoppedat,
n_launders + n_max_cbs - n_launders_cb_snap,
n_launders, n_launders_sa,
n_max_gps, n_max_cbs, cver, gps, num_online_cpus());
atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
rcu_torture_fwd_cb_hist(rfp);
mutex_unlock(&rcu_fwd_mutex);
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
atomic_dec(&rcu_fwd_cb_nodelay);
}
/*
 * OOM notifier for forward-progress testing.  If a test is set up, this
 * prints each instance's histogram and forward-progress diagnostics,
 * requests an emergency stop, and then makes three passes freeing the
 * callbacks queued on every instance's list, waiting for in-flight
 * callbacks between passes.
 *
 * @self: notifier block, unused.
 * @notused: notifier action, unused.
 * @nfreed: pointer to an unsigned long that is incremented to tell the
 *	    OOM code that memory was freed.
 *
 * Always returns NOTIFY_OK.
 */
static int rcutorture_oom_notify(struct notifier_block *self,
				 unsigned long notused, void *nfreed)
{
	int i;
	unsigned long ncbs;	/* Matches rcu_torture_fwd_prog_cbfree() and %lu. */
	int pass;
	struct rcu_fwd *rfp;

	mutex_lock(&rcu_fwd_mutex);
	rfp = rcu_fwds;
	if (!rfp) {
		/* No forward-progress testing, so nothing to free. */
		mutex_unlock(&rcu_fwd_mutex);
		return NOTIFY_OK;
	}
	WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
	     __func__);
	for (i = 0; i < fwd_progress; i++) {
		rcu_torture_fwd_cb_hist(&rfp[i]);
		rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp[i].rcu_fwd_startat)) / 2);
	}
	WRITE_ONCE(rcu_fwd_emergency_stop, true);
	smp_mb(); /* Emergency stop before free and wait to avoid hangs. */
	for (pass = 0; pass < 3; pass++) {
		/* Later passes first wait for callbacks still in flight. */
		if (pass)
			cur_ops->cb_barrier();
		ncbs = 0;
		for (i = 0; i < fwd_progress; i++)
			ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
		pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
	}
	smp_mb(); /* Frees before return to avoid redoing OOM. */
	(*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */
	pr_info("%s returning after OOM processing.\n", __func__);
	mutex_unlock(&rcu_fwd_mutex);
	return NOTIFY_OK;
}
/* Notifier block used to hook rcutorture_oom_notify() into the OOM notifier chain. */
static struct notifier_block rcutorture_oom_nb = {
	.notifier_call = rcutorture_oom_notify,
};
/*
 * Forward-progress kthread body; @args points to this kthread's
 * struct rcu_fwd.  The kthread whose ->rcu_fwd_id is zero paces the
 * others: on each pass it sleeps for the holdoff period, clears any
 * emergency stop, reports the callback total of the previous pass, and
 * advances rcu_fwd_seq.  Every other kthread waits for rcu_fwd_seq to
 * change.  Each kthread then runs the call_rcu() flood test and the
 * need_resched() test as conditions permit.  Returns zero.
 */
static int rcu_torture_fwd_prog(void *args)
{
	bool first_pass = true;
	long total_cbs;
	int saved_nice = task_nice(current);
	unsigned long seen_seq = READ_ONCE(rcu_fwd_seq);
	struct rcu_fwd *rfp = args;
	int tested = 0;
	int tested_tries = 0;

	VERBOSE_TOROUT_STRING("rcu_torture_fwd_progress task started");
	rcu_bind_current_to_nocb();
	if (!(IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RCU_BOOST)))
		set_user_nice(current, MAX_NICE);
	do {
		if (rfp->rcu_fwd_id) {
			/* Follower: wait for kthread 0 to begin a new pass. */
			while (READ_ONCE(rcu_fwd_seq) == seen_seq && !torture_must_stop())
				schedule_timeout_interruptible(HZ / 20);
			seen_seq = READ_ONCE(rcu_fwd_seq);
		} else {
			/* Kthread 0: hold off, reset, and begin a new pass. */
			schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
			WRITE_ONCE(rcu_fwd_emergency_stop, false);
			if (first_pass) {
				first_pass = false;
			} else {
				total_cbs = atomic_long_xchg(&rcu_fwd_max_cbs, 0);
				pr_alert("%s n_max_cbs: %ld\n", __func__, total_cbs);
			}
			WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
		}

		pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
		if (rcu_inkernel_boot_has_ended() && torture_num_online_cpus() > rfp->rcu_fwd_id)
			rcu_torture_fwd_prog_cr(rfp);
		if ((cur_ops->stall_dur && cur_ops->stall_dur() > 0) &&
		    (!IS_ENABLED(CONFIG_TINY_RCU) ||
		     (rcu_inkernel_boot_has_ended() &&
		      torture_num_online_cpus() > rfp->rcu_fwd_id)))
			rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);

		/* Avoid slow periods, better to test when busy. */
		if (stutter_wait("rcu_torture_fwd_prog"))
			sched_set_normal(current, saved_nice);
	} while (!torture_must_stop());

	/* Short runs might not contain a valid forward-progress attempt. */
	if (!rfp->rcu_fwd_id) {
		WARN_ON(!tested && tested_tries >= 5);
		pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
	}
	torture_kthread_stopping("rcu_torture_fwd_prog");
	return 0;
}
/*
 * If forward-progress checking is requested and feasible, spawn the
 * kthreads, one per requested instance.
 *
 * The fwd_progress parameter is limited to nr_cpu_ids, with a negative
 * value also selecting nr_cpu_ids.  Testing is disabled (fwd_progress set
 * to zero) when the flavor under test lacks the needed operations or when
 * CPU-stall testing was also requested; the latter is reported as -EINVAL
 * when built as a module and by a WARN otherwise.
 *
 * Returns zero on success or when testing is not carried out, -ENOMEM on
 * allocation failure, or the error from kthread creation, in which case
 * fwd_progress is set to the number of kthreads actually created.
 */
static int __init rcu_torture_fwd_prog_init(void)
{
	int i;
	int ret = 0;
	struct rcu_fwd *rfp;

	if (!fwd_progress)
		return 0; /* Not requested, so don't do it. */
	if (fwd_progress >= nr_cpu_ids) {
		VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Limiting fwd_progress to # CPUs.");
		fwd_progress = nr_cpu_ids;
	} else if (fwd_progress < 0) {
		fwd_progress = nr_cpu_ids;
	}
	if ((!cur_ops->sync && !cur_ops->call) ||
	    (!cur_ops->cbflood_max && (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0)) ||
	    cur_ops == &rcu_busted_ops) {
		VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
		fwd_progress = 0;
		return 0;
	}
	if (stall_cpu > 0) {
		VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
		fwd_progress = 0;
		if (IS_MODULE(CONFIG_RCU_TORTURE_TEST))
			return -EINVAL; /* In module, can fail back to user. */
		WARN_ON(1); /* Make sure rcutorture notices conflict. */
		return 0;
	}

	/* Substitute defaults for nonsensical parameter values. */
	if (fwd_progress_holdoff <= 0)
		fwd_progress_holdoff = 1;
	if (fwd_progress_div <= 0)
		fwd_progress_div = 4;

	rfp = kcalloc(fwd_progress, sizeof(*rfp), GFP_KERNEL);
	fwd_prog_tasks = kcalloc(fwd_progress, sizeof(*fwd_prog_tasks), GFP_KERNEL);
	if (!rfp || !fwd_prog_tasks) {
		kfree(rfp);
		kfree(fwd_prog_tasks);
		fwd_prog_tasks = NULL;
		fwd_progress = 0;
		return -ENOMEM;
	}
	for (i = 0; i < fwd_progress; i++) {
		spin_lock_init(&rfp[i].rcu_fwd_lock);
		rfp[i].rcu_fwd_cb_tail = &rfp[i].rcu_fwd_cb_head; /* Empty list. */
		rfp[i].rcu_fwd_id = i;
	}
	/* Publish the array under the mutex that the OOM notifier holds. */
	mutex_lock(&rcu_fwd_mutex);
	rcu_fwds = rfp;
	mutex_unlock(&rcu_fwd_mutex);
	register_oom_notifier(&rcutorture_oom_nb);
	for (i = 0; i < fwd_progress; i++) {
		ret = torture_create_kthread(rcu_torture_fwd_prog, &rcu_fwds[i], fwd_prog_tasks[i]);
		if (ret) {
			/* Record how many kthreads exist for cleanup. */
			fwd_progress = i;
			return ret;
		}
	}
	return 0;
}
/*
 * Undo rcu_torture_fwd_prog_init(): stop each forward-progress kthread,
 * unregister the OOM notifier, and free both the test-state array and the
 * task-pointer array.  Does nothing if either array is absent.
 */
static void rcu_torture_fwd_prog_cleanup(void)
{
	int n;
	struct rcu_fwd *doomed;

	if (!(rcu_fwds && fwd_prog_tasks))
		return;

	n = 0;
	while (n < fwd_progress) {
		torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_tasks[n]);
		n++;
	}
	unregister_oom_notifier(&rcutorture_oom_nb);

	/* Detach the array under the mutex, then free it with the mutex dropped. */
	mutex_lock(&rcu_fwd_mutex);
	doomed = rcu_fwds;
	rcu_fwds = NULL;
	mutex_unlock(&rcu_fwd_mutex);
	kfree(doomed);

	kfree(fwd_prog_tasks);
	fwd_prog_tasks = NULL;
}
/*
 * Callback function for RCU barrier testing.  Counts each invocation in
 * barrier_cbs_invoked; the rcu_head itself is not otherwise used.
 */
static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
{
	atomic_inc(&barrier_cbs_invoked);
}
/* Handler run via smp_call_on_cpu(), in task context, to get callback posted on desired CPU, if online. */
rcutorture: Fix invalid context warning when enable srcu barrier testing When the torture_type is set srcu or srcud and cb_barrier is non-zero, running the rcutorture test will trigger the following warning: [ 163.910989][ C1] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [ 163.910994][ C1] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 [ 163.910999][ C1] preempt_count: 10001, expected: 0 [ 163.911002][ C1] RCU nest depth: 0, expected: 0 [ 163.911005][ C1] INFO: lockdep is turned off. [ 163.911007][ C1] irq event stamp: 30964 [ 163.911010][ C1] hardirqs last enabled at (30963): [<ffffffffabc7df52>] do_idle+0x362/0x500 [ 163.911018][ C1] hardirqs last disabled at (30964): [<ffffffffae616eff>] sysvec_call_function_single+0xf/0xd0 [ 163.911025][ C1] softirqs last enabled at (0): [<ffffffffabb6475f>] copy_process+0x16ff/0x6580 [ 163.911033][ C1] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 163.911038][ C1] Preemption disabled at: [ 163.911039][ C1] [<ffffffffacf1964b>] stack_depot_save_flags+0x24b/0x6c0 [ 163.911063][ C1] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 6.8.0-rc4-rt4-yocto-preempt-rt+ #3 1e39aa9a737dd024a3275c4f835a872f673a7d3a [ 163.911071][ C1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [ 163.911075][ C1] Call Trace: [ 163.911078][ C1] <IRQ> [ 163.911080][ C1] dump_stack_lvl+0x88/0xd0 [ 163.911089][ C1] dump_stack+0x10/0x20 [ 163.911095][ C1] __might_resched+0x36f/0x530 [ 163.911105][ C1] rt_spin_lock+0x82/0x1c0 [ 163.911112][ C1] spin_lock_irqsave_ssp_contention+0xb8/0x100 [ 163.911121][ C1] srcu_gp_start_if_needed+0x782/0xf00 [ 163.911128][ C1] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 163.911136][ C1] ? debug_object_active_state+0x336/0x470 [ 163.911148][ C1] ? __pfx_srcu_gp_start_if_needed+0x10/0x10 [ 163.911156][ C1] ? __pfx_lock_release+0x10/0x10 [ 163.911165][ C1] ? 
__pfx_rcu_torture_barrier_cbf+0x10/0x10 [ 163.911188][ C1] __call_srcu+0x9f/0xe0 [ 163.911196][ C1] call_srcu+0x13/0x20 [ 163.911201][ C1] srcu_torture_call+0x1b/0x30 [ 163.911224][ C1] rcu_torture_barrier1cb+0x4a/0x60 [ 163.911247][ C1] __flush_smp_call_function_queue+0x267/0xca0 [ 163.911256][ C1] ? __pfx_rcu_torture_barrier1cb+0x10/0x10 [ 163.911281][ C1] generic_smp_call_function_single_interrupt+0x13/0x20 [ 163.911288][ C1] __sysvec_call_function_single+0x7d/0x280 [ 163.911295][ C1] sysvec_call_function_single+0x93/0xd0 [ 163.911302][ C1] </IRQ> [ 163.911304][ C1] <TASK> [ 163.911308][ C1] asm_sysvec_call_function_single+0x1b/0x20 [ 163.911313][ C1] RIP: 0010:default_idle+0x17/0x20 [ 163.911326][ C1] RSP: 0018:ffff888001997dc8 EFLAGS: 00000246 [ 163.911333][ C1] RAX: 0000000000000000 RBX: dffffc0000000000 RCX: ffffffffae618b51 [ 163.911337][ C1] RDX: 0000000000000000 RSI: ffffffffaea80920 RDI: ffffffffaec2de80 [ 163.911342][ C1] RBP: ffff888001997dc8 R08: 0000000000000001 R09: ffffed100d740cad [ 163.911346][ C1] R10: ffffed100d740cac R11: ffff88806ba06563 R12: 0000000000000001 [ 163.911350][ C1] R13: ffffffffafe460c0 R14: ffffffffafe460c0 R15: 0000000000000000 [ 163.911358][ C1] ? ct_kernel_exit.constprop.3+0x121/0x160 [ 163.911369][ C1] ? lockdep_hardirqs_on+0xc4/0x150 [ 163.911376][ C1] arch_cpu_idle+0x9/0x10 [ 163.911383][ C1] default_idle_call+0x7a/0xb0 [ 163.911390][ C1] do_idle+0x362/0x500 [ 163.911398][ C1] ? __pfx_do_idle+0x10/0x10 [ 163.911404][ C1] ? complete_with_flags+0x8b/0xb0 [ 163.911416][ C1] cpu_startup_entry+0x58/0x70 [ 163.911423][ C1] start_secondary+0x221/0x280 [ 163.911430][ C1] ? __pfx_start_secondary+0x10/0x10 [ 163.911440][ C1] secondary_startup_64_no_verify+0x17f/0x18b [ 163.911455][ C1] </TASK> This commit therefore use smp_call_on_cpu() instead of smp_call_function_single(), make rcu_torture_barrier1cb() invoked happens on task-context. Signed-off-by: Zqiang <qiang.zhang1211@gmail.com> Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
2024-03-25 07:52:19 +00:00
static int rcu_torture_barrier1cb(void *rcu_void)
{
	struct rcu_head *rhp = rcu_void;

	/*
	 * Post the caller-supplied rcu_head with the barrier-test callback
	 * from whichever CPU this function is executing on.  Always
	 * returns zero.
	 */
	cur_ops->call(rhp, rcu_torture_barrier_cbf);
	return 0;
}
/* kthread function to register callbacks used to test RCU barriers. */
static int rcu_torture_barrier_cbs(void *arg)
{
long myid = (long)arg;
bool lastphase = false;
bool newphase;
struct rcu_head rcu;
init_rcu_head_on_stack(&rcu);
VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task started");
set_user_nice(current, MAX_NICE);
do {
wait_event(barrier_cbs_wq[myid],
(newphase =
smp_load_acquire(&barrier_phase)) != lastphase ||
torture_must_stop());
lastphase = newphase;
if (torture_must_stop())
break;
/*
* The above smp_load_acquire() ensures barrier_phase load
* is ordered before the following ->call().
*/
rcutorture: Fix invalid context warning when enable srcu barrier testing When the torture_type is set srcu or srcud and cb_barrier is non-zero, running the rcutorture test will trigger the following warning: [ 163.910989][ C1] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [ 163.910994][ C1] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 [ 163.910999][ C1] preempt_count: 10001, expected: 0 [ 163.911002][ C1] RCU nest depth: 0, expected: 0 [ 163.911005][ C1] INFO: lockdep is turned off. [ 163.911007][ C1] irq event stamp: 30964 [ 163.911010][ C1] hardirqs last enabled at (30963): [<ffffffffabc7df52>] do_idle+0x362/0x500 [ 163.911018][ C1] hardirqs last disabled at (30964): [<ffffffffae616eff>] sysvec_call_function_single+0xf/0xd0 [ 163.911025][ C1] softirqs last enabled at (0): [<ffffffffabb6475f>] copy_process+0x16ff/0x6580 [ 163.911033][ C1] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 163.911038][ C1] Preemption disabled at: [ 163.911039][ C1] [<ffffffffacf1964b>] stack_depot_save_flags+0x24b/0x6c0 [ 163.911063][ C1] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 6.8.0-rc4-rt4-yocto-preempt-rt+ #3 1e39aa9a737dd024a3275c4f835a872f673a7d3a [ 163.911071][ C1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [ 163.911075][ C1] Call Trace: [ 163.911078][ C1] <IRQ> [ 163.911080][ C1] dump_stack_lvl+0x88/0xd0 [ 163.911089][ C1] dump_stack+0x10/0x20 [ 163.911095][ C1] __might_resched+0x36f/0x530 [ 163.911105][ C1] rt_spin_lock+0x82/0x1c0 [ 163.911112][ C1] spin_lock_irqsave_ssp_contention+0xb8/0x100 [ 163.911121][ C1] srcu_gp_start_if_needed+0x782/0xf00 [ 163.911128][ C1] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 163.911136][ C1] ? debug_object_active_state+0x336/0x470 [ 163.911148][ C1] ? __pfx_srcu_gp_start_if_needed+0x10/0x10 [ 163.911156][ C1] ? __pfx_lock_release+0x10/0x10 [ 163.911165][ C1] ? 
__pfx_rcu_torture_barrier_cbf+0x10/0x10 [ 163.911188][ C1] __call_srcu+0x9f/0xe0 [ 163.911196][ C1] call_srcu+0x13/0x20 [ 163.911201][ C1] srcu_torture_call+0x1b/0x30 [ 163.911224][ C1] rcu_torture_barrier1cb+0x4a/0x60 [ 163.911247][ C1] __flush_smp_call_function_queue+0x267/0xca0 [ 163.911256][ C1] ? __pfx_rcu_torture_barrier1cb+0x10/0x10 [ 163.911281][ C1] generic_smp_call_function_single_interrupt+0x13/0x20 [ 163.911288][ C1] __sysvec_call_function_single+0x7d/0x280 [ 163.911295][ C1] sysvec_call_function_single+0x93/0xd0 [ 163.911302][ C1] </IRQ> [ 163.911304][ C1] <TASK> [ 163.911308][ C1] asm_sysvec_call_function_single+0x1b/0x20 [ 163.911313][ C1] RIP: 0010:default_idle+0x17/0x20 [ 163.911326][ C1] RSP: 0018:ffff888001997dc8 EFLAGS: 00000246 [ 163.911333][ C1] RAX: 0000000000000000 RBX: dffffc0000000000 RCX: ffffffffae618b51 [ 163.911337][ C1] RDX: 0000000000000000 RSI: ffffffffaea80920 RDI: ffffffffaec2de80 [ 163.911342][ C1] RBP: ffff888001997dc8 R08: 0000000000000001 R09: ffffed100d740cad [ 163.911346][ C1] R10: ffffed100d740cac R11: ffff88806ba06563 R12: 0000000000000001 [ 163.911350][ C1] R13: ffffffffafe460c0 R14: ffffffffafe460c0 R15: 0000000000000000 [ 163.911358][ C1] ? ct_kernel_exit.constprop.3+0x121/0x160 [ 163.911369][ C1] ? lockdep_hardirqs_on+0xc4/0x150 [ 163.911376][ C1] arch_cpu_idle+0x9/0x10 [ 163.911383][ C1] default_idle_call+0x7a/0xb0 [ 163.911390][ C1] do_idle+0x362/0x500 [ 163.911398][ C1] ? __pfx_do_idle+0x10/0x10 [ 163.911404][ C1] ? complete_with_flags+0x8b/0xb0 [ 163.911416][ C1] cpu_startup_entry+0x58/0x70 [ 163.911423][ C1] start_secondary+0x221/0x280 [ 163.911430][ C1] ? __pfx_start_secondary+0x10/0x10 [ 163.911440][ C1] secondary_startup_64_no_verify+0x17f/0x18b [ 163.911455][ C1] </TASK> This commit therefore use smp_call_on_cpu() instead of smp_call_function_single(), make rcu_torture_barrier1cb() invoked happens on task-context. Signed-off-by: Zqiang <qiang.zhang1211@gmail.com> Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
2024-03-25 07:52:19 +00:00
if (smp_call_on_cpu(myid, rcu_torture_barrier1cb, &rcu, 1))
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
rcutorture: Fix invalid context warning when enable srcu barrier testing When the torture_type is set srcu or srcud and cb_barrier is non-zero, running the rcutorture test will trigger the following warning: [ 163.910989][ C1] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [ 163.910994][ C1] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 [ 163.910999][ C1] preempt_count: 10001, expected: 0 [ 163.911002][ C1] RCU nest depth: 0, expected: 0 [ 163.911005][ C1] INFO: lockdep is turned off. [ 163.911007][ C1] irq event stamp: 30964 [ 163.911010][ C1] hardirqs last enabled at (30963): [<ffffffffabc7df52>] do_idle+0x362/0x500 [ 163.911018][ C1] hardirqs last disabled at (30964): [<ffffffffae616eff>] sysvec_call_function_single+0xf/0xd0 [ 163.911025][ C1] softirqs last enabled at (0): [<ffffffffabb6475f>] copy_process+0x16ff/0x6580 [ 163.911033][ C1] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 163.911038][ C1] Preemption disabled at: [ 163.911039][ C1] [<ffffffffacf1964b>] stack_depot_save_flags+0x24b/0x6c0 [ 163.911063][ C1] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 6.8.0-rc4-rt4-yocto-preempt-rt+ #3 1e39aa9a737dd024a3275c4f835a872f673a7d3a [ 163.911071][ C1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [ 163.911075][ C1] Call Trace: [ 163.911078][ C1] <IRQ> [ 163.911080][ C1] dump_stack_lvl+0x88/0xd0 [ 163.911089][ C1] dump_stack+0x10/0x20 [ 163.911095][ C1] __might_resched+0x36f/0x530 [ 163.911105][ C1] rt_spin_lock+0x82/0x1c0 [ 163.911112][ C1] spin_lock_irqsave_ssp_contention+0xb8/0x100 [ 163.911121][ C1] srcu_gp_start_if_needed+0x782/0xf00 [ 163.911128][ C1] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 163.911136][ C1] ? debug_object_active_state+0x336/0x470 [ 163.911148][ C1] ? __pfx_srcu_gp_start_if_needed+0x10/0x10 [ 163.911156][ C1] ? __pfx_lock_release+0x10/0x10 [ 163.911165][ C1] ? 
__pfx_rcu_torture_barrier_cbf+0x10/0x10 [ 163.911188][ C1] __call_srcu+0x9f/0xe0 [ 163.911196][ C1] call_srcu+0x13/0x20 [ 163.911201][ C1] srcu_torture_call+0x1b/0x30 [ 163.911224][ C1] rcu_torture_barrier1cb+0x4a/0x60 [ 163.911247][ C1] __flush_smp_call_function_queue+0x267/0xca0 [ 163.911256][ C1] ? __pfx_rcu_torture_barrier1cb+0x10/0x10 [ 163.911281][ C1] generic_smp_call_function_single_interrupt+0x13/0x20 [ 163.911288][ C1] __sysvec_call_function_single+0x7d/0x280 [ 163.911295][ C1] sysvec_call_function_single+0x93/0xd0 [ 163.911302][ C1] </IRQ> [ 163.911304][ C1] <TASK> [ 163.911308][ C1] asm_sysvec_call_function_single+0x1b/0x20 [ 163.911313][ C1] RIP: 0010:default_idle+0x17/0x20 [ 163.911326][ C1] RSP: 0018:ffff888001997dc8 EFLAGS: 00000246 [ 163.911333][ C1] RAX: 0000000000000000 RBX: dffffc0000000000 RCX: ffffffffae618b51 [ 163.911337][ C1] RDX: 0000000000000000 RSI: ffffffffaea80920 RDI: ffffffffaec2de80 [ 163.911342][ C1] RBP: ffff888001997dc8 R08: 0000000000000001 R09: ffffed100d740cad [ 163.911346][ C1] R10: ffffed100d740cac R11: ffff88806ba06563 R12: 0000000000000001 [ 163.911350][ C1] R13: ffffffffafe460c0 R14: ffffffffafe460c0 R15: 0000000000000000 [ 163.911358][ C1] ? ct_kernel_exit.constprop.3+0x121/0x160 [ 163.911369][ C1] ? lockdep_hardirqs_on+0xc4/0x150 [ 163.911376][ C1] arch_cpu_idle+0x9/0x10 [ 163.911383][ C1] default_idle_call+0x7a/0xb0 [ 163.911390][ C1] do_idle+0x362/0x500 [ 163.911398][ C1] ? __pfx_do_idle+0x10/0x10 [ 163.911404][ C1] ? complete_with_flags+0x8b/0xb0 [ 163.911416][ C1] cpu_startup_entry+0x58/0x70 [ 163.911423][ C1] start_secondary+0x221/0x280 [ 163.911430][ C1] ? __pfx_start_secondary+0x10/0x10 [ 163.911440][ C1] secondary_startup_64_no_verify+0x17f/0x18b [ 163.911455][ C1] </TASK> This commit therefore use smp_call_on_cpu() instead of smp_call_function_single(), make rcu_torture_barrier1cb() invoked happens on task-context. Signed-off-by: Zqiang <qiang.zhang1211@gmail.com> Signed-off-by: Paul E. 
McKenney <paulmck@kernel.org> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
2024-03-25 07:52:19 +00:00
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!torture_must_stop());
if (cur_ops->cb_barrier != NULL)
cur_ops->cb_barrier();
destroy_rcu_head_on_stack(&rcu);
torture_kthread_stopping("rcu_torture_barrier_cbs");
return 0;
}
/*
 * kthread function to drive and coordinate RCU barrier testing.
 *
 * Each pass resets the counters, flips barrier_phase, wakes every
 * callback-posting kthread, waits for barrier_cbs_count to reach zero,
 * and then invokes ->cb_barrier().  If the number of callbacks invoked
 * does not match n_barrier_cbs afterwards, an error is counted and the
 * barrier is retried until the count matches or the test is stopping.
 *
 * Always returns 0.
 */
static int rcu_torture_barrier(void *arg)
{
	int n;

	VERBOSE_TOROUT_STRING("rcu_torture_barrier task starting");
	for (;;) {
		atomic_set(&barrier_cbs_invoked, 0);
		atomic_set(&barrier_cbs_count, n_barrier_cbs);
		/* Ensure barrier_phase ordered after prior assignments. */
		smp_store_release(&barrier_phase, !barrier_phase);
		n = 0;
		while (n < n_barrier_cbs) {
			wake_up(&barrier_cbs_wq[n]);
			n++;
		}
		wait_event(barrier_wq,
			   !atomic_read(&barrier_cbs_count) ||
			   torture_must_stop());
		if (torture_must_stop())
			break;
		n_barrier_attempts++;
		cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */
		if (atomic_read(&barrier_cbs_invoked) == n_barrier_cbs) {
			n_barrier_successes++;
		} else {
			n_rcu_torture_barrier_error++;
			pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n",
			       atomic_read(&barrier_cbs_invoked),
			       n_barrier_cbs);
			WARN_ON(1);
			// Wait manually for the remaining callbacks
			n = 0;
			for (;;) {
				/* Warn after HZ retries, then push the counter far away. */
				if (WARN_ON(n++ > HZ))
					n = INT_MIN;
				schedule_timeout_interruptible(1);
				cur_ops->cb_barrier();
				if (atomic_read(&barrier_cbs_invoked) ==
				    n_barrier_cbs)
					break;
				if (torture_must_stop())
					break;
			}
			smp_mb(); // Can't trust ordering if broken.
			if (!torture_must_stop())
				pr_err("Recovered: barrier_cbs_invoked = %d\n",
				       atomic_read(&barrier_cbs_invoked));
		}
		schedule_timeout_interruptible(HZ / 10);
		if (torture_must_stop())
			break;
	}
	torture_kthread_stopping("rcu_torture_barrier");
	return 0;
}
/*
 * Initialize RCU barrier testing.
 *
 * Does nothing (returning 0) when n_barrier_cbs is not positive or when
 * the current torture type lacks ->call or ->cb_barrier.  Otherwise
 * allocates the per-kthread task and wait-queue arrays, starts one
 * rcu_torture_barrier_cbs() kthread per callback and finally the
 * coordinating rcu_torture_barrier() kthread.
 *
 * Returns 0 on success, -ENOMEM if either array cannot be allocated, or
 * the error from kthread creation.  Nothing is released here on failure.
 */
static int rcu_torture_barrier_init(void)
{
	int cb = 0;
	int err;

	if (n_barrier_cbs <= 0)
		return 0;
	if (!cur_ops->call || !cur_ops->cb_barrier) {
		pr_alert("%s" TORTURE_FLAG
			 " Call or barrier ops missing for %s,\n",
			 torture_type, cur_ops->name);
		pr_alert("%s" TORTURE_FLAG
			 " RCU barrier testing omitted from run.\n",
			 torture_type);
		return 0;
	}
	atomic_set(&barrier_cbs_count, 0);
	atomic_set(&barrier_cbs_invoked, 0);
	barrier_cbs_tasks = kcalloc(n_barrier_cbs,
				    sizeof(barrier_cbs_tasks[0]), GFP_KERNEL);
	barrier_cbs_wq = kcalloc(n_barrier_cbs,
				 sizeof(barrier_cbs_wq[0]), GFP_KERNEL);
	if (!barrier_cbs_tasks || !barrier_cbs_wq)
		return -ENOMEM;
	while (cb < n_barrier_cbs) {
		/* The wait queue must be ready before its kthread can run. */
		init_waitqueue_head(&barrier_cbs_wq[cb]);
		err = torture_create_kthread(rcu_torture_barrier_cbs,
					     (void *)(long)cb,
					     barrier_cbs_tasks[cb]);
		if (err)
			return err;
		cb++;
	}
	return torture_create_kthread(rcu_torture_barrier, NULL, barrier_task);
}
/*
 * Clean up after RCU barrier testing.
 *
 * Stops the coordinating kthread first, then each callback-posting
 * kthread, and frees both arrays set up by rcu_torture_barrier_init().
 */
static void rcu_torture_barrier_cleanup(void)
{
	int cb = 0;

	torture_stop_kthread(rcu_torture_barrier, barrier_task);
	if (barrier_cbs_tasks) {
		while (cb < n_barrier_cbs) {
			torture_stop_kthread(rcu_torture_barrier_cbs,
					     barrier_cbs_tasks[cb]);
			cb++;
		}
		kfree(barrier_cbs_tasks);
		barrier_cbs_tasks = NULL;
	}
	/* kfree(NULL) is a no-op, so no NULL guard is needed. */
	kfree(barrier_cbs_wq);
	barrier_cbs_wq = NULL;
}
static bool rcu_torture_can_boost(void)
{
static int boost_warn_once;
int prio;
if (!(test_boost == 1 && cur_ops->can_boost) && test_boost != 2)
return false;
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
if (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)
return false;
prio = rcu_get_gp_kthreads_prio();
if (!prio)
return false;
if (prio < 2) {
rcutorture: Judge RCU priority boosting on grace periods, not callbacks Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd ktheads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2021-03-30 23:30:32 +00:00
if (boost_warn_once == 1)
return false;
pr_alert("%s: WARN: RCU kthread priority too low to test boosting. Skipping RCU boost test. Try passing rcutree.kthread_prio > 1 on the kernel command line.\n", KBUILD_MODNAME);
boost_warn_once = 1;
return false;
}
return true;
}
/* Set by rcu_torture_read_exit_cleanup() to tell rcu_torture_read_exit() to stop. */
static bool read_exit_child_stop;
/* Set by rcu_torture_read_exit() once it has left its spawn loop. */
static bool read_exit_child_stopped;
/* rcu_torture_read_exit_cleanup() waits here for read_exit_child_stopped. */
static wait_queue_head_t read_exit_wq;
// Child kthread which just does an rcutorture reader and exits.
// @trsp_in points to the torture_random_state handed over by the parent.
// The kthread idles until it is asked to stop, then does a single read.
static int rcu_torture_read_exit_child(void *trsp_in)
{
	struct torture_random_state *rand = trsp_in;

	set_user_nice(current, MAX_NICE);
	// Minimize time between reading and exiting.
	for (;;) {
		if (kthread_should_stop())
			break;
		schedule_timeout_uninterruptible(HZ / 20);
	}
	(void)rcu_torture_one_read(rand, -1);
	return 0;
}
// Parent kthread which creates and destroys read-exit child kthreads.
// Runs episodes of up to read_exit_burst spawn/stop cycles, pausing up to
// read_exit_delay seconds between episodes, until a child cannot be
// created or read_exit_child_stop is set.  Always returns 0.
static int rcu_torture_read_exit(void *unused)
{
	bool failed = false;
	int n;
	struct task_struct *child;
	DEFINE_TORTURE_RANDOM(trs);

	// Allocate and initialize.
	set_user_nice(current, MAX_NICE);
	VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of test");

	// Each pass through this loop does one read-exit episode.
	do {
		VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of episode");
		n = 0;
		while (n < read_exit_burst &&
		       !READ_ONCE(read_exit_child_stop)) {
			stutter_wait("rcu_torture_read_exit");
			// Spawn child.
			child = kthread_run(rcu_torture_read_exit_child,
					    &trs, "%s", "rcu_torture_read_exit_child");
			if (IS_ERR(child)) {
				TOROUT_ERRSTRING("out of memory");
				failed = true;
				break;
			}
			cond_resched();
			kthread_stop(child);
			n_read_exits++;
			n++;
		}
		VERBOSE_TOROUT_STRING("rcu_torture_read_exit: End of episode");
		rcu_barrier(); // Wait for task_struct free, avoid OOM.
		// Inter-episode delay, one second per iteration.
		n = 0;
		while (!failed && !READ_ONCE(read_exit_child_stop) &&
		       n < read_exit_delay) {
			schedule_timeout_uninterruptible(HZ);
			n++;
		}
	} while (!failed && !READ_ONCE(read_exit_child_stop));

	// Clean up and exit.
	smp_store_release(&read_exit_child_stopped, true); // After reaping.
	smp_mb(); // Store before wakeup.
	wake_up(&read_exit_wq);
	for (;;) {
		if (torture_must_stop())
			break;
		schedule_timeout_uninterruptible(HZ / 20);
	}
	torture_kthread_stopping("rcu_torture_read_exit");
	return 0;
}
/*
 * Start the read-exit parent kthread when read_exit_burst is positive.
 * Returns 0 when the test is disabled, otherwise the result of creating
 * the kthread.
 */
static int rcu_torture_read_exit_init(void)
{
	if (read_exit_burst > 0) {
		init_waitqueue_head(&read_exit_wq);
		read_exit_child_stop = false;
		read_exit_child_stopped = false;
		return torture_create_kthread(rcu_torture_read_exit, NULL,
					      read_exit_task);
	}
	return 0;
}
/*
 * Stop the read-exit parent kthread, if one was started: ask it to stop,
 * wait until it reports that it has stopped, then stop the kthread itself.
 */
static void rcu_torture_read_exit_cleanup(void)
{
	if (read_exit_task) {
		WRITE_ONCE(read_exit_child_stop, true);
		smp_mb(); // Above write before wait.
		wait_event(read_exit_wq,
			   smp_load_acquire(&read_exit_child_stopped));
		torture_stop_kthread(rcutorture_read_exit, read_exit_task);
	}
}
/*
 * When rcutorture is built in, invoke dump_cpu_task() @n times, each time
 * targeting the CPU numbered one above the current one (wrapping to CPU 0)
 * and then sleeping for 15 seconds.  Otherwise, warn once if @n is
 * non-zero.
 */
static void rcutorture_test_nmis(int n)
{
#if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)
	int pass = 0;
	int target;
	int thiscpu;

	while (pass < n) {
		preempt_disable();
		thiscpu = smp_processor_id();
		target = thiscpu + 1 >= nr_cpu_ids ? 0 : thiscpu + 1;
		pr_alert("%s: CPU %d invoking dump_cpu_task(%d)\n", __func__, thiscpu, target);
		dump_cpu_task(target);
		preempt_enable();
		schedule_timeout_uninterruptible(15 * HZ);
		pass++;
	}
#else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)
	WARN_ONCE(n, "Non-zero rcutorture.test_nmis=%d permitted only when rcutorture is built in.\n", test_nmis);
#endif // #else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)
}
/*
 * Value returned by cpuhp_setup_state() in rcu_torture_init(); passed to
 * cpuhp_remove_state() by rcu_torture_cleanup() when non-negative.
 */
static enum cpuhp_state rcutor_hp;
/*
 * Stop all rcutorture kthreads, free their bookkeeping, wait for
 * outstanding callbacks and print the end-of-test report.
 *
 * Used as the module exit function, as the shutdown callback passed to
 * torture_shutdown_init(), and on the error path of rcu_torture_init().
 * In that last case cur_ops may be NULL (unknown torture type), so no
 * path in here may dereference it before checking.
 */
static void
rcu_torture_cleanup(void)
{
	int firsttime;
	int flags = 0;
	unsigned long gp_seq = 0;
	int i;

	if (torture_cleanup_begin()) {
		/* Cleanup is not to proceed: just flush callbacks and leave. */
		if (cur_ops && cur_ops->cb_barrier != NULL) {
			pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
			cur_ops->cb_barrier();
		}
		if (cur_ops && cur_ops->gp_slow_unregister)
			cur_ops->gp_slow_unregister(NULL);
		return;
	}
	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	rcutorture_test_nmis(test_nmis);

	if (cur_ops->gp_kthread_dbg)
		cur_ops->gp_kthread_dbg();
	rcu_torture_read_exit_cleanup();
	rcu_torture_barrier_cleanup();
	rcu_torture_fwd_prog_cleanup();
	torture_stop_kthread(rcu_torture_stall, stall_task);
	torture_stop_kthread(rcu_torture_writer, writer_task);

	if (nocb_tasks) {
		for (i = 0; i < nrealnocbers; i++)
			torture_stop_kthread(rcu_nocb_toggle, nocb_tasks[i]);
		kfree(nocb_tasks);
		nocb_tasks = NULL;
	}

	if (reader_tasks) {
		for (i = 0; i < nrealreaders; i++)
			torture_stop_kthread(rcu_torture_reader,
					     reader_tasks[i]);
		kfree(reader_tasks);
		reader_tasks = NULL;
	}
	kfree(rcu_torture_reader_mbchk);
	rcu_torture_reader_mbchk = NULL;

	if (fakewriter_tasks) {
		for (i = 0; i < nfakewriters; i++)
			torture_stop_kthread(rcu_torture_fakewriter,
					     fakewriter_tasks[i]);
		kfree(fakewriter_tasks);
		fakewriter_tasks = NULL;
	}

	if (cur_ops->get_gp_data)
		cur_ops->get_gp_data(&flags, &gp_seq);
	pr_alert("%s: End-test grace-period state: g%ld f%#x total-gps=%ld\n",
		 cur_ops->name, (long)gp_seq, flags,
		 rcutorture_seq_diff(gp_seq, start_gp_seq));
	torture_stop_kthread(rcu_torture_stats, stats_task);
	torture_stop_kthread(rcu_torture_fqs, fqs_task);
	if (rcu_torture_can_boost() && rcutor_hp >= 0)
		cpuhp_remove_state(rcutor_hp);

	/*
	 * Wait for all RCU callbacks to fire, then do torture-type-specific
	 * cleanup operations.
	 */
	if (cur_ops->cb_barrier != NULL) {
		pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
		cur_ops->cb_barrier();
	}
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	rcu_torture_mem_dump_obj();

	rcu_torture_stats_print(); /* -After- the stats thread is stopped! */

	if (err_segs_recorded) {
		pr_alert("Failure/close-call rcutorture reader segments:\n");
		if (rt_read_nsegs == 0)
			pr_alert("\t: No segments recorded!!!\n");
		for (i = 0; i < rt_read_nsegs; i++) {
			/*
			 * Each segment gets its own output line, so the "+"
			 * separator must start afresh for every segment.
			 */
			firsttime = 1;
			pr_alert("\t%d: %#x ", i, err_segs[i].rt_readstate);
			if (err_segs[i].rt_delay_jiffies != 0) {
				pr_cont("%s%ldjiffies", firsttime ? "" : "+",
					err_segs[i].rt_delay_jiffies);
				firsttime = 0;
			}
			if (err_segs[i].rt_delay_ms != 0) {
				pr_cont("%s%ldms", firsttime ? "" : "+",
					err_segs[i].rt_delay_ms);
				firsttime = 0;
			}
			if (err_segs[i].rt_delay_us != 0) {
				pr_cont("%s%ldus", firsttime ? "" : "+",
					err_segs[i].rt_delay_us);
				firsttime = 0;
			}
			pr_cont("%s\n",
				err_segs[i].rt_preempted ? "preempted" : "");
		}
	}
	if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
		rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
	else if (torture_onoff_failures())
		rcu_torture_print_module_parms(cur_ops,
					       "End of test: RCU_HOTPLUG");
	else
		rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
	torture_cleanup_end();
	if (cur_ops->gp_slow_unregister)
		cur_ops->gp_slow_unregister(NULL);
}
/* Callback that intentionally does nothing; queued by rcu_test_debug_objects(). */
static void rcu_torture_leak_cb(struct rcu_head *rhp)
{
}
/*
 * Callback queued by rcu_test_debug_objects() as the duplicate of an
 * already-queued rcu_head; it only logs that it was invoked.
 */
static void rcu_torture_err_cb(struct rcu_head *rhp)
{
	/*
	 * This -might- happen due to race conditions, but is unlikely.
	 * The scenario that leads to this happening is that the
	 * first of the pair of duplicate callbacks is queued,
	 * someone else starts a grace period that includes that
	 * callback, then the second of the pair must wait for the
	 * next grace period. Unlikely, but can happen. If it
	 * does happen, the debug-objects subsystem won't have splatted.
	 */
	pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
}
/*
* Verify that double-free causes debug-objects to complain, but only
* if CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. Otherwise, say that the test
* cannot be carried out.
*/
static void rcu_test_debug_objects(void)
{
struct rcu_head rh1;
struct rcu_head rh2;
int idx;
if (!IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) {
pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_%s()\n",
KBUILD_MODNAME, cur_ops->name);
return;
}
if (WARN_ON_ONCE(cur_ops->debug_objects &&
(!cur_ops->call || !cur_ops->cb_barrier)))
return;
struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
init_rcu_head_on_stack(&rh1);
init_rcu_head_on_stack(&rh2);
pr_alert("%s: WARN: Duplicate call_%s() test starting.\n", KBUILD_MODNAME, cur_ops->name);
/* Try to queue the rh2 pair of callbacks for the same grace period. */
idx = cur_ops->readlock(); /* Make it impossible to finish a grace period. */
cur_ops->call(&rh1, rcu_torture_leak_cb); /* Start grace period. */
cur_ops->call(&rh2, rcu_torture_leak_cb);
cur_ops->call(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
if (rhp) {
cur_ops->call(rhp, rcu_torture_leak_cb);
cur_ops->call(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
}
cur_ops->readunlock(idx);
/* Wait for them all to get done so we can safely return. */
cur_ops->cb_barrier();
pr_alert("%s: WARN: Duplicate call_%s() test complete.\n", KBUILD_MODNAME, cur_ops->name);
destroy_rcu_head_on_stack(&rh1);
destroy_rcu_head_on_stack(&rh2);
kfree(rhp);
}
/*
 * Invoke the torture type's ->sync() on every 4096th call, counting only
 * calls made while ->sync is non-NULL.  Passed to torture_onoff_init().
 */
static void rcutorture_sync(void)
{
	static unsigned long ncalls;

	if (!cur_ops->sync)
		return;
	if (++ncalls & 0xfff)
		return;
	cur_ops->sync();
}
/*
 * Ten mutexes, ten rw_semaphores and ten SRCU domains, referenced by index
 * from the arrays in rcu_torture_init_srcu_lockdep().
 */
static DEFINE_MUTEX(mut0);
static DEFINE_MUTEX(mut1);
static DEFINE_MUTEX(mut2);
static DEFINE_MUTEX(mut3);
static DEFINE_MUTEX(mut4);
static DEFINE_MUTEX(mut5);
static DEFINE_MUTEX(mut6);
static DEFINE_MUTEX(mut7);
static DEFINE_MUTEX(mut8);
static DEFINE_MUTEX(mut9);

static DECLARE_RWSEM(rwsem0);
static DECLARE_RWSEM(rwsem1);
static DECLARE_RWSEM(rwsem2);
static DECLARE_RWSEM(rwsem3);
static DECLARE_RWSEM(rwsem4);
static DECLARE_RWSEM(rwsem5);
static DECLARE_RWSEM(rwsem6);
static DECLARE_RWSEM(rwsem7);
static DECLARE_RWSEM(rwsem8);
static DECLARE_RWSEM(rwsem9);

DEFINE_STATIC_SRCU(srcu0);
DEFINE_STATIC_SRCU(srcu1);
DEFINE_STATIC_SRCU(srcu2);
DEFINE_STATIC_SRCU(srcu3);
DEFINE_STATIC_SRCU(srcu4);
DEFINE_STATIC_SRCU(srcu5);
DEFINE_STATIC_SRCU(srcu6);
DEFINE_STATIC_SRCU(srcu7);
DEFINE_STATIC_SRCU(srcu8);
DEFINE_STATIC_SRCU(srcu9);
/*
 * Log step @i of a lockdep test cycle and return the index of the next
 * element in the cycle.
 *
 * @f is the caller's name; @fl, @fs and @fu name the lock, synchronize
 * and unlock primitives for the log line.  The next index is @i + 1
 * unless that reaches @cyclelen, in which case it is 0 when @deadlock is
 * non-zero (closing the cycle) and -1 otherwise.  A negative result means
 * there is nothing to synchronize on, and the log line omits that step.
 */
static int srcu_lockdep_next(const char *f, const char *fl, const char *fs, const char *fu, int i,
			     int cyclelen, int deadlock)
{
	int next;

	if (i + 1 < cyclelen)
		next = i + 1;
	else if (deadlock)
		next = 0;
	else
		next = -1;

	if (next < 0)
		pr_info("%s: %s(%d), %s(%d)\n", f, fl, i, fu, i);
	else
		pr_info("%s: %s(%d), %s(%d), %s(%d)\n", f, fl, i, fs, next, fu, i);
	return next;
}
// Test lockdep on SRCU-based deadlock scenarios.
static void rcu_torture_init_srcu_lockdep(void)
{
int cyclelen;
int deadlock;
bool err = false;
int i;
int j;
int idx;
struct mutex *muts[] = { &mut0, &mut1, &mut2, &mut3, &mut4,
&mut5, &mut6, &mut7, &mut8, &mut9 };
struct rw_semaphore *rwsems[] = { &rwsem0, &rwsem1, &rwsem2, &rwsem3, &rwsem4,
&rwsem5, &rwsem6, &rwsem7, &rwsem8, &rwsem9 };
struct srcu_struct *srcus[] = { &srcu0, &srcu1, &srcu2, &srcu3, &srcu4,
&srcu5, &srcu6, &srcu7, &srcu8, &srcu9 };
int testtype;
if (!test_srcu_lockdep)
return;
deadlock = test_srcu_lockdep / 1000;
testtype = (test_srcu_lockdep / 10) % 100;
cyclelen = test_srcu_lockdep % 10;
WARN_ON_ONCE(ARRAY_SIZE(muts) != ARRAY_SIZE(srcus));
if (WARN_ONCE(deadlock != !!deadlock,
"%s: test_srcu_lockdep=%d and deadlock digit %d must be zero or one.\n",
__func__, test_srcu_lockdep, deadlock))
err = true;
if (WARN_ONCE(cyclelen <= 0,
"%s: test_srcu_lockdep=%d and cycle-length digit %d must be greater than zero.\n",
__func__, test_srcu_lockdep, cyclelen))
err = true;
if (err)
goto err_out;
if (testtype == 0) {
pr_info("%s: test_srcu_lockdep = %05d: SRCU %d-way %sdeadlock.\n",
__func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
if (deadlock && cyclelen == 1)
pr_info("%s: Expect hang.\n", __func__);
for (i = 0; i < cyclelen; i++) {
j = srcu_lockdep_next(__func__, "srcu_read_lock", "synchronize_srcu",
"srcu_read_unlock", i, cyclelen, deadlock);
idx = srcu_read_lock(srcus[i]);
if (j >= 0)
synchronize_srcu(srcus[j]);
srcu_read_unlock(srcus[i], idx);
}
return;
}
if (testtype == 1) {
pr_info("%s: test_srcu_lockdep = %05d: SRCU/mutex %d-way %sdeadlock.\n",
__func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
for (i = 0; i < cyclelen; i++) {
pr_info("%s: srcu_read_lock(%d), mutex_lock(%d), mutex_unlock(%d), srcu_read_unlock(%d)\n",
__func__, i, i, i, i);
idx = srcu_read_lock(srcus[i]);
mutex_lock(muts[i]);
mutex_unlock(muts[i]);
srcu_read_unlock(srcus[i], idx);
j = srcu_lockdep_next(__func__, "mutex_lock", "synchronize_srcu",
"mutex_unlock", i, cyclelen, deadlock);
mutex_lock(muts[i]);
if (j >= 0)
synchronize_srcu(srcus[j]);
mutex_unlock(muts[i]);
}
return;
}
if (testtype == 2) {
pr_info("%s: test_srcu_lockdep = %05d: SRCU/rwsem %d-way %sdeadlock.\n",
__func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
for (i = 0; i < cyclelen; i++) {
pr_info("%s: srcu_read_lock(%d), down_read(%d), up_read(%d), srcu_read_unlock(%d)\n",
__func__, i, i, i, i);
idx = srcu_read_lock(srcus[i]);
down_read(rwsems[i]);
up_read(rwsems[i]);
srcu_read_unlock(srcus[i], idx);
j = srcu_lockdep_next(__func__, "down_write", "synchronize_srcu",
"up_write", i, cyclelen, deadlock);
down_write(rwsems[i]);
if (j >= 0)
synchronize_srcu(srcus[j]);
up_write(rwsems[i]);
}
return;
}
#ifdef CONFIG_TASKS_TRACE_RCU
if (testtype == 3) {
pr_info("%s: test_srcu_lockdep = %05d: SRCU and Tasks Trace RCU %d-way %sdeadlock.\n",
__func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
if (deadlock && cyclelen == 1)
pr_info("%s: Expect hang.\n", __func__);
for (i = 0; i < cyclelen; i++) {
char *fl = i == 0 ? "rcu_read_lock_trace" : "srcu_read_lock";
char *fs = i == cyclelen - 1 ? "synchronize_rcu_tasks_trace"
: "synchronize_srcu";
char *fu = i == 0 ? "rcu_read_unlock_trace" : "srcu_read_unlock";
j = srcu_lockdep_next(__func__, fl, fs, fu, i, cyclelen, deadlock);
if (i == 0)
rcu_read_lock_trace();
else
idx = srcu_read_lock(srcus[i]);
if (j >= 0) {
if (i == cyclelen - 1)
synchronize_rcu_tasks_trace();
else
synchronize_srcu(srcus[j]);
}
if (i == 0)
rcu_read_unlock_trace();
else
srcu_read_unlock(srcus[i], idx);
}
return;
}
#endif // #ifdef CONFIG_TASKS_TRACE_RCU
err_out:
pr_info("%s: test_srcu_lockdep = %05d does nothing.\n", __func__, test_srcu_lockdep);
pr_info("%s: test_srcu_lockdep = DNNL.\n", __func__);
pr_info("%s: D: Deadlock if nonzero.\n", __func__);
pr_info("%s: NN: Test number, 0=SRCU, 1=SRCU/mutex, 2=SRCU/rwsem, 3=SRCU/Tasks Trace RCU.\n", __func__);
pr_info("%s: L: Cycle length.\n", __func__);
if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU))
pr_info("%s: NN=3 disallowed because kernel is built with CONFIG_TASKS_TRACE_RCU=n\n", __func__);
}
/*
 * Module initialization: select the rcu_torture_ops matching torture_type,
 * reset the test state and statistics, and start the torture kthreads and
 * auxiliary tests in a fixed order.
 *
 * Returns 0 on success, -EBUSY if torture_init_begin() refuses to start,
 * -EINVAL for an unknown torture_type, or the first error hit while
 * setting things up.  Every failure after torture_init_begin() goes
 * through the unwind label, which calls rcu_torture_cleanup().
 */
static int __init
rcu_torture_init(void)
{
	long i;
	int cpu;
	int firsterr = 0;
	int flags = 0;
	unsigned long gp_seq = 0;
	static struct rcu_torture_ops *torture_ops[] = {
		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
		TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
		&trivial_ops,
	};

	if (!torture_init_begin(torture_type, verbose))
		return -EBUSY;

	/* Process args and tell the world that the torturer is on the job. */
	for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
		cur_ops = torture_ops[i];
		if (strcmp(torture_type, cur_ops->name) == 0)
			break;
	}
	/* No match: list the valid types and bail out with cur_ops cleared. */
	if (i == ARRAY_SIZE(torture_ops)) {
		pr_alert("rcu-torture: invalid torture type: \"%s\"\n",
			 torture_type);
		pr_alert("rcu-torture types:");
		for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
			pr_cont(" %s", torture_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->fqs == NULL && fqs_duration != 0) {
		pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
		fqs_duration = 0;
	}
	/* Callback-offload toggling is allowed only for rcu_ops with CONFIG_RCU_NOCB_CPU. */
	if (nocbs_nthreads != 0 && (cur_ops != &rcu_ops ||
				    !IS_ENABLED(CONFIG_RCU_NOCB_CPU))) {
		pr_alert("rcu-torture types: %s and CONFIG_RCU_NOCB_CPU=%d, nocb toggle disabled.\n",
			 cur_ops->name, IS_ENABLED(CONFIG_RCU_NOCB_CPU));
		nocbs_nthreads = 0;
	}
	if (cur_ops->init)
		cur_ops->init();

	rcu_torture_init_srcu_lockdep();

	/* A negative nreaders derives the count from the online CPUs, minimum one. */
	if (nreaders >= 0) {
		nrealreaders = nreaders;
	} else {
		nrealreaders = num_online_cpus() - 2 - nreaders;
		if (nrealreaders <= 0)
			nrealreaders = 1;
	}
	rcu_torture_print_module_parms(cur_ops, "Start of test");
	if (cur_ops->get_gp_data)
		cur_ops->get_gp_data(&flags, &gp_seq);
	/* Remembered so that cleanup can report the total number of grace periods. */
	start_gp_seq = gp_seq;
	pr_alert("%s: Start-test grace-period state: g%ld f%#x\n",
		 cur_ops->name, (long)gp_seq, flags);

	/* Set up the freelist. */

	INIT_LIST_HEAD(&rcu_torture_freelist);
	for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) {
		rcu_tortures[i].rtort_mbtest = 0;
		list_add_tail(&rcu_tortures[i].rtort_free,
			      &rcu_torture_freelist);
	}

	/* Initialize the statistics so that each run gets its own numbers. */

	rcu_torture_current = NULL;
	rcu_torture_current_version = 0;
	atomic_set(&n_rcu_torture_alloc, 0);
	atomic_set(&n_rcu_torture_alloc_fail, 0);
	atomic_set(&n_rcu_torture_free, 0);
	atomic_set(&n_rcu_torture_mberror, 0);
	atomic_set(&n_rcu_torture_mbchk_fail, 0);
	atomic_set(&n_rcu_torture_mbchk_tries, 0);
	atomic_set(&n_rcu_torture_error, 0);
	n_rcu_torture_barrier_error = 0;
	n_rcu_torture_boost_ktrerror = 0;
	n_rcu_torture_boost_failure = 0;
	n_rcu_torture_boosts = 0;
	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
		atomic_set(&rcu_torture_wcount[i], 0);
	for_each_possible_cpu(cpu) {
		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
			per_cpu(rcu_torture_count, cpu)[i] = 0;
			per_cpu(rcu_torture_batch, cpu)[i] = 0;
		}
	}
	err_segs_recorded = 0;
	rt_read_nsegs = 0;

	/* Start up the kthreads. */

	rcu_torture_write_types();
	firsterr = torture_create_kthread(rcu_torture_writer, NULL,
					  writer_task);
	if (torture_init_error(firsterr))
		goto unwind;
	if (nfakewriters > 0) {
		fakewriter_tasks = kcalloc(nfakewriters,
					   sizeof(fakewriter_tasks[0]),
					   GFP_KERNEL);
		if (fakewriter_tasks == NULL) {
			TOROUT_ERRSTRING("out of memory");
			firsterr = -ENOMEM;
			goto unwind;
		}
	}
	for (i = 0; i < nfakewriters; i++) {
		firsterr = torture_create_kthread(rcu_torture_fakewriter,
						  NULL, fakewriter_tasks[i]);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk),
					   GFP_KERNEL);
	if (!reader_tasks || !rcu_torture_reader_mbchk) {
		TOROUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}
	/* Each reader kthread receives its own index as its argument. */
	for (i = 0; i < nrealreaders; i++) {
		rcu_torture_reader_mbchk[i].rtc_chkrdr = -1;
		firsterr = torture_create_kthread(rcu_torture_reader, (void *)i,
						  reader_tasks[i]);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	/* Negative values are warned about and replaced with defaults. */
	nrealnocbers = nocbs_nthreads;
	if (WARN_ON(nrealnocbers < 0))
		nrealnocbers = 1;
	if (WARN_ON(nocbs_toggle < 0))
		nocbs_toggle = HZ;
	if (nrealnocbers > 0) {
		nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL);
		if (nocb_tasks == NULL) {
			TOROUT_ERRSTRING("out of memory");
			firsterr = -ENOMEM;
			goto unwind;
		}
	} else {
		nocb_tasks = NULL;
	}
	for (i = 0; i < nrealnocbers; i++) {
		firsterr = torture_create_kthread(rcu_nocb_toggle, NULL, nocb_tasks[i]);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	if (stat_interval > 0) {
		firsterr = torture_create_kthread(rcu_torture_stats, NULL,
						  stats_task);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	if (test_no_idle_hz && shuffle_interval > 0) {
		firsterr = torture_shuffle_init(shuffle_interval * HZ);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	if (stutter < 0)
		stutter = 0;
	if (stutter) {
		int t;

		/* Use the torture type's ->stall_dur() when provided, else stutter seconds. */
		t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
		firsterr = torture_stutter_init(stutter * HZ, t);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	if (fqs_duration < 0)
		fqs_duration = 0;
	if (fqs_holdoff < 0)
		fqs_holdoff = 0;
	if (fqs_duration && fqs_holdoff) {
		/* Create the fqs thread */
		firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
						  fqs_task);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	/* Enforce minimum values for the boost-test parameters. */
	if (test_boost_interval < 1)
		test_boost_interval = 1;
	if (test_boost_duration < 2)
		test_boost_duration = 2;
	if (rcu_torture_can_boost()) {

		boost_starttime = jiffies + test_boost_interval * HZ;

		firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
					     rcutorture_booster_init,
					     rcutorture_booster_cleanup);
		/* Saved before the error check; cleanup tests it for >= 0. */
		rcutor_hp = firsterr;
		if (torture_init_error(firsterr))
			goto unwind;
	}
	shutdown_jiffies = jiffies + shutdown_secs * HZ;
	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
	if (torture_init_error(firsterr))
		goto unwind;
	firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval,
				      rcutorture_sync);
	if (torture_init_error(firsterr))
		goto unwind;
	firsterr = rcu_torture_stall_init();
	if (torture_init_error(firsterr))
		goto unwind;
	firsterr = rcu_torture_fwd_prog_init();
	if (torture_init_error(firsterr))
		goto unwind;
	firsterr = rcu_torture_barrier_init();
	if (torture_init_error(firsterr))
		goto unwind;
	firsterr = rcu_torture_read_exit_init();
	if (torture_init_error(firsterr))
		goto unwind;
	if (object_debug)
		rcu_test_debug_objects();
	torture_init_end();
	/* Register only when the matching unregister hook exists too. */
	if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
		cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
	return 0;

unwind:
	/* Common error exit: finish init bookkeeping, then tear everything down. */
	torture_init_end();
	rcu_torture_cleanup();
	if (shutdown_secs) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
		kernel_power_off();
	}
	return firsterr;
}
/* Register rcu_torture_init() and rcu_torture_cleanup() as the module entry and exit points. */
module_init(rcu_torture_init);
module_exit(rcu_torture_cleanup);