From 1806b1f97f7ab0bb8bda7c117b2573a335cea940 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:02:49 -0700 Subject: [PATCH 01/31] refscale: Add test for sched_clock() This commit adds a "sched-clock" test for the sched_clock() function. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/refscale.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index aacfcc9838b3..1b47376acdc4 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "rcu.h" @@ -531,6 +532,39 @@ static const struct ref_scale_ops acqrel_ops = { static volatile u64 stopopts; +static void ref_sched_clock_section(const int nloops) +{ + u64 x = 0; + int i; + + preempt_disable(); + for (i = nloops; i >= 0; i--) + x += sched_clock(); + preempt_enable(); + stopopts = x; +} + +static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl) +{ + u64 x = 0; + int i; + + preempt_disable(); + for (i = nloops; i >= 0; i--) { + x += sched_clock(); + un_delay(udl, ndl); + } + preempt_enable(); + stopopts = x; +} + +static const struct ref_scale_ops sched_clock_ops = { + .readsection = ref_sched_clock_section, + .delaysection = ref_sched_clock_delay_section, + .name = "sched-clock" +}; + + static void ref_clock_section(const int nloops) { u64 x = 0; @@ -1130,9 +1164,9 @@ ref_scale_init(void) int firsterr = 0; static const struct ref_scale_ops *scale_ops[] = { &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS - &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, - &clock_ops, &jiffies_ops, &typesafe_ref_ops, &typesafe_lock_ops, - &typesafe_seqlock_ops, + &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, + &acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops, + &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops, }; if (!torture_init_begin(scale_type, verbose)) From 6ca774f06a7df650f41b38b67bec0665d862ac23 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:56:28 -0700 Subject: [PATCH 02/31] torture: Make kvm-remote.sh give up on unresponsive system Currently, a system that stops responding at the wrong time will hang kvm-remote.sh. This can happen when the system in question is forced offline for maintenance, and there is currently no way for the user to kick this script into moving ahead. This commit therefore causes kvm-remote.sh to wait at most 15 minutes for a non-responsive system, that is, a system for which ssh gives an exit code of 255. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- .../selftests/rcutorture/bin/kvm-remote.sh | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 134cdef5a6e0..48a8052d5dae 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -181,10 +181,11 @@ done # Function to check for presence of a file on the specified system. # Complain if the system cannot be reached, and retry after a wait. -# Currently just waits forever if a machine disappears. +# Currently just waits 15 minutes if a machine disappears. 
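As a usage sketch for the new refscale "sched-clock" test added by the first patch above: the test is selected through refscale's scale_type module parameter, for example via the rcutorture scripting's --bootargs option. The kvm.sh flags and the loop count below are illustrative assumptions, not part of the patch itself.

	# Hypothetical invocation; adjust --duration and the loop count to taste.
	tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale \
		--bootargs "refscale.scale_type=sched-clock refscale.loops=10000"
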
# # Usage: checkremotefile system pathname checkremotefile () { + local nsshfails=0 local ret local sleeptime=60 @@ -195,6 +196,11 @@ checkremotefile () { if test "$ret" -eq 255 then echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" + nsshfails=$((nsshfails+1)) + if ((nsshfails > 15)) + then + return 255 + fi elif test "$ret" -eq 0 then return 0 @@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log" for i in $systems do echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log" - while checkremotefile "$i" "$resdir/$ds/remote.run" + while : do + checkremotefile "$i" "$resdir/$ds/remote.run" + ret=$? + if test "$ret" -eq 1 + then + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + break; + fi + if test "$ret" -eq 255 + then + echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log" + break; + fi sleep 30 done - echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" - ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log" From 0203b485d26d5b403ff4ed21e4cc85ba9ec0fe67 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 16:42:37 -0700 Subject: [PATCH 03/31] torture: Add dowarn argument to torture_sched_setaffinity() Current use cases of torture_sched_setaffinity() are well served by its unconditional warning on error. However, an upcoming use case for a preemption kthread needs to avoid warnings that might otherwise arise when that kthread attempted to bind itself to a CPU on its way offline. This commit therefore adds a dowarn argument that, when false, suppresses the warning. Signed-off-by: Paul E. 
McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/torture.h | 2 +- kernel/locking/locktorture.c | 6 +++--- kernel/rcu/rcutorture.c | 2 +- kernel/rcu/update.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/torture.h b/include/linux/torture.h index c2e979f82f8d..0134e7221cae 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -130,7 +130,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #endif #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); #endif #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index de95ec07e477..cc33470f4de9 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -106,7 +106,7 @@ static const struct kernel_param_ops lt_bind_ops = { module_param_cb(bind_readers, <_bind_ops, &bind_readers, 0644); module_param_cb(bind_writers, <_bind_ops, &bind_writers, 0644); -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); static struct task_struct *stats_task; static struct task_struct **writer_tasks; @@ -1358,7 +1358,7 @@ static int __init lock_torture_init(void) if (torture_init_error(firsterr)) goto unwind; if (cpumask_nonempty(bind_writers)) - torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers); + torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers, true); create_reader: if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress)) @@ -1369,7 +1369,7 @@ static int __init lock_torture_init(void) if (torture_init_error(firsterr)) goto unwind; if (cpumask_nonempty(bind_readers)) - torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers); + torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers, true); } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 612d27690335..908506b68c41 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -857,7 +857,7 @@ static void synchronize_rcu_trivial(void) int cpu; for_each_online_cpu(cpu) { - torture_sched_setaffinity(current->pid, cpumask_of(cpu)); + torture_sched_setaffinity(current->pid, cpumask_of(cpu), true); WARN_ON_ONCE(raw_smp_processor_id() != cpu); } } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f8436969e0c8..c912b594ba98 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -527,12 +527,12 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) /* Get rcutorture access to sched_setaffinity(). 
*/ -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn) { int ret; ret = sched_setaffinity(pid, in_mask); - WARN_ONCE(ret, "%s: sched_setaffinity(%d) returned %d\n", __func__, pid, ret); + WARN_ONCE(dowarn && ret, "%s: sched_setaffinity(%d) returned %d\n", __func__, pid, ret); return ret; } EXPORT_SYMBOL_GPL(torture_sched_setaffinity); From 584975ccb7bd8088e681b0b75335295d0a2c6da1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Oct 2024 11:42:53 -0700 Subject: [PATCH 04/31] rcutorture: Add random real-time preemption This commit adds the rcutorture.preempt_duration kernel module parameter, which gives the real-time preemption duration in milliseconds (zero to disable, which is the default) and also the rcutorture.preempt_interval module parameter, which gives the interval between successive preemptions, also in milliseconds, defaulting to one second. The CPU to preempt is chosen at random from those online at that time. Races between preempting a given CPU and that CPU going offline are ignored, and preemption is forgone when this occurs. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- .../admin-guide/kernel-parameters.txt | 16 +++++++ kernel/rcu/rcutorture.c | 44 ++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dc663c0ca670..65e5343b46cf 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5424,6 +5424,22 @@ Set time (jiffies) between CPU-hotplug operations, or zero to disable CPU-hotplug testing. + rcutorture.preempt_duration= [KNL] + Set duration (in milliseconds) of preemptions + by a high-priority FIFO real-time task. Set to + zero (the default) to disable. The CPUs to + preempt are selected randomly from the set that + are online at a given point in time. Races with + CPUs going offline are ignored, with that attempt + at preemption skipped. + + rcutorture.preempt_interval= [KNL] + Set interval (in milliseconds, defaulting to one + second) between preemptions by a high-priority + FIFO real-time task. This delay is mediated + by an hrtimer and is further fuzzed to avoid + inadvertent synchronizations. 
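As a concrete illustration of the two parameters documented above, the preemption kthread could be asked for 10-millisecond preemptions at the default one-second interval, either on the kernel command line or through the rcutorture scripting. The specific values and the --configs choice below are only examples, not recommendations from the patch.

	# Hypothetical run; TREE03 is chosen here only because it is a preemptible-RCU scenario.
	tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcu --configs TREE03 \
		--bootargs "rcutorture.preempt_duration=10 rcutorture.preempt_interval=1000"

Note that patch 05 below makes preempt_duration=10 the default for the TREE03 scenario, so an explicit --bootargs override is only needed for other scenarios or other values.
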
+ rcutorture.read_exit_burst= [KNL] The number of times in a given read-then-exit episode that a set of read-then-exit kthreads diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 908506b68c41..99780a74da44 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -109,6 +109,8 @@ torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable"); torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable"); torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)"); +torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable"); +torture_param(int, preempt_interval, MSEC_PER_SEC, "Interval between preemptions (ms)"); torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)"); torture_param(int, read_exit_burst, 16, "# of read-then-exit bursts per episode, zero to disable"); torture_param(int, reader_flavor, 0x1, "Reader flavors to use, one per bit."); @@ -149,6 +151,7 @@ static struct task_struct **fwd_prog_tasks; static struct task_struct **barrier_cbs_tasks; static struct task_struct *barrier_task; static struct task_struct *read_exit_task; +static struct task_struct *preempt_task; #define RCU_TORTURE_PIPE_LEN 10 @@ -2425,7 +2428,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) "read_exit_delay=%d read_exit_burst=%d " "reader_flavor=%x " "nocbs_nthreads=%d nocbs_toggle=%d " - "test_nmis=%d\n", + "test_nmis=%d " + "preempt_duration=%d preempt_interval=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, @@ -2438,7 +2442,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) read_exit_delay, read_exit_burst, reader_flavor, nocbs_nthreads, nocbs_toggle, - test_nmis); + test_nmis, + preempt_duration, preempt_interval); } static int rcutorture_booster_cleanup(unsigned int cpu) @@ -3418,6 +3423,35 @@ static void rcutorture_test_nmis(int n) #endif // #else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST) } +// Randomly preempt online CPUs. +static int rcu_torture_preempt(void *unused) +{ + int cpu = -1; + DEFINE_TORTURE_RANDOM(rand); + + schedule_timeout_idle(stall_cpu_holdoff); + do { + // Wait for preempt_interval ms with up to 100us fuzz. + torture_hrtimeout_ms(preempt_interval, 100, &rand); + // Select online CPU. + cpu = cpumask_next(cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_next(-1, cpu_online_mask); + WARN_ON_ONCE(cpu >= nr_cpu_ids); + // Move to that CPU, if can't do so, retry later. + if (torture_sched_setaffinity(current->pid, cpumask_of(cpu), false)) + continue; + // Preempt at high-ish priority, then reset to normal. 
+ sched_set_fifo(current); + torture_sched_setaffinity(current->pid, cpu_present_mask, true); + mdelay(preempt_duration); + sched_set_normal(current, 0); + stutter_wait("rcu_torture_preempt"); + } while (!torture_must_stop()); + torture_kthread_stopping("rcu_torture_preempt"); + return 0; +} + static enum cpuhp_state rcutor_hp; static void @@ -3446,6 +3480,7 @@ rcu_torture_cleanup(void) if (cur_ops->gp_kthread_dbg) cur_ops->gp_kthread_dbg(); + torture_stop_kthread(rcu_torture_preempt, preempt_task); rcu_torture_read_exit_cleanup(); rcu_torture_barrier_cleanup(); rcu_torture_fwd_prog_cleanup(); @@ -4019,6 +4054,11 @@ rcu_torture_init(void) firsterr = rcu_torture_read_exit_init(); if (torture_init_error(firsterr)) goto unwind; + if (preempt_duration > 0) { + firsterr = torture_create_kthread(rcu_torture_preempt, NULL, preempt_task); + if (torture_init_error(firsterr)) + goto unwind; + } if (object_debug) rcu_test_debug_objects(); torture_init_end(); From 5ec090011bd2bb6ea6c2c607371db57ee0506a89 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Oct 2024 11:49:54 -0700 Subject: [PATCH 05/31] rcutorture: Make the TREE03 scenario do preemption This commit adds the rcutorture.preempt_duration module parameter to rcutorture's TREE03.boot parameter list in order to better test preemption of RCU read-side critical sections. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 8e50bfd4b710..90318591dae2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -5,3 +5,4 @@ rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs rcutree.use_softirq=0 +rcutorture.preempt_duration=10 From 579a05da40a4980870a13d30cd0532f77aa15b8b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Oct 2024 09:50:40 -0700 Subject: [PATCH 06/31] rcutorture: Decorate failing reader segments with CPU ID This commit adds CPU number to the "Failure/close-call rcutorture reader segments" list printed at the end of an rcutorture run that had too-short grace periods. This information can help debugging interactions with migration and CPU hotplug. However, experience indicates that sampling the CPU number in rcutorture's read-side code can reduce the probability of too-short bugs by a small integer factor. And small integer factors are crucial to RCU bug hunting, so this commit also introduces a default-off RCU_TORTURE_TEST_LOG_CPU Kconfig option to enable this CPU-number-logging functionality at build time. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/Kconfig.debug | 15 +++++++++++++++ kernel/rcu/rcutorture.c | 9 +++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 9b0b52e1836f..b3ac000004bf 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -53,6 +53,21 @@ config RCU_TORTURE_TEST Say M if you want the RCU torture tests to build as a module. Say N if you are unsure. 
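The RCU_TORTURE_TEST_LOG_CPU option being added just below is off by default, so a run that wants the per-segment CPU decoration must enable it explicitly. One way, assuming the usual rcutorture scripting, is kvm.sh's --kconfig option; the invocation below is a sketch rather than something taken from the patch.

	# Hypothetical: enable CPU logging on top of an existing scenario.
	tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcu --configs TREE03 \
		--kconfig "CONFIG_RCU_TORTURE_TEST_LOG_CPU=y"
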
+config RCU_TORTURE_TEST_LOG_CPU + tristate "Log CPU for rcutorture failures" + depends on RCU_TORTURE_TEST + default n + help + This option causes rcutorture to decorate each entry of its + log of failure/close-call rcutorture reader segments with the + number of the CPU that the reader was running on at the time. + This information can be useful, but it does incur additional + overhead, overhead that can make both failures and close calls + less probable. + + Say Y here if you want CPU IDs logged. + Say N if you are unsure. + config RCU_REF_SCALE_TEST tristate "Scalability tests for read-side synchronization (RCU and others)" depends on DEBUG_KERNEL diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 99780a74da44..0bc6fc582215 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -262,6 +262,7 @@ struct rt_read_seg { unsigned long rt_delay_ms; unsigned long rt_delay_us; bool rt_preempted; + int rt_cpu; }; static int err_segs_recorded; static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS]; @@ -1862,6 +1863,8 @@ static void rcutorture_one_extend(int *readstate, int newstate, WARN_ON_ONCE(idxold2 < 0); WARN_ON_ONCE(idxold2 & ~RCUTORTURE_RDR_ALLBITS); rtrsp->rt_readstate = newstate; + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) + rtrsp->rt_cpu = raw_smp_processor_id(); /* First, put new protection in place to avoid critical-section gap. */ if (statesnew & RCUTORTURE_RDR_BH) @@ -3559,8 +3562,10 @@ rcu_torture_cleanup(void) err_segs[i].rt_delay_us); firsttime = 0; } - pr_cont("%s\n", - err_segs[i].rt_preempted ? "preempted" : ""); + pr_cont("%s", err_segs[i].rt_preempted ? "preempted" : ""); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) + pr_cont(" CPU %d", err_segs[i].rt_cpu); + pr_cont("\n"); } } From 7b6c1648bb6e041b3f2284b7f602283adc852bb7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 Oct 2024 07:07:47 -0700 Subject: [PATCH 07/31] rcutorture: Use finer-grained timeouts for rcu_torture_writer() polling The rcu_torture_writer() polling currently uses timeouts ranging from zero to 16 milliseconds to wait for the polled grace period to end. This works, but it would be better to have a higher probability of exercising races with the code that cleans up after a grace period. This commit therefore switches from these millisecond-scale timeouts to timeouts ranging from zero to 128 microseconds, and with a full microsecond's worth of timeout fuzz. Signed-off-by: Paul E. 
McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0bc6fc582215..6067f2740247 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1561,8 +1561,8 @@ rcu_torture_writer(void *arg) break; } WARN_ON_ONCE(ulo_size > 0 && i >= ulo_size); - torture_hrtimeout_jiffies(torture_random(&rand) % 16, - &rand); + torture_hrtimeout_us(torture_random(&rand) % 128, 1000, + &rand); } rcu_torture_pipe_update(old_rp); break; @@ -1582,8 +1582,8 @@ rcu_torture_writer(void *arg) break; } WARN_ON_ONCE(rgo_size > 0 && i >= rgo_size); - torture_hrtimeout_jiffies(torture_random(&rand) % 16, - &rand); + torture_hrtimeout_us(torture_random(&rand) % 128, 1000, + &rand); } rcu_torture_pipe_update(old_rp); break; @@ -1592,8 +1592,8 @@ rcu_torture_writer(void *arg) gp_snap = cur_ops->start_gp_poll_exp(); rcu_torture_writer_state = RTWS_POLL_WAIT_EXP; while (!cur_ops->poll_gp_state_exp(gp_snap)) - torture_hrtimeout_jiffies(torture_random(&rand) % 16, - &rand); + torture_hrtimeout_us(torture_random(&rand) % 128, 1000, + &rand); rcu_torture_pipe_update(old_rp); break; case RTWS_POLL_GET_EXP_FULL: @@ -1601,8 +1601,8 @@ rcu_torture_writer(void *arg) cur_ops->start_gp_poll_exp_full(&gp_snap_full); rcu_torture_writer_state = RTWS_POLL_WAIT_EXP_FULL; while (!cur_ops->poll_gp_state_full(&gp_snap_full)) - torture_hrtimeout_jiffies(torture_random(&rand) % 16, - &rand); + torture_hrtimeout_us(torture_random(&rand) % 128, 1000, + &rand); rcu_torture_pipe_update(old_rp); break; case RTWS_SYNC: From 4569cf60b6caf26995f314a0c1e14f73ab8c924b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Oct 2024 14:29:11 -0700 Subject: [PATCH 08/31] rcutorture: Add ->cond_sync_exp_full function to rcu_ops structure The rcu_ops structure currently lacks a ->cond_sync_exp_full function, which prevents testing of conditional full-state polled grace periods. This commit therefore adds it, enabling testing of this option. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 6067f2740247..658ac46581d8 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -577,6 +577,7 @@ static struct rcu_torture_ops rcu_ops = { .start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full, .poll_gp_state_exp = poll_state_synchronize_rcu, .cond_sync_exp = cond_synchronize_rcu_expedited, + .cond_sync_exp_full = cond_synchronize_rcu_expedited_full, .call = call_rcu_hurry, .cb_barrier = rcu_barrier, .fqs = rcu_force_quiescent_state, From 0f38c06cab7712fc82c314fe4264a8897f3e6365 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Oct 2024 13:07:11 -0700 Subject: [PATCH 09/31] rcutorture: Check preemption for failing reader This commit checks to see if the RCU reader has been preempted within its read-side critical section for RCU flavors supporting this notion (currently only preemptible RCU). If such a preemption occurred, then this is printed at the end of the "Failure/close-call rcutorture reader segments" list at the end of the rcutorture run. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Paul E.
McKenney Cc: Frederic Weisbecker Tested-by: kernel test robot Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/rcupdate_wait.h | 11 +++++++++++ kernel/rcu/rcutorture.c | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 303ab9bee155..f9bed3d3f78d 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -65,4 +65,15 @@ static inline void cond_resched_rcu(void) #endif } +// Has the current task blocked within its current RCU read-side +// critical section? +static inline bool has_rcu_reader_blocked(void) +{ +#ifdef CONFIG_PREEMPT_RCU + return !list_empty(¤t->rcu_node_entry); +#else + return false; +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 658ac46581d8..9b81e21c75d1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -267,6 +267,7 @@ struct rt_read_seg { static int err_segs_recorded; static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS]; static int rt_read_nsegs; +static int rt_read_preempted; static const char *rcu_torture_writer_state_getname(void) { @@ -394,6 +395,7 @@ struct rcu_torture_ops { void (*get_gp_data)(int *flags, unsigned long *gp_seq); void (*gp_slow_register)(atomic_t *rgssp); void (*gp_slow_unregister)(atomic_t *rgssp); + bool (*reader_blocked)(void); long cbflood_max; int irq_capable; int can_boost; @@ -587,6 +589,9 @@ static struct rcu_torture_ops rcu_ops = { .get_gp_data = rcutorture_get_gp_data, .gp_slow_register = rcu_gp_slow_register, .gp_slow_unregister = rcu_gp_slow_unregister, + .reader_blocked = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU) + ? has_rcu_reader_blocked + : NULL, .irq_capable = 1, .can_boost = IS_ENABLED(CONFIG_RCU_BOOST), .extendables = RCUTORTURE_MAX_EXTEND, @@ -2035,6 +2040,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) int newstate; struct rcu_torture *p; int pipe_count; + bool preempted = false; int readstate = 0; struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } }; struct rt_read_seg *rtrsp = &rtseg[0]; @@ -2100,6 +2106,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) rcu_torture_writer_state, cpumask_pr_args(cpu_online_mask)); } + if (cur_ops->reader_blocked) + preempted = cur_ops->reader_blocked(); rcutorture_one_extend(&readstate, 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially @@ -2112,6 +2120,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++) err_segs[i++] = *rtrsp1; rt_read_nsegs = i; + rt_read_preempted = preempted; } return true; @@ -3569,6 +3578,8 @@ rcu_torture_cleanup(void) pr_cont("\n"); } + if (rt_read_preempted) + pr_alert("\tReader was preempted.\n"); } if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); From 3b476823b98685ec4d228af32323854f8e45aac7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Oct 2024 16:58:51 -0700 Subject: [PATCH 10/31] rcutorture: Decorate failing reader segments with last CPU ID In kernels built with CONFIG_RCU_TORTURE_TEST_LOG_CPU=y, the CPU is logged at the beginning of each reader segment. This commit further logs it at the end of the full set of reader segments in order to show any migration that might have occurred during the last reader segment. Signed-off-by: Paul E. 
McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 9b81e21c75d1..61b092a3dc3f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -268,6 +268,7 @@ static int err_segs_recorded; static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS]; static int rt_read_nsegs; static int rt_read_preempted; +static int rt_last_cpu; static const char *rcu_torture_writer_state_getname(void) { @@ -2108,6 +2109,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) } if (cur_ops->reader_blocked) preempted = cur_ops->reader_blocked(); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) + rt_last_cpu = raw_smp_processor_id(); rcutorture_one_extend(&readstate, 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially @@ -3580,6 +3583,8 @@ rcu_torture_cleanup(void) } if (rt_read_preempted) pr_alert("\tReader was preempted.\n"); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) + pr_alert("\tReader last ran on CPU %d.\n", rt_last_cpu); } if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); From b27a34f908c7c2424483e50c13894414bb169aad Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Oct 2024 09:47:11 -0700 Subject: [PATCH 11/31] rcutorture: Add full read-side contexts to "busted" torture type The purpose of the "busted" torture type is to test rcutorture code paths used only when a too-short grace period is detected. Currently, "busted" only uses normal rcu_read_lock()-style readers, which fails to exercise much of the "Failure/close-call rcutorture reader segments" functionality. This commit therefore sets the .extendables field of rcu_busted_ops to RCUTORTURE_MAX_EXTEND in order to more fully exercise the reporting. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 61b092a3dc3f..81b3743f81dc 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -639,6 +639,7 @@ static struct rcu_torture_ops rcu_busted_ops = { .exp_sync = synchronize_rcu_busted, .call = call_rcu_busted, .irq_capable = 1, + .extendables = RCUTORTURE_MAX_EXTEND, .name = "busted" }; From 16338e7cb7450574ae3a210db6f35280fc44e50e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Oct 2024 12:21:28 -0700 Subject: [PATCH 12/31] rcutorture: Pretty-print rcutorture reader segments The current "Failure/close-call rcutorture reader segments" output is good and sufficient, but annoying when you have to interpret several tens of them after an all-night rcutorture run. This commit therefore makes them a bit more human-readable. Signed-off-by: Paul E. 
McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 81b3743f81dc..c875e7239ae7 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -3560,25 +3560,39 @@ rcu_torture_cleanup(void) pr_alert("\t: No segments recorded!!!\n"); firsttime = 1; for (i = 0; i < rt_read_nsegs; i++) { - pr_alert("\t%d: %#x ", i, err_segs[i].rt_readstate); + pr_alert("\t%d: %#4x", i, err_segs[i].rt_readstate); if (err_segs[i].rt_delay_jiffies != 0) { pr_cont("%s%ldjiffies", firsttime ? "" : "+", err_segs[i].rt_delay_jiffies); firsttime = 0; } + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) + pr_cont(" CPU %-2d", err_segs[i].rt_cpu); if (err_segs[i].rt_delay_ms != 0) { - pr_cont("%s%ldms", firsttime ? "" : "+", + pr_cont(" %s%ldms", firsttime ? "" : "+", err_segs[i].rt_delay_ms); firsttime = 0; } if (err_segs[i].rt_delay_us != 0) { - pr_cont("%s%ldus", firsttime ? "" : "+", + pr_cont(" %s%ldus", firsttime ? "" : "+", err_segs[i].rt_delay_us); firsttime = 0; } - pr_cont("%s", err_segs[i].rt_preempted ? "preempted" : ""); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) - pr_cont(" CPU %d", err_segs[i].rt_cpu); + pr_cont("%s", err_segs[i].rt_preempted ? " preempted" : ""); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_BH) + pr_cont(" BH"); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_IRQ) + pr_cont(" IRQ"); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_PREEMPT) + pr_cont(" PREEMPT"); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_RBH) + pr_cont(" RBH"); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_SCHED) + pr_cont(" SCHED"); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_RCU_1) + pr_cont(" RCU_1"); + if (err_segs[i].rt_readstate & RCUTORTURE_RDR_RCU_2) + pr_cont(" RCU_2"); pr_cont("\n"); } From ec9d6356bfda69abe5f4767dd56c964127913233 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 30 Oct 2024 17:10:22 -0700 Subject: [PATCH 13/31] rcutorture: Make rcutorture_one_extend() check reader state This commit adds reader-state debugging checks to a new function named rcutorture_one_extend_check(), which is invoked before and after setting new reader states by the existing rcutorture_one_extend() function. These checks have proven to be rather heavyweight, reducing reproduction rate of some failures by a factor of two. They are therefore hidden behind a new RCU_TORTURE_TEST_CHK_RDR_STATE Kconfig option. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Tested-by: kernel test robot Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/Kconfig.debug | 16 +++++++++ kernel/rcu/rcutorture.c | 71 +++++++++++++++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index b3ac000004bf..6af90510a1ca 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -53,6 +53,22 @@ config RCU_TORTURE_TEST Say M if you want the RCU torture tests to build as a module. Say N if you are unsure. +config RCU_TORTURE_TEST_CHK_RDR_STATE + tristate "Check rcutorture reader state" + depends on RCU_TORTURE_TEST + default n + help + This option causes rcutorture to check the desired rcutorture + reader state for each segment against the actual context. 
+ Note that PREEMPT_COUNT must be enabled if the preempt-disabled + and bh-disabled checks are to take effect, and that PREEMPT_RCU + must be enabled for the RCU-nesting checks to take effect. + These checks add overhead, and this Kconfig options is therefore + disabled by default. + + Say Y here if you want rcutorture reader contexts checked. + Say N if you are unsure. + config RCU_TORTURE_TEST_LOG_CPU tristate "Log CPU for rcutorture failures" depends on RCU_TORTURE_TEST diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index c875e7239ae7..8c7820a00f3c 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -359,7 +359,8 @@ struct rcu_torture_ops { void (*read_delay)(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp); void (*readunlock)(int idx); - int (*readlock_held)(void); + int (*readlock_held)(void); // lockdep. + int (*readlock_nesting)(void); // actual nesting, if available, -1 if not. unsigned long (*get_gp_seq)(void); unsigned long (*gp_diff)(unsigned long new, unsigned long old); void (*deferred_free)(struct rcu_torture *p); @@ -466,6 +467,15 @@ static void rcu_torture_read_unlock(int idx) rcu_read_unlock(); } +static int rcu_torture_readlock_nesting(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + return rcu_preempt_depth(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return (preempt_count() & PREEMPT_MASK); + return -1; +} + /* * Update callback in the pipe. This should be invoked after a grace period. */ @@ -555,6 +565,7 @@ static struct rcu_torture_ops rcu_ops = { .read_delay = rcu_read_delay, .readunlock = rcu_torture_read_unlock, .readlock_held = torture_readlock_not_held, + .readlock_nesting = rcu_torture_readlock_nesting, .get_gp_seq = rcu_get_gp_seq, .gp_diff = rcu_seq_diff, .deferred_free = rcu_torture_deferred_free, @@ -1847,6 +1858,44 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp, smp_store_release(&rtrcp_assigner->rtc_chkrdr, -1); // Assigner can again assign. } +// Verify the specified RCUTORTURE_RDR* state. +#define ROEC_ARGS "%s %s: Current %#x To add %#x To remove %#x\n", __func__, s, curstate, new, old +static void rcutorture_one_extend_check(char *s, int curstate, int new, int old, bool insoftirq) +{ + if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE)) + return; + + WARN_ONCE(!(curstate & RCUTORTURE_RDR_IRQ) && irqs_disabled(), ROEC_ARGS); + WARN_ONCE((curstate & RCUTORTURE_RDR_IRQ) && !irqs_disabled(), ROEC_ARGS); + + // If CONFIG_PREEMPT_COUNT=n, further checks are unreliable. + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return; + + WARN_ONCE((curstate & (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH)) && + !(preempt_count() & SOFTIRQ_MASK), ROEC_ARGS); + WARN_ONCE((curstate & (RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED)) && + !(preempt_count() & PREEMPT_MASK), ROEC_ARGS); + WARN_ONCE(cur_ops->readlock_nesting && + (curstate & (RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2)) && + cur_ops->readlock_nesting() == 0, ROEC_ARGS); + + // Timer handlers have all sorts of stuff disabled, so ignore + // unintended disabling. 
+ if (insoftirq) + return; + + WARN_ONCE(cur_ops->extendables && + !(curstate & (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH)) && + (preempt_count() & SOFTIRQ_MASK), ROEC_ARGS); + WARN_ONCE(cur_ops->extendables && + !(curstate & (RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED)) && + (preempt_count() & PREEMPT_MASK), ROEC_ARGS); + WARN_ONCE(cur_ops->readlock_nesting && + !(curstate & (RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2)) && + cur_ops->readlock_nesting() > 0, ROEC_ARGS); +} + /* * Do one extension of an RCU read-side critical section using the * current reader state in readstate (set to zero for initial entry @@ -1856,7 +1905,7 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp, * beginning or end of the critical section and if there was actually a * change, do a ->read_delay(). */ -static void rcutorture_one_extend(int *readstate, int newstate, +static void rcutorture_one_extend(int *readstate, int newstate, bool insoftirq, struct torture_random_state *trsp, struct rt_read_seg *rtrsp) { @@ -1870,6 +1919,7 @@ static void rcutorture_one_extend(int *readstate, int newstate, WARN_ON_ONCE(idxold2 < 0); WARN_ON_ONCE(idxold2 & ~RCUTORTURE_RDR_ALLBITS); + rcutorture_one_extend_check("before change", idxold1, statesnew, statesold, insoftirq); rtrsp->rt_readstate = newstate; if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) rtrsp->rt_cpu = raw_smp_processor_id(); @@ -1890,6 +1940,10 @@ static void rcutorture_one_extend(int *readstate, int newstate, if (statesnew & RCUTORTURE_RDR_RCU_2) idxnew2 = (cur_ops->readlock() << RCUTORTURE_RDR_SHIFT_2) & RCUTORTURE_RDR_MASK_2; + // Complain unless both the old and the new protection is in place. + rcutorture_one_extend_check("during change", + idxold1 | statesnew, statesnew, statesold, insoftirq); + /* * Next, remove old protection, in decreasing order of strength * to avoid unlock paths that aren't safe in the stronger @@ -1940,6 +1994,7 @@ static void rcutorture_one_extend(int *readstate, int newstate, WARN_ON_ONCE(*readstate < 0); if (WARN_ON_ONCE(*readstate & ~RCUTORTURE_RDR_ALLBITS)) pr_info("Unexpected readstate value of %#x\n", *readstate); + rcutorture_one_extend_check("after change", *readstate, statesnew, statesold, insoftirq); } /* Return the biggest extendables mask given current RCU and boot parameters. */ @@ -2006,7 +2061,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) * critical section. 
*/ static struct rt_read_seg * -rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp, +rcutorture_loop_extend(int *readstate, bool insoftirq, struct torture_random_state *trsp, struct rt_read_seg *rtrsp) { int i; @@ -2021,7 +2076,7 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp, i = ((i | (i >> 3)) & RCUTORTURE_RDR_MAX_LOOPS) + 1; for (j = 0; j < i; j++) { mask = rcutorture_extend_mask(*readstate, trsp); - rcutorture_one_extend(readstate, mask, trsp, &rtrsp[j]); + rcutorture_one_extend(readstate, mask, insoftirq, trsp, &rtrsp[j]); } return &rtrsp[j]; } @@ -2051,7 +2106,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) WARN_ON_ONCE(!rcu_is_watching()); newstate = rcutorture_extend_mask(readstate, trsp); - rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++); + rcutorture_one_extend(&readstate, newstate, myid < 0, trsp, rtrsp++); if (checkpolling) { if (cur_ops->get_gp_state && cur_ops->poll_gp_state) cookie = cur_ops->get_gp_state(); @@ -2064,13 +2119,13 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) !cur_ops->readlock_held || cur_ops->readlock_held()); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ - rcutorture_one_extend(&readstate, 0, trsp, rtrsp); + rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp); return false; } if (p->rtort_mbtest == 0) atomic_inc(&n_rcu_torture_mberror); rcu_torture_reader_do_mbchk(myid, p, trsp); - rtrsp = rcutorture_loop_extend(&readstate, trsp, rtrsp); + rtrsp = rcutorture_loop_extend(&readstate, myid < 0, trsp, rtrsp); preempt_disable(); pipe_count = READ_ONCE(p->rtort_pipe_count); if (pipe_count > RCU_TORTURE_PIPE_LEN) { @@ -2112,7 +2167,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) preempted = cur_ops->reader_blocked(); if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) rt_last_cpu = raw_smp_processor_id(); - rcutorture_one_extend(&readstate, 0, trsp, rtrsp); + rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially // for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels. From a2ab1e457897f4fc8ff23f7ead92ee2b29655c04 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Nov 2024 12:19:35 -0800 Subject: [PATCH 14/31] rcutorture: Ignore attempts to test preemption and forward progress Use of the rcutorture preempt_duration and the default-on fwd_progress kernel parameters can result in preemption of callback processing during forward-progress testing, which is an excellent way to OOM your test if your kernel offloads RCU callbacks. This commit therefore treats preempt_duration in the same way as stall_cpu in CONFIG_RCU_NOCB_CPU=y kernels, prohibiting fwd_progress testing and splatting when rcutorture is built in (as opposed to being a loadable module). Signed-off-by: Paul E. 
McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8c7820a00f3c..3595ce889b44 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -3145,12 +3145,12 @@ static int __init rcu_torture_fwd_prog_init(void) fwd_progress = 0; return 0; } - if (stall_cpu > 0) { - VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing"); + if (stall_cpu > 0 || (preempt_duration > 0 && IS_ENABLED(CONFIG_RCU_NOCB_CPU))) { + VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall and/or preemption testing"); fwd_progress = 0; if (IS_MODULE(CONFIG_RCU_TORTURE_TEST)) return -EINVAL; /* In module, can fail back to user. */ - WARN_ON(1); /* Make sure rcutorture notices conflict. */ + WARN_ON(1); /* Make sure rcutorture scripting notices conflict. */ return 0; } if (fwd_progress_holdoff <= 0) From cae7f6319e6451dc49e4239652c6c7b6f54008d2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Nov 2024 13:49:44 -0800 Subject: [PATCH 15/31] rcutorture: Add documentation for recent conditional and polled APIs This commit adds kernel-parameters.txt documentation for rcutorture's (relatively) new gp_cond_exp, gp_cond_full, gp_cond_exp, gp_poll, gp_poll_exp, gp_poll_full, and gp_poll_exp module parameters. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- .../admin-guide/kernel-parameters.txt | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 65e5343b46cf..03d13ca0604f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5362,7 +5362,23 @@ rcutorture.gp_cond= [KNL] Use conditional/asynchronous update-side - primitives, if available. + normal-grace-period primitives, if available. + + rcutorture.gp_cond_exp= [KNL] + Use conditional/asynchronous update-side + expedited-grace-period primitives, if available. + + rcutorture.gp_cond_full= [KNL] + Use conditional/asynchronous update-side + normal-grace-period primitives that also take + concurrent expedited grace periods into account, + if available. + + rcutorture.gp_cond_exp_full= [KNL] + Use conditional/asynchronous update-side + expedited-grace-period primitives that also take + concurrent normal grace periods into account, + if available. rcutorture.gp_exp= [KNL] Use expedited update-side primitives, if available. @@ -5371,6 +5387,24 @@ Use normal (non-expedited) asynchronous update-side primitives, if available. + rcutorture.gp_poll= [KNL] + Use polled update-side normal-grace-period + primitives, if available. + + rcutorture.gp_poll_exp= [KNL] + Use polled update-side expedited-grace-period + primitives, if available. + + rcutorture.gp_poll_full= [KNL] + Use polled update-side normal-grace-period + primitives that also take concurrent expedited + grace periods into account, if available. + + rcutorture.gp_poll_exp_full= [KNL] + Use polled update-side expedited-grace-period + primitives that also take concurrent normal + grace periods into account, if available. + rcutorture.gp_sync= [KNL] Use normal (non-expedited) synchronous update-side primitives, if available. If all From 282e06cc8f595e999f778bd284d6a3608f7d1d62 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 6 Nov 2024 15:42:06 -0800 Subject: [PATCH 16/31] rcutorture: Add parameters to control polled/conditional wait interval This commit adds rcutorture module parameters gp_cond_wi, gp_cond_wi_exp, gp_poll_wi, and gp_poll_wi_exp to control the wait interval for conditional, conditional expedited, polled, and polled expedited grace periods, respectively. When rcu_torture_writer() is testing these types of grace periods, hrtimers are used to randomly wait up to the specified number of microseconds, but with nanosecond granularity. In the case of conditional grace periods (get_state_synchronize_rcu() and cond_synchronize_rcu(), for example) there is just one wait. For polled grace periods (start_poll_synchronize_rcu() and poll_state_synchronize_rcu(), for example), there is a repeated series of waits until the grace period ends. For normal grace periods, the default is 16 jiffies (for example, 16,000 microseconds on a HZ=1000 system) and for expedited grace periods the default is 128 microseconds. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- .../admin-guide/kernel-parameters.txt | 38 +++++++++++++++++++ kernel/rcu/rcutorture.c | 37 ++++++++++++------ 2 files changed, 63 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 03d13ca0604f..3152f2c1da29 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5380,6 +5380,25 @@ concurrent normal grace periods into account, if available. + rcutorture.gp_cond_wi= [KNL] + Nominal wait interval for normal conditional + grace periods (specified by rcutorture's + gp_cond and gp_cond_full module parameters), + in microseconds. The actual wait interval will + be randomly selected to nanosecond granularity up + to this wait interval. Defaults to 16 jiffies, + for example, 16,000 microseconds on a system + with HZ=1000. + + rcutorture.gp_cond_wi_exp= [KNL] + Nominal wait interval for expedited conditional + grace periods (specified by rcutorture's + gp_cond_exp and gp_cond_exp_full module + parameters), in microseconds. The actual wait + interval will be randomly selected to nanosecond + granularity up to this wait interval. Defaults to + 128 microseconds. + rcutorture.gp_exp= [KNL] Use expedited update-side primitives, if available. @@ -5405,6 +5424,25 @@ primitives that also take concurrent normal grace periods into account, if available. + rcutorture.gp_poll_wi= [KNL] + Nominal wait interval for normal conditional + grace periods (specified by rcutorture's + gp_poll and gp_poll_full module parameters), + in microseconds. The actual wait interval will + be randomly selected to nanosecond granularity up + to this wait interval. Defaults to 16 jiffies, + for example, 16,000 microseconds on a system + with HZ=1000. + + rcutorture.gp_poll_wi_exp= [KNL] + Nominal wait interval for expedited conditional + grace periods (specified by rcutorture's + gp_poll_exp and gp_poll_exp_full module + parameters), in microseconds. The actual wait + interval will be randomly selected to nanosecond + granularity up to this wait interval. Defaults to + 128 microseconds. + rcutorture.gp_sync= [KNL] Use normal (non-expedited) synchronous update-side primitives, if available. 
If all diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 3595ce889b44..235a73dad280 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -92,12 +92,20 @@ torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait torture_param(bool, gp_cond_full, false, "Use conditional/async full-state GP wait primitives"); torture_param(bool, gp_cond_exp_full, false, "Use conditional/async full-stateexpedited GP wait primitives"); +torture_param(int, gp_cond_wi, 16 * USEC_PER_SEC / HZ, + "Wait interval for normal conditional grace periods, us (default 16 jiffies)"); +torture_param(int, gp_cond_wi_exp, 128, + "Wait interval for expedited conditional grace periods, us (default 128 us)"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives"); torture_param(bool, gp_poll, false, "Use polling GP wait primitives"); torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives"); torture_param(bool, gp_poll_full, false, "Use polling full-state GP wait primitives"); torture_param(bool, gp_poll_exp_full, false, "Use polling full-state expedited GP wait primitives"); +torture_param(int, gp_poll_wi, 16 * USEC_PER_SEC / HZ, + "Wait interval for normal polled grace periods, us (default 16 jiffies)"); +torture_param(int, gp_poll_wi_exp, 128, + "Wait interval for expedited polled grace periods, us (default 128 us)"); torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives"); torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers"); @@ -1370,6 +1378,7 @@ static void rcu_torture_write_types(void) pr_alert("%s: gp_sync without primitives.\n", __func__); } pr_alert("%s: Testing %d update types.\n", __func__, nsynctypes); + pr_info("%s: gp_cond_wi %d gp_cond_wi_exp %d gp_poll_wi %d gp_poll_wi_exp %d\n", __func__, gp_cond_wi, gp_cond_wi_exp, gp_poll_wi, gp_poll_wi_exp); } /* @@ -1536,7 +1545,8 @@ rcu_torture_writer(void *arg) case RTWS_COND_GET: rcu_torture_writer_state = RTWS_COND_GET; gp_snap = cur_ops->get_gp_state(); - torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_cond_wi, + 1000, &rand); rcu_torture_writer_state = RTWS_COND_SYNC; cur_ops->cond_sync(gp_snap); rcu_torture_pipe_update(old_rp); @@ -1544,7 +1554,8 @@ rcu_torture_writer(void *arg) case RTWS_COND_GET_EXP: rcu_torture_writer_state = RTWS_COND_GET_EXP; gp_snap = cur_ops->get_gp_state_exp(); - torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_cond_wi_exp, + 1000, &rand); rcu_torture_writer_state = RTWS_COND_SYNC_EXP; cur_ops->cond_sync_exp(gp_snap); rcu_torture_pipe_update(old_rp); @@ -1552,7 +1563,8 @@ rcu_torture_writer(void *arg) case RTWS_COND_GET_FULL: rcu_torture_writer_state = RTWS_COND_GET_FULL; cur_ops->get_gp_state_full(&gp_snap_full); - torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_cond_wi, + 1000, &rand); rcu_torture_writer_state = RTWS_COND_SYNC_FULL; cur_ops->cond_sync_full(&gp_snap_full); rcu_torture_pipe_update(old_rp); @@ -1560,7 +1572,8 @@ rcu_torture_writer(void *arg) case RTWS_COND_GET_EXP_FULL: rcu_torture_writer_state = RTWS_COND_GET_EXP_FULL; cur_ops->get_gp_state_full(&gp_snap_full); - torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + 
torture_hrtimeout_us(torture_random(&rand) % gp_cond_wi_exp, + 1000, &rand); rcu_torture_writer_state = RTWS_COND_SYNC_EXP_FULL; cur_ops->cond_sync_exp_full(&gp_snap_full); rcu_torture_pipe_update(old_rp); @@ -1580,8 +1593,8 @@ rcu_torture_writer(void *arg) break; } WARN_ON_ONCE(ulo_size > 0 && i >= ulo_size); - torture_hrtimeout_us(torture_random(&rand) % 128, 1000, - &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_poll_wi, + 1000, &rand); } rcu_torture_pipe_update(old_rp); break; @@ -1601,8 +1614,8 @@ rcu_torture_writer(void *arg) break; } WARN_ON_ONCE(rgo_size > 0 && i >= rgo_size); - torture_hrtimeout_us(torture_random(&rand) % 128, 1000, - &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_poll_wi, + 1000, &rand); } rcu_torture_pipe_update(old_rp); break; @@ -1611,8 +1624,8 @@ rcu_torture_writer(void *arg) gp_snap = cur_ops->start_gp_poll_exp(); rcu_torture_writer_state = RTWS_POLL_WAIT_EXP; while (!cur_ops->poll_gp_state_exp(gp_snap)) - torture_hrtimeout_us(torture_random(&rand) % 128, 1000, - &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_poll_wi_exp, + 1000, &rand); rcu_torture_pipe_update(old_rp); break; case RTWS_POLL_GET_EXP_FULL: @@ -1620,8 +1633,8 @@ rcu_torture_writer(void *arg) cur_ops->start_gp_poll_exp_full(&gp_snap_full); rcu_torture_writer_state = RTWS_POLL_WAIT_EXP_FULL; while (!cur_ops->poll_gp_state_full(&gp_snap_full)) - torture_hrtimeout_us(torture_random(&rand) % 128, 1000, - &rand); + torture_hrtimeout_us(torture_random(&rand) % gp_poll_wi_exp, + 1000, &rand); rcu_torture_pipe_update(old_rp); break; case RTWS_SYNC: From c31569eec4815c6fa64948c31f00ebe50b1c75dc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Nov 2024 14:58:54 -0800 Subject: [PATCH 17/31] rcutorture: Add preempt_count() to rcutorture_one_extend_check() diagnostics This commit adds the value of preempt_count() to the diagnostics produced by rcutorture_one_extend_check() to improve debugging. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 235a73dad280..70c27bd67be1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1872,7 +1872,7 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp, } // Verify the specified RCUTORTURE_RDR* state. -#define ROEC_ARGS "%s %s: Current %#x To add %#x To remove %#x\n", __func__, s, curstate, new, old +#define ROEC_ARGS "%s %s: Current %#x To add %#x To remove %#x preempt_count() %#x\n", __func__, s, curstate, new, old, preempt_count() static void rcutorture_one_extend_check(char *s, int curstate, int new, int old, bool insoftirq) { if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE)) From 885a6f4729c688c50bb2f470dfcc0ad0dea43c1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Nov 2024 15:36:55 -0800 Subject: [PATCH 18/31] rcutorture: Read CPU ID for decoration protected by both reader types Currently, rcutorture_one_extend() reads the CPU ID before making any change to the type of RCU reader. This can be confusing because the properties of the code from which the CPU ID is read are not that of the reader segment that this same CPU ID is listed with. This commit therefore causes rcutorture_one_extend() to read the CPU ID just after the new protections have been added, but before the old protections have been removed. 
With this change in place, all of the protections of a given reader segment apply from the reading of one CPU ID to the reading of the next. This change therefore also allows a single read of the CPU ID to work for both the old and the new reader segment. And this dual use of a single read of the CPU ID avoids inflicting any additional heisenbugs. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 70c27bd67be1..ab354bb7f1b6 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -271,12 +271,12 @@ struct rt_read_seg { unsigned long rt_delay_us; bool rt_preempted; int rt_cpu; + int rt_end_cpu; }; static int err_segs_recorded; static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS]; static int rt_read_nsegs; static int rt_read_preempted; -static int rt_last_cpu; static const char *rcu_torture_writer_state_getname(void) { @@ -1922,6 +1922,7 @@ static void rcutorture_one_extend(int *readstate, int newstate, bool insoftirq, struct torture_random_state *trsp, struct rt_read_seg *rtrsp) { + bool first; unsigned long flags; int idxnew1 = -1; int idxnew2 = -1; @@ -1930,12 +1931,11 @@ static void rcutorture_one_extend(int *readstate, int newstate, bool insoftirq, int statesnew = ~*readstate & newstate; int statesold = *readstate & ~newstate; + first = idxold1 == 0; WARN_ON_ONCE(idxold2 < 0); WARN_ON_ONCE(idxold2 & ~RCUTORTURE_RDR_ALLBITS); rcutorture_one_extend_check("before change", idxold1, statesnew, statesold, insoftirq); rtrsp->rt_readstate = newstate; - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) - rtrsp->rt_cpu = raw_smp_processor_id(); /* First, put new protection in place to avoid critical-section gap. */ if (statesnew & RCUTORTURE_RDR_BH) @@ -1957,6 +1957,14 @@ static void rcutorture_one_extend(int *readstate, int newstate, bool insoftirq, rcutorture_one_extend_check("during change", idxold1 | statesnew, statesnew, statesold, insoftirq); + // Sample CPU under both sets of protections to reduce confusion. + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) { + int cpu = raw_smp_processor_id(); + rtrsp->rt_cpu = cpu; + if (!first) + rtrsp[-1].rt_end_cpu = cpu; + } + /* * Next, remove old protection, in decreasing order of strength * to avoid unlock paths that aren't safe in the stronger @@ -2178,8 +2186,6 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) } if (cur_ops->reader_blocked) preempted = cur_ops->reader_blocked(); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) - rt_last_cpu = raw_smp_processor_id(); rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially @@ -3634,8 +3640,13 @@ rcu_torture_cleanup(void) err_segs[i].rt_delay_jiffies); firsttime = 0; } - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) - pr_cont(" CPU %-2d", err_segs[i].rt_cpu); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) { + pr_cont(" CPU %2d", err_segs[i].rt_cpu); + if (err_segs[i].rt_cpu != err_segs[i].rt_end_cpu) + pr_cont("->%-2d", err_segs[i].rt_end_cpu); + else + pr_cont(" ..."); + } if (err_segs[i].rt_delay_ms != 0) { pr_cont(" %s%ldms", firsttime ?
"" : "+", err_segs[i].rt_delay_ms); @@ -3666,8 +3677,6 @@ rcu_torture_cleanup(void) } if (rt_read_preempted) pr_alert("\tReader was preempted.\n"); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) - pr_alert("\tReader last ran on CPU %d.\n", rt_last_cpu); } if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); From 223f16b87d70a62058d3b3f4aae0da570b4380a1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 13 Nov 2024 19:10:19 -0800 Subject: [PATCH 19/31] rcutorture: Add per-reader-segment preemption diagnostics For preemptible RCU, this commit adds an indication, for each reader segment, of whether the rcu_torture_reader() task was on the ->blkd_tasks lists, though only in kernels built with CONFIG_RCU_TORTURE_TEST_LOG_CPU=y. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/rcutorture.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ab354bb7f1b6..41b661bf000a 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -464,10 +464,8 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) rtrsp->rt_delay_us = shortdelay_us; } if (!preempt_count() && - !(torture_random(rrsp) % (nrealreaders * 500))) { + !(torture_random(rrsp) % (nrealreaders * 500))) torture_preempt_schedule(); /* QS only if preemptible. */ - rtrsp->rt_preempted = true; - } } static void rcu_torture_read_unlock(int idx) @@ -1961,8 +1959,11 @@ static void rcutorture_one_extend(int *readstate, int newstate, bool insoftirq, if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU)) { int cpu = raw_smp_processor_id(); rtrsp->rt_cpu = cpu; - if (!first) + if (!first) { rtrsp[-1].rt_end_cpu = cpu; + if (cur_ops->reader_blocked) + rtrsp[-1].rt_preempted = cur_ops->reader_blocked(); + } } /* From 0fef924e3918e72768357a220c84e6b4dd2b6180 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Nov 2024 11:11:18 -0800 Subject: [PATCH 20/31] rcutorture: Use symbols for SRCU reader flavors This commit converts rcutorture.c values for the reader_flavor module parameter from hexadecimal to the SRCU_READ_FLAVOR_* C-preprocessor macros. The actual modprobe or kernel-boot-parameter values for reader_flavor must still be entered in hexadecimal. Link: https://lore.kernel.org/all/c48c9dca-fe07-4833-acaa-28c827e5a79e@amd.com/ Suggested-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcu.h | 6 ++++++ include/linux/srcutree.h | 6 +----- kernel/rcu/rcutorture.c | 14 +++++++------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..da8224d0f71c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,6 +43,12 @@ int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. */ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above.
+ #ifdef CONFIG_TINY_SRCU #include #elif defined(CONFIG_TREE_SRCU) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..80016bbed672 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -26,6 +26,7 @@ struct srcu_data { atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ + /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -43,11 +44,6 @@ struct srcu_data { struct srcu_struct *ssp; }; -/* Values for ->srcu_reader_flavor. */ -#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). -#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). -#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). - /* * Node in SRCU combining tree, similar in function to rcu_data. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 41b661bf000a..d26fb1d33ed9 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -121,7 +121,7 @@ torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disab torture_param(int, preempt_interval, MSEC_PER_SEC, "Interval between preemptions (ms)"); torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)"); torture_param(int, read_exit_burst, 16, "# of read-then-exit bursts per episode, zero to disable"); -torture_param(int, reader_flavor, 0x1, "Reader flavors to use, one per bit."); +torture_param(int, reader_flavor, SRCU_READ_FLAVOR_NORMAL, "Reader flavors to use, one per bit."); torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles"); torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); @@ -679,17 +679,17 @@ static int srcu_torture_read_lock(void) int idx; int ret = 0; - if ((reader_flavor & 0x1) || !(reader_flavor & 0x7)) { + if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) { idx = srcu_read_lock(srcu_ctlp); WARN_ON_ONCE(idx & ~0x1); ret += idx; } - if (reader_flavor & 0x2) { + if (reader_flavor & SRCU_READ_FLAVOR_NMI) { idx = srcu_read_lock_nmisafe(srcu_ctlp); WARN_ON_ONCE(idx & ~0x1); ret += idx << 1; } - if (reader_flavor & 0x4) { + if (reader_flavor & SRCU_READ_FLAVOR_LITE) { idx = srcu_read_lock_lite(srcu_ctlp); WARN_ON_ONCE(idx & ~0x1); ret += idx << 2; @@ -719,11 +719,11 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) static void srcu_torture_read_unlock(int idx) { WARN_ON_ONCE((reader_flavor && (idx & ~reader_flavor)) || (!reader_flavor && (idx & ~0x1))); - if (reader_flavor & 0x4) + if (reader_flavor & SRCU_READ_FLAVOR_LITE) srcu_read_unlock_lite(srcu_ctlp, (idx & 0x4) >> 2); - if (reader_flavor & 0x2) + if (reader_flavor & SRCU_READ_FLAVOR_NMI) srcu_read_unlock_nmisafe(srcu_ctlp, (idx & 0x2) >> 1); - if ((reader_flavor & 0x1) || !(reader_flavor & 0x7)) + if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) srcu_read_unlock(srcu_ctlp, idx & 0x1); } From 049dfe96baf97228a9e98eaf50a8a7386ec7a483 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Oct 2024 16:57:38 +0200 Subject: [PATCH 21/31] rcu: Report callbacks enqueued on offline CPU blind spot Callbacks enqueued after rcutree_report_cpu_dead() fall into RCU barrier blind spot. Report any potential misuse. Reported-by: Paul E. 
McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ff98233d4aa5..24f1cb292a92 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3084,8 +3084,11 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) head->func = func; head->next = NULL; kasan_record_aux_stack_noalloc(head); + local_irq_save(flags); rdp = this_cpu_ptr(&rcu_data); + RCU_LOCKDEP_WARN(!rcu_rdp_cpu_online(rdp), "Callback enqueued on offline CPU!"); + lazy = lazy_in && !rcu_async_should_hurry(); /* Add the callback to our list. */ From d16e32f75f30d5228d9e0a3f6ca77b62c3a4383d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 Oct 2024 11:43:28 -0700 Subject: [PATCH 22/31] rcu: Make rcu_report_exp_cpu_mult() caller acquire lock There is a hard-to-trigger bug in the expedited grace-period computation whose fix requires the __sync_rcu_exp_select_node_cpus() function to check that the grace-period sequence number has not changed before invoking rcu_report_exp_cpu_mult(). However, this check must be done while holding the leaf rcu_node structure's ->lock. This commit therefore prepares for that fix by moving this lock's acquisition from rcu_report_exp_cpu_mult() to its callers (all two of them). Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_exp.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index fb664d3a01c9..581e88d39542 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -227,16 +227,16 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake) /* * Report expedited quiescent state for multiple CPUs, all covered by the - * specified leaf rcu_node structure. + * specified leaf rcu_node structure, which is acquired by the caller. */ -static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, +static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, unsigned long flags, unsigned long mask, bool wake) + __releases(rnp->lock) { int cpu; - unsigned long flags; struct rcu_data *rdp; - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_lockdep_assert_held_rcu_node(rnp); if (!(rnp->expmask & mask)) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; @@ -257,8 +257,12 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, */ static void rcu_report_exp_rdp(struct rcu_data *rdp) { + unsigned long flags; + struct rcu_node *rnp = rdp->mynode; + WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); - rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rcu_report_exp_cpu_mult(rnp, flags, rdp->grpmask, true); } /* Common code for work-done checking. */ @@ -432,8 +436,10 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } /* Report quiescent states for those that went offline. */ - if (mask_ofl_test) - rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false); + if (mask_ofl_test) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rcu_report_exp_cpu_mult(rnp, flags, mask_ofl_test, false); + } } static void rcu_exp_sel_wait_wake(unsigned long s); From e2bd168295e4eb719a343086baddfe0abca512a2 Mon Sep 17 00:00:00 2001 From: "Paul E.
McKenney" Date: Fri, 25 Oct 2024 15:56:17 -0700 Subject: [PATCH 23/31] rcu: Move rcu_report_exp_rdp() setting of ->cpu_no_qs.b.exp under lock This commit reduces the state space of rcu_report_exp_rdp() by moving the setting of ->cpu_no_qs.b.exp under the rcu_node structure's ->lock. The lock isn't really all that important here, given that this per-CPU field is supposed to be written only by its CPU, but the disabling of interrupts excludes things like rcu_exp_handler(), which also can write to this same field. Avoiding this sort of interleaved access reduces the state space. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 581e88d39542..5c4ea66cc00d 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -260,8 +260,8 @@ static void rcu_report_exp_rdp(struct rcu_data *rdp) unsigned long flags; struct rcu_node *rnp = rdp->mynode; - WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); raw_spin_lock_irqsave_rcu_node(rnp, flags); + WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); rcu_report_exp_cpu_mult(rnp, flags, rdp->grpmask, true); } From 6ae4c30fee05d97c4f53237e3cd13795a6f44422 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 Oct 2024 16:33:36 -0700 Subject: [PATCH 24/31] rcu: Replace open-coded rcu_exp_need_qs() from rcu_exp_handler() with call Currently, the preemptible implementation of rcu_exp_handler() almost open-codes rcu_exp_need_qs(). A call to that function would be shorter and would improve expediting in cases where rcu_exp_handler() interrupted a preemption-disabled or bh-disabled region of code. This commit therefore moves rcu_exp_need_qs() out of the non-preemptible leg of the enclosing #ifdef and replaces the open coding in preemptible rcu_exp_handler() with a call to rcu_exp_need_qs(). Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_exp.h | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 5c4ea66cc00d..f3884393b947 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -718,6 +718,16 @@ static void rcu_exp_sel_wait_wake(unsigned long s) rcu_exp_wait_wake(s); } +/* Request an expedited quiescent state. */ +static void rcu_exp_need_qs(void) +{ + __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true); + /* Store .exp before .rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true); + set_tsk_need_resched(current); + set_preempt_need_resched(); +} + #ifdef CONFIG_PREEMPT_RCU /* @@ -742,13 +752,10 @@ static void rcu_exp_handler(void *unused) */ if (!depth) { if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) || - rcu_is_cpu_rrupt_from_idle()) { + rcu_is_cpu_rrupt_from_idle()) rcu_report_exp_rdp(rdp); - } else { - WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); - set_tsk_need_resched(t); - set_preempt_need_resched(); - } + else + rcu_exp_need_qs(); return; } @@ -841,16 +848,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) #else /* #ifdef CONFIG_PREEMPT_RCU */ -/* Request an expedited quiescent state. */ -static void rcu_exp_need_qs(void) -{ - __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true); - /* Store .exp before .rcu_urgent_qs. 
*/ - smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true); - set_tsk_need_resched(current); - set_preempt_need_resched(); -} - /* Invoked on each online non-idle CPU for expedited quiescent state. */ static void rcu_exp_handler(void *unused) { From 7a323371197b184c8f0cff0d67d74f48f0098164 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 Oct 2024 17:17:16 -0700 Subject: [PATCH 25/31] rcu: Make preemptible rcu_exp_handler() check idempotency Although the non-preemptible implementation of rcu_exp_handler() contains checks to enforce idempotency, the preemptible version does not. The reason for this omission is that in preemptible kernels, there is no reporting of quiescent states from CPU hotplug notifiers, and thus no need for idempotency. In theory, anyway. In practice, accidents happen. This commit therefore adds checks under WARN_ON_ONCE() to catch any such accidents. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_exp.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f3884393b947..6985c998fe6b 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -746,7 +746,19 @@ static void rcu_exp_handler(void *unused) struct task_struct *t = current; /* - * First, the common case of not being in an RCU read-side + * First, is there no need for a quiescent state from this CPU, + * or is this CPU already looking for a quiescent state for the + * current grace period? If either is the case, just leave. + * However, this should not happen due to the preemptible + * sync_sched_exp_online_cleanup() implementation being a no-op, + * so warn if this does happen. + */ + if (WARN_ON_ONCE(!(READ_ONCE(rnp->expmask) & rdp->grpmask) || + READ_ONCE(rdp->cpu_no_qs.b.exp))) + return; + + /* + * Second, the common case of not being in an RCU read-side * critical section. If also enabled or idle, immediately * report the quiescent state, otherwise defer. */ @@ -760,7 +772,7 @@ static void rcu_exp_handler(void *unused) } /* - * Second, the less-common case of being in an RCU read-side + * Third, the less-common case of being in an RCU read-side * critical section. In this case we can count on a future * rcu_read_unlock(). However, this rcu_read_unlock() might * execute on some other CPU, but in that case there will be @@ -781,7 +793,7 @@ static void rcu_exp_handler(void *unused) return; } - // Finally, negative nesting depth should not happen. + // Fourth and finally, negative nesting depth should not happen. WARN_ON_ONCE(1); } From ecc5e6b0d3c982091b82615af21817ddf9cfdd60 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 Oct 2024 17:44:28 -0700 Subject: [PATCH 26/31] rcu: Add KCSAN exclusive-writer assertions for rdp->cpu_no_qs.b.exp The value of rdp->cpu_no_qs.b.exp may be changed only by the corresponding CPU, and that CPU is not even allowed to race with itself, for example, via interrupt handlers. This commit therefore adds KCSAN exclusive-writer assertions to check this constraint. Signed-off-by: Paul E. 
McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_exp.h | 4 ++++ kernel/rcu/tree_plugin.h | 1 + 2 files changed, 5 insertions(+) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6985c998fe6b..ce5b09921d04 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -262,6 +262,7 @@ static void rcu_report_exp_rdp(struct rcu_data *rdp) raw_spin_lock_irqsave_rcu_node(rnp, flags); WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); + ASSERT_EXCLUSIVE_WRITER(rdp->cpu_no_qs.b.exp); rcu_report_exp_cpu_mult(rnp, flags, rdp->grpmask, true); } @@ -721,6 +722,7 @@ static void rcu_exp_sel_wait_wake(unsigned long s) /* Request an expedited quiescent state. */ static void rcu_exp_need_qs(void) { + ASSERT_EXCLUSIVE_WRITER_SCOPED(*this_cpu_ptr(&rcu_data.cpu_no_qs.b.exp)); __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true); /* Store .exp before .rcu_urgent_qs. */ smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true); @@ -753,6 +755,7 @@ static void rcu_exp_handler(void *unused) * sync_sched_exp_online_cleanup() implementation being a no-op, * so warn if this does happen. */ + ASSERT_EXCLUSIVE_WRITER_SCOPED(rdp->cpu_no_qs.b.exp); if (WARN_ON_ONCE(!(READ_ONCE(rnp->expmask) & rdp->grpmask) || READ_ONCE(rdp->cpu_no_qs.b.exp))) return; @@ -867,6 +870,7 @@ static void rcu_exp_handler(void *unused) struct rcu_node *rnp = rdp->mynode; bool preempt_bh_enabled = !(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)); + ASSERT_EXCLUSIVE_WRITER_SCOPED(rdp->cpu_no_qs.b.exp); if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) return; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3927ea5f7955..bb7ca6eb9ef0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -275,6 +275,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) rcu_report_exp_rdp(rdp); else WARN_ON_ONCE(rdp->cpu_no_qs.b.exp); + ASSERT_EXCLUSIVE_WRITER_SCOPED(rdp->cpu_no_qs.b.exp); } /* From 1bb03ad383a7311f609ecfd13c3aaab248c0627f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 Oct 2024 19:56:20 -0700 Subject: [PATCH 27/31] rcu: Add lockdep_assert_irqs_disabled() to rcu_exp_need_qs() Callers to rcu_exp_need_qs() are supposed to disable interrupts, so this commit enlists lockdep's aid in checking this. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_exp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index ce5b09921d04..77efed89c79e 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -722,6 +722,7 @@ static void rcu_exp_sel_wait_wake(unsigned long s) /* Request an expedited quiescent state. */ static void rcu_exp_need_qs(void) { + lockdep_assert_irqs_disabled(); ASSERT_EXCLUSIVE_WRITER_SCOPED(*this_cpu_ptr(&rcu_data.cpu_no_qs.b.exp)); __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true); /* Store .exp before .rcu_urgent_qs. */ From 8f6f1636e34531477f0a44df39943e79f89d83c2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Oct 2024 12:22:54 -0700 Subject: [PATCH 28/31] MAINTAINERS: Update RCU git tree RCU now has a shiny new shared git tree, so this commit updates MAINTAINERS to point to it instead of my personal -rcu tree. Reported-by: Song Liu Signed-off-by: Paul E. 
McKenney Signed-off-by: Uladzislau Rezki (Sony) --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 17daa9ee9384..6ff7ba6cc270 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13301,7 +13301,7 @@ L: linux-kernel@vger.kernel.org L: linux-arch@vger.kernel.org L: lkmm@lists.linux.dev S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev F: Documentation/atomic_bitops.txt F: Documentation/atomic_t.txt F: Documentation/core-api/refcount-vs-atomic.rst @@ -19603,7 +19603,7 @@ R: Mathieu Desnoyers R: Lai Jiangshan L: rcu@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev F: tools/testing/selftests/rcutorture RDACM20 Camera Sensor @@ -19682,7 +19682,7 @@ R: Zqiang L: rcu@vger.kernel.org S: Supported W: http://www.rdrop.com/users/paulmck/RCU/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev F: Documentation/RCU/ F: include/linux/rcu* F: kernel/rcu/ @@ -21578,7 +21578,7 @@ R: Mathieu Desnoyers L: rcu@vger.kernel.org S: Supported W: http://www.rdrop.com/users/paulmck/RCU/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev F: include/linux/srcu*.h F: kernel/rcu/srcu*.c @@ -23707,7 +23707,7 @@ M: "Paul E. McKenney" M: Josh Triplett L: linux-kernel@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev F: Documentation/RCU/torture.rst F: kernel/locking/locktorture.c F: kernel/rcu/rcuscale.c From d465492a224b2409508224cf6970d7b97e2285cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 Oct 2024 15:09:39 -0700 Subject: [PATCH 29/31] srcu: Guarantee non-negative return value from srcu_read_lock() For almost 20 years, the int return value from srcu_read_lock() has been always either zero or one. This commit therefore documents the fact that it will be non-negative, and does the same for the underlying __srcu_read_lock(). [ paulmck: Apply Andrii Nakryiko feedback. ] Signed-off-by: Paul E. McKenney Acked-by: Andrii Nakryiko Acked-by: Peter Zijlstra (Intel) Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcu.h | 15 ++++++++------- kernel/rcu/srcutree.c | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..abaddd7e6ddf 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -232,13 +232,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * The return value from srcu_read_lock() must be passed unaltered - * to the matching srcu_read_unlock(). Note that srcu_read_lock() and - * the matching srcu_read_unlock() must occur in the same context, for - * example, it is illegal to invoke srcu_read_unlock() in an irq handler - * if the matching srcu_read_lock() was invoked in process context. Or, - * for that matter to invoke srcu_read_unlock() from one task and the - * matching srcu_read_lock() from another. 
+ * The return value from srcu_read_lock() is guaranteed to be + * non-negative. This value must be passed unaltered to the matching + * srcu_read_unlock(). Note that srcu_read_lock() and the matching + * srcu_read_unlock() must occur in the same context, for example, it is + * illegal to invoke srcu_read_unlock() in an irq handler if the matching + * srcu_read_lock() was invoked in process context. Or, for that matter to + * invoke srcu_read_unlock() from one task and the matching srcu_read_lock() + * from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 5e2e53464794..26ef58b481aa 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -738,7 +738,8 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor); /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. - * Returns an index that must be passed to the matching srcu_read_unlock(). + * Returns a guaranteed non-negative index that must be passed to the + * matching __srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *ssp) { From cfb07b07dda2a17feed96c80c5af85937fcd2e9c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Nov 2024 16:53:53 -0800 Subject: [PATCH 30/31] srcu: Fix typo s/srcu_check_read_flavor()/__srcu_check_read_flavor()/ This commit fixes a typo in which a comment needed to have been updated from srcu_check_read_flavor() to __srcu_check_read_flavor(). Reported-by: Neeraj Upadhyay Closes: https://lore.kernel.org/all/b75d1fcd-6fcd-4619-bb5c-507fa599ee28@amd.com/ Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..4e69f88bcab9 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -258,7 +258,7 @@ static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) return; - // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. + // Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered. __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); } From 45c7c67643ae6391a354e42cf729d807fe341491 Mon Sep 17 00:00:00 2001 From: Feng Lee <379943137@qq.com> Date: Tue, 19 Nov 2024 15:29:49 +0800 Subject: [PATCH 31/31] srcu: Remove redundant GP sequence checks in srcu_funnel_gp_start We will perform GP sequence checking at the beginning of srcu_gp_start, thus making it safe to remove duplicate GP sequence checks prior to calling srcu_gp_start. Signed-off-by: Feng Lee <379943137@qq.com> Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/srcutree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 26ef58b481aa..b83c74c4dcc0 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1077,7 +1077,6 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, /* If grace period not already in progress, start it. */ if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && rcu_seq_state(sup->srcu_gp_seq) == SRCU_STATE_IDLE) { - WARN_ON_ONCE(ULONG_CMP_GE(sup->srcu_gp_seq, sup->srcu_gp_seq_needed)); srcu_gp_start(ssp); // And how can that list_add() in the "else" clause
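
A minimal usage sketch of the srcu_read_lock()/srcu_read_unlock() pairing whose non-negative return value is documented above; the srcu_struct and reader function below are hypothetical illustrations, not code from this series:

#include <linux/srcu.h>

DEFINE_SRCU(example_srcu);

static void example_srcu_reader(void)
{
	int idx;

	/* The returned index is guaranteed non-negative. */
	idx = srcu_read_lock(&example_srcu);
	/* SRCU read-side critical section: access SRCU-protected data here. */
	srcu_read_unlock(&example_srcu, idx);	/* Pass the index back unaltered. */
}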