stop_machine: Add function and caller debug info

Crashes in stop-machine are hard to connect to the calling code. Record the
stop function and its caller, and print them for stopper tasks, to help with that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Link: https://lkml.kernel.org/r/20201023102346.116513635@infradead.org
This commit is contained in:
Peter Zijlstra 2020-09-21 12:58:17 +02:00
parent 23859ae444
commit a8b62fd085
4 changed files with 32 additions and 3 deletions

View File

@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
struct cpu_stop_work { struct cpu_stop_work {
struct list_head list; /* cpu_stopper->works */ struct list_head list; /* cpu_stopper->works */
cpu_stop_fn_t fn; cpu_stop_fn_t fn;
unsigned long caller;
void *arg; void *arg;
struct cpu_stop_done *done; struct cpu_stop_done *done;
}; };
@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
void stop_machine_unpark(int cpu); void stop_machine_unpark(int cpu);
void stop_machine_yield(const struct cpumask *cpumask); void stop_machine_yield(const struct cpumask *cpumask);
extern void print_stop_info(const char *log_lvl, struct task_struct *task);
#else /* CONFIG_SMP */ #else /* CONFIG_SMP */
#include <linux/workqueue.h> #include <linux/workqueue.h>
@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
return false; return false;
} }
static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
/* /*

View File

@ -6447,6 +6447,7 @@ void sched_show_task(struct task_struct *p)
(unsigned long)task_thread_info(p)->flags); (unsigned long)task_thread_info(p)->flags);
print_worker_info(KERN_INFO, p); print_worker_info(KERN_INFO, p);
print_stop_info(KERN_INFO, p);
show_stack(p, NULL, KERN_INFO); show_stack(p, NULL, KERN_INFO);
put_task_stack(p); put_task_stack(p);
} }

View File

@ -42,11 +42,27 @@ struct cpu_stopper {
struct list_head works; /* list of pending works */ struct list_head works; /* list of pending works */
struct cpu_stop_work stop_work; /* for stop_cpus */ struct cpu_stop_work stop_work; /* for stop_cpus */
unsigned long caller;
cpu_stop_fn_t fn;
}; };
static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false; static bool stop_machine_initialized = false;
/*
 * print_stop_info - report what a stopper task was asked to run
 * @log_lvl: printk log-level prefix, emitted in front of the message
 * @task: task being dumped
 *
 * Looks up the per-CPU stopper for the CPU @task is on.  When @task is that
 * CPU's stopper thread, prints the recorded stop function and the recorded
 * caller address; for any other task nothing is printed.
 */
void print_stop_info(const char *log_lvl, struct task_struct *task)
{
	struct cpu_stopper *st;

	/*
	 * A stopper task cannot migrate, so task_cpu() is stable for it and
	 * the lookup below finds the matching stopper.
	 */
	st = per_cpu_ptr(&cpu_stopper, task_cpu(task));

	if (task == st->thread)
		printk("%sStopper: %pS <- %pS\n", log_lvl, st->fn, (void *)st->caller);
}
/* static data for stop_cpus */ /* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex); static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress; static bool stop_cpus_in_progress;
@ -123,7 +139,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{ {
struct cpu_stop_done done; struct cpu_stop_done done;
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };
cpu_stop_init_done(&done, 1); cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work)) if (!cpu_stop_queue_work(cpu, &work))
@ -331,7 +347,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
work1 = work2 = (struct cpu_stop_work){ work1 = work2 = (struct cpu_stop_work){
.fn = multi_cpu_stop, .fn = multi_cpu_stop,
.arg = &msdata, .arg = &msdata,
.done = &done .done = &done,
.caller = _RET_IP_,
}; };
cpu_stop_init_done(&done, 2); cpu_stop_init_done(&done, 2);
@ -367,7 +384,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf) struct cpu_stop_work *work_buf)
{ {
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
return cpu_stop_queue_work(cpu, work_buf); return cpu_stop_queue_work(cpu, work_buf);
} }
@ -487,6 +504,8 @@ repeat:
int ret; int ret;
/* cpu stop callbacks must not sleep, make in_atomic() == T */ /* cpu stop callbacks must not sleep, make in_atomic() == T */
stopper->caller = work->caller;
stopper->fn = fn;
preempt_count_inc(); preempt_count_inc();
ret = fn(arg); ret = fn(arg);
if (done) { if (done) {
@ -495,6 +514,8 @@ repeat:
cpu_stop_signal_done(done); cpu_stop_signal_done(done);
} }
preempt_count_dec(); preempt_count_dec();
stopper->fn = NULL;
stopper->caller = 0;
WARN_ONCE(preempt_count(), WARN_ONCE(preempt_count(),
"cpu_stop: %ps(%p) leaked preempt count\n", fn, arg); "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
goto repeat; goto repeat;

View File

@ -12,6 +12,7 @@
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/kexec.h> #include <linux/kexec.h>
#include <linux/utsname.h> #include <linux/utsname.h>
#include <linux/stop_machine.h>
static char dump_stack_arch_desc_str[128]; static char dump_stack_arch_desc_str[128];
@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl)
log_lvl, dump_stack_arch_desc_str); log_lvl, dump_stack_arch_desc_str);
print_worker_info(log_lvl, current); print_worker_info(log_lvl, current);
print_stop_info(log_lvl, current);
} }
/** /**