// SPDX-License-Identifier: GPL-2.0
/*
 * trace_output.c
 *
 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/idr.h>

#include "trace_output.h"

/* must be a power of 2 */
#define EVENT_HASHSIZE	128
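
/*
 * trace_event_sem protects event_hash below, which maps an event's type
 * number to the struct trace_event that knows how to print it (see
 * ftrace_find_event()).
 */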
DECLARE_RWSEM(trace_event_sem);

static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;

enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry = iter->ent;
	struct bputs_entry *field;

	trace_assign_type(field, entry);

	trace_seq_puts(s, field->str);

	return trace_handle_return(s);
}

enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry = iter->ent;
	struct bprint_entry *field;

	trace_assign_type(field, entry);

	trace_seq_bprintf(s, field->fmt, field->buf);

	return trace_handle_return(s);
}

enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry = iter->ent;
	struct print_entry *field;

	trace_assign_type(field, entry);

	trace_seq_puts(s, field->buf);

	return trace_handle_return(s);
}
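
/*
 * Decode a flags word into the names given in @flag_array, separated by
 * @delim; bits with no matching name are appended as a hex value.  The
 * returned pointer is the NUL-terminated string inside @p, as used by
 * __print_flags().
 */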
const char *
trace_print_flags_seq(struct trace_seq *p, const char *delim,
		      unsigned long flags,
		      const struct trace_print_flags *flag_array)
{
	unsigned long mask;
	const char *str;
	const char *ret = trace_seq_buffer_ptr(p);
	int i, first = 1;

	for (i = 0; flag_array[i].name && flags; i++) {

		mask = flag_array[i].mask;
		if ((flags & mask) != mask)
			continue;

		str = flag_array[i].name;
		flags &= ~mask;
		if (!first && delim)
			trace_seq_puts(p, delim);
		else
			first = 0;
		trace_seq_puts(p, str);
	}

	/* check for left over flags */
	if (flags) {
		if (!first && delim)
			trace_seq_puts(p, delim);
		trace_seq_printf(p, "0x%lx", flags);
	}

	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL(trace_print_flags_seq);
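
/*
 * Map @val to the matching name in @symbol_array, or print it as hex if
 * nothing matches; this backs __print_symbolic().
 */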
const char *
trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
			const struct trace_print_flags *symbol_array)
{
	int i;
	const char *ret = trace_seq_buffer_ptr(p);

	for (i = 0; symbol_array[i].name; i++) {

		if (val != symbol_array[i].mask)
			continue;

		trace_seq_puts(p, symbol_array[i].name);
		break;
	}

	if (ret == (const char *)(trace_seq_buffer_ptr(p)))
		trace_seq_printf(p, "0x%lx", val);

	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL(trace_print_symbols_seq);
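
/*
 * On 32-bit kernels a u64 does not fit in unsigned long, so 64-bit flag
 * and symbol fields need these separate _u64 variants.
 */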
#if BITS_PER_LONG == 32
const char *
trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
			  unsigned long long flags,
			  const struct trace_print_flags_u64 *flag_array)
{
	unsigned long long mask;
	const char *str;
	const char *ret = trace_seq_buffer_ptr(p);
	int i, first = 1;

	for (i = 0; flag_array[i].name && flags; i++) {

		mask = flag_array[i].mask;
		if ((flags & mask) != mask)
			continue;

		str = flag_array[i].name;
		flags &= ~mask;
		if (!first && delim)
			trace_seq_puts(p, delim);
		else
			first = 0;
		trace_seq_puts(p, str);
	}

	/* check for left over flags */
	if (flags) {
		if (!first && delim)
			trace_seq_puts(p, delim);
		trace_seq_printf(p, "0x%llx", flags);
	}

	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL(trace_print_flags_seq_u64);

const char *
trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
			    const struct trace_print_flags_u64 *symbol_array)
{
	int i;
	const char *ret = trace_seq_buffer_ptr(p);

	for (i = 0; symbol_array[i].name; i++) {

		if (val != symbol_array[i].mask)
			continue;

		trace_seq_puts(p, symbol_array[i].name);
		break;
	}

	if (ret == (const char *)(trace_seq_buffer_ptr(p)))
		trace_seq_printf(p, "0x%llx", val);

	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL(trace_print_symbols_seq_u64);
#endif
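
/*
 * Render a recorded bitmask (e.g. a cpumask saved with __bitmask()) as a
 * hex string; this backs __get_bitmask().
 */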
const char *
trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
			unsigned int bitmask_size)
{
	const char *ret = trace_seq_buffer_ptr(p);

	trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL_GPL(trace_print_bitmask_seq);

/**
 * trace_print_hex_seq - print buffer as hex sequence
 * @p: trace seq struct to write to
 * @buf: The buffer to print
 * @buf_len: Length of @buf in bytes
 * @concatenate: Print @buf as single hex string or with spacing
 *
 * Prints the passed buffer as a hex sequence either as a whole,
 * single hex string if @concatenate is true or with spacing after
 * each byte in case @concatenate is false.
 */
const char *
trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len,
		    bool concatenate)
{
	int i;
	const char *ret = trace_seq_buffer_ptr(p);
	const char *fmt = concatenate ? "%*phN" : "%*ph";

	for (i = 0; i < buf_len; i += 16) {
		if (!concatenate && i != 0)
			trace_seq_putc(p, ' ');
		trace_seq_printf(p, fmt, min(buf_len - i, 16), &buf[i]);
	}
	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL(trace_print_hex_seq);
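
/*
 * Print a recorded array of @count elements, each @el_size bytes wide, as
 * "{0x...,0x...}"; this backs __print_array().
 */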
const char *
trace_print_array_seq(struct trace_seq *p, const void *buf, int count,
		      size_t el_size)
{
	const char *ret = trace_seq_buffer_ptr(p);
	const char *prefix = "";
	void *ptr = (void *)buf;
	size_t buf_len = count * el_size;

	trace_seq_putc(p, '{');

	while (ptr < buf + buf_len) {
		switch (el_size) {
		case 1:
			trace_seq_printf(p, "%s0x%x", prefix,
					 *(u8 *)ptr);
			break;
		case 2:
			trace_seq_printf(p, "%s0x%x", prefix,
					 *(u16 *)ptr);
			break;
		case 4:
			trace_seq_printf(p, "%s0x%x", prefix,
					 *(u32 *)ptr);
			break;
		case 8:
			trace_seq_printf(p, "%s0x%llx", prefix,
					 *(u64 *)ptr);
			break;
		default:
			trace_seq_printf(p, "BAD SIZE:%zu 0x%x", el_size,
					 *(u8 *)ptr);
			el_size = 1;
		}
		prefix = ",";
		ptr += el_size;
	}

	trace_seq_putc(p, '}');
	trace_seq_putc(p, 0);

	return ret;
}
EXPORT_SYMBOL(trace_print_array_seq);
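
/*
 * Emit a multi-line hex dump of @buf (in the style of print_hex_dump())
 * into the trace output.
 */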
const char *
trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str,
			 int prefix_type, int rowsize, int groupsize,
			 const void *buf, size_t len, bool ascii)
{
	const char *ret = trace_seq_buffer_ptr(p);

	trace_seq_putc(p, '\n');
	trace_seq_hex_dump(p, prefix_str, prefix_type,
			   rowsize, groupsize, buf, len, ascii);
	trace_seq_putc(p, 0);
	return ret;
}
EXPORT_SYMBOL(trace_print_hex_dump_seq);
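
/*
 * Verify that the entry in the ring buffer matches the registered event
 * and write the "<event name>: " prefix before the event's own output
 * callback runs.
 */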
int trace_raw_output_prep(struct trace_iterator *iter,
			  struct trace_event *trace_event)
{
	struct trace_event_call *event;
	struct trace_seq *s = &iter->seq;
	struct trace_seq *p = &iter->tmp_seq;
	struct trace_entry *entry;

	event = container_of(trace_event, struct trace_event_call, event);
	entry = iter->ent;

	if (entry->type != event->event.type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	trace_seq_init(p);
	trace_seq_printf(s, "%s: ", trace_event_name(event));

	return trace_handle_return(s);
}
EXPORT_SYMBOL(trace_raw_output_prep);
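
/*
 * Print an event through its TP_printk() format.  ignore_event() skips
 * events flagged as having "%s" arguments that may no longer point at
 * memory that is safe to dereference.
 */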
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
	struct trace_seq *s = &iter->seq;
	va_list ap;

	if (ignore_event(iter))
		return;

	va_start(ap, fmt);
	trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
	va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);

static __printf(3, 0)
int trace_output_raw(struct trace_iterator *iter, char *name,
		     char *fmt, va_list ap)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "%s: ", name);
	trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);

	return trace_handle_return(s);
}

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_output_raw(iter, name, fmt, ap);
	va_end(ap);

	return ret;
}
EXPORT_SYMBOL_GPL(trace_output_call);
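
/*
 * When @addr is the kretprobe trampoline the original symbol cannot be
 * resolved, so report a placeholder instead of the trampoline's name.
 */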
static inline const char *kretprobed(const char *name, unsigned long addr)
{
	if (is_kretprobe_trampoline(addr))
		return "[unknown/kretprobe'd]";
	return name;
}

void
trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
{
#ifdef CONFIG_KALLSYMS
	char str[KSYM_SYMBOL_LEN];
	const char *name;

	if (offset)
		sprint_symbol(str, address);
	else
		kallsyms_lookup(address, NULL, NULL, NULL, str);
	name = kretprobed(str, address);

	if (name && strlen(name)) {
		trace_seq_puts(s, name);
		return;
	}
#endif
	trace_seq_printf(s, "0x%08lx", address);
}

#ifndef CONFIG_64BIT
# define IP_FMT "%08lx"
#else
# define IP_FMT "%016lx"
#endif
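
/*
 * Resolve a user-space address to the file mapped at that address and the
 * offset within its VMA, falling back to the raw address value.
 */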
static int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
			     unsigned long ip, unsigned long sym_flags)
{
	struct file *file = NULL;
	unsigned long vmstart = 0;
	int ret = 1;

	if (s->full)
		return 0;

	if (mm) {
		const struct vm_area_struct *vma;

		mmap_read_lock(mm);
		vma = find_vma(mm, ip);
		if (vma) {
			file = vma->vm_file;
			vmstart = vma->vm_start;
		}
		if (file) {
			ret = trace_seq_path(s, file_user_path(file));
			if (ret)
				trace_seq_printf(s, "[+0x%lx]",
						 ip - vmstart);
		}
		mmap_read_unlock(mm);
	}
	if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
		trace_seq_printf(s, " <" IP_FMT ">", ip);
	return !trace_seq_has_overflowed(s);
}

int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
	if (!ip) {
		trace_seq_putc(s, '0');
		goto out;
	}

	trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);

	if (sym_flags & TRACE_ITER_SYM_ADDR)
		trace_seq_printf(s, " <" IP_FMT ">", ip);

 out:
	return !trace_seq_has_overflowed(s);
}

/**
 * trace_print_lat_fmt - print the irq, preempt and lockdep fields
 * @s: trace seq struct to write to
 * @entry: The trace entry field from the ring buffer
 *
 * Prints the generic fields of irqs off, in hard or softirq, preempt
 * count.
 */
int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
	char hardsoft_irq;
	char need_resched;
	char irqs_off;
	int hardirq;
	int softirq;
	int bh_off;
	int nmi;

	nmi = entry->flags & TRACE_FLAG_NMI;
	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
	bh_off = entry->flags & TRACE_FLAG_BH_OFF;

	irqs_off =
		(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
		bh_off ? 'b' :
		'.';

	switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY |
				TRACE_FLAG_PREEMPT_RESCHED)) {
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
		need_resched = 'B';
		break;
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
		need_resched = 'N';
		break;
	case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
		need_resched = 'L';
		break;
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY:
		need_resched = 'b';
		break;
	case TRACE_FLAG_NEED_RESCHED:
		need_resched = 'n';
		break;
	case TRACE_FLAG_PREEMPT_RESCHED:
		need_resched = 'p';
		break;
	case TRACE_FLAG_NEED_RESCHED_LAZY:
		need_resched = 'l';
		break;
	default:
		need_resched = '.';
		break;
	}

	hardsoft_irq =
		(nmi && hardirq)     ? 'Z' :
		nmi                  ? 'z' :
		(hardirq && softirq) ? 'H' :
		hardirq              ? 'h' :
		softirq              ? 's' :
		                       '.' ;

	trace_seq_printf(s, "%c%c%c",
			 irqs_off, need_resched, hardsoft_irq);

	if (entry->preempt_count & 0xf)
		trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
	else
		trace_seq_putc(s, '.');

	if (entry->preempt_count & 0xf0)
		trace_seq_printf(s, "%x", entry->preempt_count >> 4);
	else
		trace_seq_putc(s, '.');

	return !trace_seq_has_overflowed(s);
}

static int
lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(entry->pid, comm);

	trace_seq_printf(s, "%8.8s-%-7d %3d",
			 comm, entry->pid, cpu);

	return trace_print_lat_fmt(s, entry);
}
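
/*
 * trace_find_mark() below returns the symbol of the first threshold that
 * the measured delay exceeds, or ' ' when the delay does not exceed 10 usecs.
 */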
#undef MARK
#define MARK(v, s) {.val = v, .sym = s}
/* trace overhead mark */
static const struct trace_mark {
	unsigned long long	val; /* unit: nsec */
	char			sym;
} mark[] = {
	MARK(1000000000ULL	, '$'), /* 1 sec */
	MARK(100000000ULL	, '@'), /* 100 msec */
	MARK(10000000ULL	, '*'), /* 10 msec */
	MARK(1000000ULL		, '#'), /* 1000 usecs */
	MARK(100000ULL		, '!'), /* 100 usecs */
	MARK(10000ULL		, '+'), /* 10 usecs */
};
#undef MARK

char trace_find_mark(unsigned long long d)
{
	int i;
	int size = ARRAY_SIZE(mark);

	for (i = 0; i < size; i++) {
		if (d > mark[i].val)
			break;
	}

	return (i == size) ? ' ' : mark[i].sym;
}

static int
lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
{
	struct trace_array *tr = iter->tr;
	unsigned long verbose = tr->trace_flags & TRACE_ITER_VERBOSE;
	unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
	unsigned long long abs_ts = iter->ts - iter->array_buffer->time_start;
	unsigned long long rel_ts = next_ts - iter->ts;
	struct trace_seq *s = &iter->seq;

	if (in_ns) {
		abs_ts = ns2usecs(abs_ts);
		rel_ts = ns2usecs(rel_ts);
	}

	if (verbose && in_ns) {
		unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC);
		unsigned long abs_msec = (unsigned long)abs_ts;
		unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC);
		unsigned long rel_msec = (unsigned long)rel_ts;

		trace_seq_printf(
			s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
			ns2usecs(iter->ts),
			abs_msec, abs_usec,
			rel_msec, rel_usec);

	} else if (verbose && !in_ns) {
		trace_seq_printf(
			s, "[%016llx] %lld (+%lld): ",
			iter->ts, abs_ts, rel_ts);

	} else if (!verbose && in_ns) {
		trace_seq_printf(
			s, " %4lldus%c: ",
			abs_ts,
			trace_find_mark(rel_ts * NSEC_PER_USEC));

	} else { /* !verbose && !in_ns */
		trace_seq_printf(s, " %4lld: ", abs_ts);
	}

	return !trace_seq_has_overflowed(s);
}

static void trace_print_time(struct trace_seq *s, struct trace_iterator *iter,
			     unsigned long long ts)
{
	unsigned long secs, usec_rem;
	unsigned long long t;

	if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
		t = ns2usecs(ts);
		usec_rem = do_div(t, USEC_PER_SEC);
		secs = (unsigned long)t;
		trace_seq_printf(s, " %5lu.%06lu", secs, usec_rem);
	} else
		trace_seq_printf(s, " %12llu", ts);
}
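
/*
 * Write the default per-event prefix: "comm-pid", optional TGID, the CPU,
 * optional irq/preempt flags and the timestamp.
 */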
int trace_print_context(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry = iter->ent;
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(entry->pid, comm);

	trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);

	if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
		unsigned int tgid = trace_find_tgid(entry->pid);

		if (!tgid)
			trace_seq_printf(s, "(-------) ");
		else
			trace_seq_printf(s, "(%7d) ", tgid);
	}

	trace_seq_printf(s, "[%03d] ", iter->cpu);

	if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
		trace_print_lat_fmt(s, entry);

	trace_print_time(s, iter, iter->ts);
	trace_seq_puts(s, ": ");

	return !trace_seq_has_overflowed(s);
}

int trace_print_lat_context(struct trace_iterator *iter)
{
	struct trace_entry *entry, *next_entry;
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long verbose = (tr->trace_flags & TRACE_ITER_VERBOSE);
	u64 next_ts;

	next_entry = trace_find_next_entry(iter, NULL, &next_ts);
	if (!next_entry)
		next_ts = iter->ts;

	/* trace_find_next_entry() may change iter->ent */
	entry = iter->ent;

	if (verbose) {
		char comm[TASK_COMM_LEN];

		trace_find_cmdline(entry->pid, comm);

		trace_seq_printf(
			s, "%16s %7d %3d %d %08x %08lx ",
			comm, entry->pid, iter->cpu, entry->flags,
			entry->preempt_count & 0xf, iter->idx);
	} else {
		lat_print_generic(s, entry, iter->cpu);
	}

	lat_print_timestamp(iter, next_ts);

	return !trace_seq_has_overflowed(s);
}

/**
 * ftrace_find_event - find a registered event
 * @type: the type of event to look for
 *
 * Returns an event of type @type otherwise NULL
 * Called with trace_event_read_lock() held.
 */
struct trace_event *ftrace_find_event(int type)
{
	struct trace_event *event;
	unsigned key;

	key = type & (EVENT_HASHSIZE - 1);

hlist: drop the node parameter from iterators
I'm not sure why, but the hlist for each entry iterators were conceived differently from the list ones, which simply take:
list_for_each_entry(pos, head, member)
The hlist ones were greedy and wanted an extra parameter:
hlist_for_each_entry(tpos, pos, head, member)
Why did they need an extra pos parameter? I'm not quite sure. Not only
is it not really needed, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.
Besides the semantic patch, there was some manual work required:
- Fix up the actual hlist iterators in linux/list.h
- Fix up the declaration of other iterators based on the hlist ones.
- A very small amount of places were using the 'node' parameter, this
was modified to use 'obj->member' instead.
- Coccinelle didn't handle the hlist_for_each_entry_safe iterator
properly, so those had to be fixed up manually.
The semantic patch, which is mostly the work of Peter Senna Tschudin, is here:
@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;
type T;
expression a,c,d,e;
identifier b;
statement S;
@@
-T b;
<+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
...+>
[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foundation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
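As an illustration only (hypothetical struct, field and helper names), a lookup
that previously carried the extra cursor now iterates directly over the
containing objects:

	#include <linux/list.h>

	struct foo {
		int id;
		struct hlist_node node;
	};

	static struct foo *foo_find(struct hlist_head *head, int id)
	{
		struct foo *f;

		/* old form needed a spare 'struct hlist_node *pos' cursor:
		 *   hlist_for_each_entry(f, pos, head, node)
		 */
		hlist_for_each_entry(f, head, node) {
			if (f->id == id)
				return f;
		}
		return NULL;
	}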
2013-02-28 01:06:00 +00:00
|
|
|
hlist_for_each_entry(event, &event_hash[key], node) {
|
2008-12-24 04:24:12 +00:00
|
|
|
if (event->type == type)
|
|
|
|
return event;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2022-11-10 02:03:19 +00:00
|
|
|
static DEFINE_IDA(trace_event_ida);
|
2009-04-24 16:20:52 +00:00
|
|
|
|
2022-11-10 02:03:19 +00:00
|
|
|
static void free_trace_event_type(int type)
|
2009-04-24 16:20:52 +00:00
|
|
|
{
|
2022-11-10 02:03:19 +00:00
|
|
|
if (type >= __TRACE_LAST_TYPE)
|
|
|
|
ida_free(&trace_event_ida, type);
|
|
|
|
}
|
2009-04-24 16:20:52 +00:00
|
|
|
|
2022-11-10 02:03:19 +00:00
|
|
|
static int alloc_trace_event_type(void)
|
|
|
|
{
|
|
|
|
int next;
|
2009-04-24 16:20:52 +00:00
|
|
|
|
2022-11-10 02:03:19 +00:00
|
|
|
/* Skip static defined type numbers */
|
|
|
|
next = ida_alloc_range(&trace_event_ida, __TRACE_LAST_TYPE,
|
|
|
|
TRACE_EVENT_TYPE_MAX, GFP_KERNEL);
|
|
|
|
if (next < 0)
|
2009-04-24 16:20:52 +00:00
|
|
|
return 0;
|
2020-07-03 02:06:09 +00:00
|
|
|
return next;
|
2009-04-24 16:20:52 +00:00
|
|
|
}
|
|
|
|
|
2009-05-18 11:35:34 +00:00
|
|
|
void trace_event_read_lock(void)
|
|
|
|
{
|
2013-03-11 07:14:03 +00:00
|
|
|
down_read(&trace_event_sem);
|
2009-05-18 11:35:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void trace_event_read_unlock(void)
|
|
|
|
{
|
2013-03-11 07:14:03 +00:00
|
|
|
up_read(&trace_event_sem);
|
2009-05-18 11:35:34 +00:00
|
|
|
}
|
|
|
|
|
2008-12-24 04:24:12 +00:00
|
|
|
/**
|
2015-05-05 13:39:12 +00:00
|
|
|
* register_trace_event - register output for an event type
|
2008-12-24 04:24:12 +00:00
|
|
|
* @event: the event type to register
|
|
|
|
*
|
|
|
|
* Event types are stored in a hash and this hash is used to
|
|
|
|
* find a way to print an event. If the @event->type is set
|
|
|
|
* then it will use that type, otherwise it will assign a
|
|
|
|
* type to use.
|
|
|
|
*
|
|
|
|
* If you assign your own type, please make sure it is added
|
|
|
|
* to the trace_type enum in trace.h, to avoid collisions
|
|
|
|
* with the dynamic types.
|
|
|
|
*
|
|
|
|
* Returns the event type number or zero on error.
|
|
|
|
*/
|
2015-05-05 13:39:12 +00:00
|
|
|
int register_trace_event(struct trace_event *event)
|
2008-12-24 04:24:12 +00:00
|
|
|
{
|
|
|
|
unsigned key;
|
|
|
|
int ret = 0;
|
|
|
|
|
2013-03-11 07:14:03 +00:00
|
|
|
down_write(&trace_event_sem);
|
2008-12-24 04:24:12 +00:00
|
|
|
|
2009-04-24 16:20:52 +00:00
|
|
|
if (WARN_ON(!event))
|
2009-03-19 19:26:14 +00:00
|
|
|
goto out;
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
if (WARN_ON(!event->funcs))
|
|
|
|
goto out;
|
|
|
|
|
2009-04-24 16:20:52 +00:00
|
|
|
if (!event->type) {
|
2022-11-10 02:03:19 +00:00
|
|
|
event->type = alloc_trace_event_type();
|
|
|
|
if (!event->type)
|
2009-04-24 16:20:52 +00:00
|
|
|
goto out;
|
2022-04-24 13:19:32 +00:00
|
|
|
} else if (WARN(event->type > __TRACE_LAST_TYPE,
|
|
|
|
"Need to add type to trace.h")) {
|
2008-12-24 04:24:12 +00:00
|
|
|
goto out;
|
2009-04-24 16:20:52 +00:00
|
|
|
} else {
|
|
|
|
/* Is this event already used */
|
|
|
|
if (ftrace_find_event(event->type))
|
|
|
|
goto out;
|
|
|
|
}
|
2008-12-24 04:24:12 +00:00
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
if (event->funcs->trace == NULL)
|
|
|
|
event->funcs->trace = trace_nop_print;
|
|
|
|
if (event->funcs->raw == NULL)
|
|
|
|
event->funcs->raw = trace_nop_print;
|
|
|
|
if (event->funcs->hex == NULL)
|
|
|
|
event->funcs->hex = trace_nop_print;
|
|
|
|
if (event->funcs->binary == NULL)
|
|
|
|
event->funcs->binary = trace_nop_print;
|
2009-02-04 22:16:39 +00:00
|
|
|
|
2008-12-24 04:24:12 +00:00
|
|
|
key = event->type & (EVENT_HASHSIZE - 1);
|
|
|
|
|
2009-05-18 11:35:34 +00:00
|
|
|
hlist_add_head(&event->node, &event_hash[key]);
|
2008-12-24 04:24:12 +00:00
|
|
|
|
|
|
|
ret = event->type;
|
|
|
|
out:
|
2013-03-11 07:14:03 +00:00
|
|
|
up_write(&trace_event_sem);
|
2008-12-24 04:24:12 +00:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2015-05-05 13:39:12 +00:00
|
|
|
EXPORT_SYMBOL_GPL(register_trace_event);
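As a usage sketch only (hypothetical event and function names, not part of this
file), a module that is happy with the default text output can leave .type at
zero and let register_trace_event() assign a dynamic type:

	#include <linux/module.h>
	#include <linux/trace_events.h>

	static enum print_line_t my_evt_output(struct trace_iterator *iter, int flags,
					       struct trace_event *event)
	{
		trace_seq_puts(&iter->seq, "my event\n");
		return trace_handle_return(&iter->seq);
	}

	static struct trace_event_functions my_evt_funcs = {
		.trace	= my_evt_output,
		/* unset callbacks (.raw/.hex/.binary) fall back to trace_nop_print */
	};

	static struct trace_event my_evt = {
		.funcs	= &my_evt_funcs,	/* .type == 0: a dynamic type is assigned */
	};

	static int __init my_evt_init(void)
	{
		int type = register_trace_event(&my_evt);

		return type ? 0 : -ENODEV;	/* a zero return means registration failed */
	}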
|
2008-12-24 04:24:12 +00:00
|
|
|
|
2009-06-09 21:29:07 +00:00
|
|
|
/*
|
2013-03-11 07:14:03 +00:00
|
|
|
* Used by module code with the trace_event_sem held for write.
|
2009-06-09 21:29:07 +00:00
|
|
|
*/
|
2015-05-05 13:39:12 +00:00
|
|
|
int __unregister_trace_event(struct trace_event *event)
|
2009-06-09 21:29:07 +00:00
|
|
|
{
|
|
|
|
hlist_del(&event->node);
|
2022-11-10 02:03:19 +00:00
|
|
|
free_trace_event_type(event->type);
|
2009-06-09 21:29:07 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-12-24 04:24:12 +00:00
|
|
|
/**
|
2015-05-05 13:39:12 +00:00
|
|
|
* unregister_trace_event - remove a no longer used event
|
2008-12-24 04:24:12 +00:00
|
|
|
* @event: the event to remove
|
|
|
|
*/
|
2015-05-05 13:39:12 +00:00
|
|
|
int unregister_trace_event(struct trace_event *event)
|
2008-12-24 04:24:12 +00:00
|
|
|
{
|
2013-03-11 07:14:03 +00:00
|
|
|
down_write(&trace_event_sem);
|
2015-05-05 13:39:12 +00:00
|
|
|
__unregister_trace_event(event);
|
2013-03-11 07:14:03 +00:00
|
|
|
up_write(&trace_event_sem);
|
2008-12-24 04:24:12 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2015-05-05 13:39:12 +00:00
|
|
|
EXPORT_SYMBOL_GPL(unregister_trace_event);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Standard events
|
|
|
|
*/
|
|
|
|
|
2023-03-28 18:51:56 +00:00
|
|
|
static void print_array(struct trace_iterator *iter, void *pos,
|
|
|
|
struct ftrace_event_field *field)
|
|
|
|
{
|
|
|
|
int offset;
|
|
|
|
int len;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
offset = *(int *)pos & 0xffff;
|
|
|
|
len = *(int *)pos >> 16;
|
|
|
|
|
|
|
|
if (field)
|
2023-04-19 21:41:40 +00:00
|
|
|
offset += field->offset + sizeof(int);
|
2023-03-28 18:51:56 +00:00
|
|
|
|
2023-04-19 21:41:40 +00:00
|
|
|
if (offset + len > iter->ent_size) {
|
2023-03-28 18:51:56 +00:00
|
|
|
trace_seq_puts(&iter->seq, "<OVERFLOW>");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-04-19 21:41:40 +00:00
|
|
|
pos = (void *)iter->ent + offset;
|
|
|
|
|
2023-03-28 18:51:56 +00:00
|
|
|
for (i = 0; i < len; i++, pos++) {
|
|
|
|
if (i)
|
|
|
|
trace_seq_putc(&iter->seq, ',');
|
|
|
|
trace_seq_printf(&iter->seq, "%02x", *(unsigned char *)pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void print_fields(struct trace_iterator *iter, struct trace_event_call *call,
|
|
|
|
struct list_head *head)
|
|
|
|
{
|
|
|
|
struct ftrace_event_field *field;
|
|
|
|
int offset;
|
|
|
|
int len;
|
|
|
|
int ret;
|
|
|
|
void *pos;
|
|
|
|
|
2023-05-25 08:52:32 +00:00
|
|
|
list_for_each_entry_reverse(field, head, link) {
|
2023-03-28 18:51:56 +00:00
|
|
|
trace_seq_printf(&iter->seq, " %s=", field->name);
|
|
|
|
if (field->offset + field->size > iter->ent_size) {
|
|
|
|
trace_seq_puts(&iter->seq, "<OVERFLOW>");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
pos = (void *)iter->ent + field->offset;
|
|
|
|
|
|
|
|
switch (field->filter_type) {
|
|
|
|
case FILTER_COMM:
|
|
|
|
case FILTER_STATIC_STRING:
|
|
|
|
trace_seq_printf(&iter->seq, "%.*s", field->size, (char *)pos);
|
|
|
|
break;
|
|
|
|
case FILTER_RDYN_STRING:
|
|
|
|
case FILTER_DYN_STRING:
|
|
|
|
offset = *(int *)pos & 0xffff;
|
|
|
|
len = *(int *)pos >> 16;
|
|
|
|
|
|
|
|
if (field->filter_type == FILTER_RDYN_STRING)
|
2023-04-19 21:41:40 +00:00
|
|
|
offset += field->offset + sizeof(int);
|
2023-03-28 18:51:56 +00:00
|
|
|
|
2023-04-19 21:41:40 +00:00
|
|
|
if (offset + len > iter->ent_size) {
|
2023-03-28 18:51:56 +00:00
|
|
|
trace_seq_puts(&iter->seq, "<OVERFLOW>");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
pos = (void *)iter->ent + offset;
|
|
|
|
trace_seq_printf(&iter->seq, "%.*s", len, (char *)pos);
|
|
|
|
break;
|
|
|
|
case FILTER_PTR_STRING:
|
|
|
|
if (!iter->fmt_size)
|
|
|
|
trace_iter_expand_format(iter);
|
|
|
|
pos = *(void **)pos;
|
|
|
|
ret = strncpy_from_kernel_nofault(iter->fmt, pos,
|
|
|
|
iter->fmt_size);
|
|
|
|
if (ret < 0)
|
|
|
|
trace_seq_printf(&iter->seq, "(0x%px)", pos);
|
|
|
|
else
|
|
|
|
trace_seq_printf(&iter->seq, "(0x%px:%s)",
|
|
|
|
pos, iter->fmt);
|
|
|
|
break;
|
|
|
|
case FILTER_TRACE_FN:
|
|
|
|
pos = *(void **)pos;
|
|
|
|
trace_seq_printf(&iter->seq, "%pS", pos);
|
|
|
|
break;
|
|
|
|
case FILTER_CPU:
|
|
|
|
case FILTER_OTHER:
|
|
|
|
switch (field->size) {
|
|
|
|
case 1:
|
|
|
|
if (isprint(*(char *)pos)) {
|
|
|
|
trace_seq_printf(&iter->seq, "'%c'",
|
|
|
|
*(unsigned char *)pos);
|
|
|
|
}
|
|
|
|
trace_seq_printf(&iter->seq, "(%d)",
|
|
|
|
*(unsigned char *)pos);
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
trace_seq_printf(&iter->seq, "0x%x (%d)",
|
|
|
|
*(unsigned short *)pos,
|
|
|
|
*(unsigned short *)pos);
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
/* dynamic array info is 4 bytes */
|
|
|
|
if (strstr(field->type, "__data_loc")) {
|
|
|
|
print_array(iter, pos, NULL);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strstr(field->type, "__rel_loc")) {
|
|
|
|
print_array(iter, pos, field);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_seq_printf(&iter->seq, "0x%x (%d)",
|
|
|
|
*(unsigned int *)pos,
|
|
|
|
*(unsigned int *)pos);
|
|
|
|
break;
|
|
|
|
case 8:
|
|
|
|
trace_seq_printf(&iter->seq, "0x%llx (%lld)",
|
|
|
|
*(unsigned long long *)pos,
|
|
|
|
*(unsigned long long *)pos);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
trace_seq_puts(&iter->seq, "<INVALID-SIZE>");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
trace_seq_puts(&iter->seq, "<INVALID-TYPE>");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
trace_seq_putc(&iter->seq, '\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
enum print_line_t print_event_fields(struct trace_iterator *iter,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct trace_event_call *call;
|
|
|
|
struct list_head *head;
|
|
|
|
|
|
|
|
/* ftrace defined events have separate call structures */
|
|
|
|
if (event->type <= __TRACE_LAST_TYPE) {
|
|
|
|
bool found = false;
|
|
|
|
|
|
|
|
down_read(&trace_event_sem);
|
|
|
|
list_for_each_entry(call, &ftrace_events, list) {
|
|
|
|
if (call->event.type == event->type) {
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* No need to search all events */
|
|
|
|
if (call->event.type > __TRACE_LAST_TYPE)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
up_read(&trace_event_sem);
|
|
|
|
if (!found) {
|
|
|
|
trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
call = container_of(event, struct trace_event_call, event);
|
|
|
|
}
|
|
|
|
head = trace_get_fields(call);
|
|
|
|
|
|
|
|
trace_seq_printf(&iter->seq, "%s:", trace_event_name(call));
|
|
|
|
|
|
|
|
if (head && !list_empty(head))
|
|
|
|
print_fields(iter, call, head);
|
|
|
|
else
|
|
|
|
trace_seq_puts(&iter->seq, "No fields found\n");
|
|
|
|
|
|
|
|
out:
|
|
|
|
return trace_handle_return(&iter->seq);
|
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type);
|
2011-03-25 11:05:18 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(&iter->seq);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2021-04-15 20:34:26 +00:00
|
|
|
static void print_fn_trace(struct trace_seq *s, unsigned long ip,
|
2024-06-12 23:19:46 +00:00
|
|
|
unsigned long parent_ip, long delta, int flags)
|
2021-04-15 20:34:26 +00:00
|
|
|
{
|
2024-06-12 23:19:46 +00:00
|
|
|
ip += delta;
|
|
|
|
parent_ip += delta;
|
|
|
|
|
2021-04-15 20:34:26 +00:00
|
|
|
seq_print_ip_sym(s, ip, flags);
|
|
|
|
|
|
|
|
if ((flags & TRACE_ITER_PRINT_PARENT) && parent_ip) {
|
|
|
|
trace_seq_puts(s, " <-");
|
|
|
|
seq_print_ip_sym(s, parent_ip, flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-24 04:24:13 +00:00
|
|
|
/* TRACE_FN */
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ftrace_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2024-06-12 23:19:46 +00:00
|
|
|
print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, flags);
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_putc(s, '\n');
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ftrace_entry *field;
|
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_printf(&iter->seq, "%lx %lx\n",
|
|
|
|
field->ip,
|
|
|
|
field->parent_ip);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(&iter->seq);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ftrace_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
SEQ_PUT_HEX_FIELD(s, field->ip);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, field->parent_ip);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ftrace_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
SEQ_PUT_FIELD(s, field->ip);
|
|
|
|
SEQ_PUT_FIELD(s, field->parent_ip);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event_functions trace_fn_funcs = {
|
2008-12-24 04:24:13 +00:00
|
|
|
.trace = trace_fn_trace,
|
|
|
|
.raw = trace_fn_raw,
|
|
|
|
.hex = trace_fn_hex,
|
|
|
|
.binary = trace_fn_bin,
|
|
|
|
};
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event trace_fn_event = {
|
|
|
|
.type = TRACE_FN,
|
|
|
|
.funcs = &trace_fn_funcs,
|
|
|
|
};
|
|
|
|
|
2008-12-24 04:24:13 +00:00
|
|
|
/* TRACE_CTX and TRACE_WAKE */
|
2009-02-04 00:05:50 +00:00
|
|
|
static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
|
|
|
|
char *delim)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ctx_switch_entry *field;
|
2009-03-16 23:20:15 +00:00
|
|
|
char comm[TASK_COMM_LEN];
|
2008-12-24 04:24:13 +00:00
|
|
|
int S, T;
|
|
|
|
|
2009-03-16 23:20:15 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2017-09-29 11:50:16 +00:00
|
|
|
T = task_index_to_char(field->next_state);
|
|
|
|
S = task_index_to_char(field->prev_state);
|
2009-03-16 23:20:15 +00:00
|
|
|
trace_find_cmdline(field->next_pid, comm);
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_printf(&iter->seq,
|
2020-09-04 08:23:31 +00:00
|
|
|
" %7d:%3d:%c %s [%03d] %7d:%3d:%c %s\n",
|
2014-11-12 15:29:54 +00:00
|
|
|
field->prev_pid,
|
|
|
|
field->prev_prio,
|
|
|
|
S, delim,
|
|
|
|
field->next_cpu,
|
|
|
|
field->next_pid,
|
|
|
|
field->next_prio,
|
|
|
|
T, comm);
|
|
|
|
|
|
|
|
return trace_handle_return(&iter->seq);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2009-02-02 22:30:12 +00:00
|
|
|
return trace_ctxwake_print(iter, "==>");
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2009-02-04 00:05:50 +00:00
|
|
|
static enum print_line_t trace_wake_print(struct trace_iterator *iter,
|
2010-04-22 22:46:14 +00:00
|
|
|
int flags, struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2009-02-02 22:30:12 +00:00
|
|
|
return trace_ctxwake_print(iter, " +");
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ctx_switch_entry *field;
|
|
|
|
int T;
|
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
|
|
|
if (!S)
|
2017-09-29 11:50:16 +00:00
|
|
|
S = task_index_to_char(field->prev_state);
|
|
|
|
T = task_index_to_char(field->next_state);
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
|
|
|
|
field->prev_pid,
|
|
|
|
field->prev_prio,
|
|
|
|
S,
|
|
|
|
field->next_cpu,
|
|
|
|
field->next_pid,
|
|
|
|
field->next_prio,
|
|
|
|
T);
|
|
|
|
|
|
|
|
return trace_handle_return(&iter->seq);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2009-02-02 22:30:12 +00:00
|
|
|
return trace_ctxwake_raw(iter, 0);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2009-02-02 22:30:12 +00:00
|
|
|
return trace_ctxwake_raw(iter, '+');
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ctx_switch_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2008-12-24 04:24:13 +00:00
|
|
|
int T;
|
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
|
|
|
if (!S)
|
2017-09-29 11:50:16 +00:00
|
|
|
S = task_index_to_char(field->prev_state);
|
|
|
|
T = task_index_to_char(field->next_state);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
SEQ_PUT_HEX_FIELD(s, field->prev_pid);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, field->prev_prio);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, S);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, field->next_cpu);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, field->next_pid);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, field->next_prio);
|
|
|
|
SEQ_PUT_HEX_FIELD(s, T);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2009-02-02 22:30:12 +00:00
|
|
|
return trace_ctxwake_hex(iter, 0);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2009-02-02 22:30:12 +00:00
|
|
|
return trace_ctxwake_hex(iter, '+');
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2009-02-04 00:05:50 +00:00
|
|
|
static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
|
2010-04-22 22:46:14 +00:00
|
|
|
int flags, struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct ctx_switch_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
SEQ_PUT_FIELD(s, field->prev_pid);
|
|
|
|
SEQ_PUT_FIELD(s, field->prev_prio);
|
|
|
|
SEQ_PUT_FIELD(s, field->prev_state);
|
|
|
|
SEQ_PUT_FIELD(s, field->next_cpu);
|
|
|
|
SEQ_PUT_FIELD(s, field->next_pid);
|
|
|
|
SEQ_PUT_FIELD(s, field->next_prio);
|
|
|
|
SEQ_PUT_FIELD(s, field->next_state);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event_functions trace_ctx_funcs = {
|
2008-12-24 04:24:13 +00:00
|
|
|
.trace = trace_ctx_print,
|
|
|
|
.raw = trace_ctx_raw,
|
|
|
|
.hex = trace_ctx_hex,
|
|
|
|
.binary = trace_ctxwake_bin,
|
|
|
|
};
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event trace_ctx_event = {
|
|
|
|
.type = TRACE_CTX,
|
|
|
|
.funcs = &trace_ctx_funcs,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_wake_funcs = {
|
2008-12-24 04:24:13 +00:00
|
|
|
.trace = trace_wake_print,
|
|
|
|
.raw = trace_wake_raw,
|
|
|
|
.hex = trace_wake_hex,
|
|
|
|
.binary = trace_ctxwake_bin,
|
|
|
|
};
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event trace_wake_event = {
|
|
|
|
.type = TRACE_WAKE,
|
|
|
|
.funcs = &trace_wake_funcs,
|
|
|
|
};
|
|
|
|
|
2008-12-24 04:24:13 +00:00
|
|
|
/* TRACE_STACK */
|
|
|
|
|
2009-02-04 00:05:50 +00:00
|
|
|
static enum print_line_t trace_stack_print(struct trace_iterator *iter,
|
2010-04-22 22:46:14 +00:00
|
|
|
int flags, struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct stack_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2011-07-14 20:36:53 +00:00
|
|
|
unsigned long *p;
|
|
|
|
unsigned long *end;
|
2024-06-12 23:19:47 +00:00
|
|
|
long delta = iter->tr->text_delta;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2011-07-14 20:36:53 +00:00
|
|
|
end = (unsigned long *)((long)iter->ent + iter->ent_size);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_puts(s, "<stack trace>\n");
|
2011-07-14 20:36:53 +00:00
|
|
|
|
2019-06-10 04:00:16 +00:00
|
|
|
for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) {
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
if (trace_seq_has_overflowed(s))
|
|
|
|
break;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_puts(s, " => ");
|
2024-10-21 07:14:53 +00:00
|
|
|
if ((*p) == FTRACE_TRAMPOLINE_MARKER) {
|
|
|
|
trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n");
|
|
|
|
continue;
|
|
|
|
}
|
2024-06-12 23:19:47 +00:00
|
|
|
seq_print_ip_sym(s, (*p) + delta, flags);
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_putc(s, '\n');
|
|
|
|
}
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event_functions trace_stack_funcs = {
|
2008-12-24 04:24:13 +00:00
|
|
|
.trace = trace_stack_print,
|
|
|
|
};
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event trace_stack_event = {
|
|
|
|
.type = TRACE_STACK,
|
|
|
|
.funcs = &trace_stack_funcs,
|
|
|
|
};
|
|
|
|
|
2008-12-24 04:24:13 +00:00
|
|
|
/* TRACE_USER_STACK */
|
2009-02-04 00:05:50 +00:00
|
|
|
static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
|
2010-04-22 22:46:14 +00:00
|
|
|
int flags, struct trace_event *event)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
2015-09-30 13:42:05 +00:00
|
|
|
struct trace_array *tr = iter->tr;
|
2008-12-24 04:24:13 +00:00
|
|
|
struct userstack_entry *field;
|
2009-02-02 22:30:12 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
2015-09-28 14:11:44 +00:00
|
|
|
struct mm_struct *mm = NULL;
|
|
|
|
unsigned int i;
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2009-02-02 22:30:12 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_puts(s, "<user stack trace>\n");
|
2015-09-28 14:11:44 +00:00
|
|
|
|
2015-09-30 13:42:05 +00:00
|
|
|
if (tr->trace_flags & TRACE_ITER_SYM_USEROBJ) {
|
2015-09-28 14:11:44 +00:00
|
|
|
struct task_struct *task;
|
|
|
|
/*
|
|
|
|
* we do the lookup on the thread group leader,
|
|
|
|
* since individual threads might have already quit!
|
|
|
|
*/
|
|
|
|
rcu_read_lock();
|
|
|
|
task = find_task_by_vpid(field->tgid);
|
|
|
|
if (task)
|
|
|
|
mm = get_task_mm(task);
|
|
|
|
rcu_read_unlock();
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
|
|
|
|
unsigned long ip = field->caller[i];
|
|
|
|
|
2019-06-30 08:54:38 +00:00
|
|
|
if (!ip || trace_seq_has_overflowed(s))
|
2015-09-28 14:11:44 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
trace_seq_puts(s, " => ");
|
|
|
|
seq_print_user_ip(s, mm, ip, flags);
|
|
|
|
trace_seq_putc(s, '\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mm)
|
|
|
|
mmput(mm);
|
2008-12-24 04:24:13 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event_functions trace_user_stack_funcs = {
|
2008-12-24 04:24:13 +00:00
|
|
|
.trace = trace_user_stack_print,
|
|
|
|
};
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event trace_user_stack_event = {
|
|
|
|
.type = TRACE_USER_STACK,
|
|
|
|
.funcs = &trace_user_stack_funcs,
|
|
|
|
};
|
|
|
|
|
2016-06-23 16:45:36 +00:00
|
|
|
/* TRACE_HWLAT */
|
|
|
|
static enum print_line_t
|
|
|
|
trace_hwlat_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct trace_entry *entry = iter->ent;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
struct hwlat_entry *field;
|
|
|
|
|
|
|
|
trace_assign_type(field, entry);
|
|
|
|
|
2020-02-12 17:21:03 +00:00
|
|
|
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld count:%d",
|
2016-06-23 16:45:36 +00:00
|
|
|
field->seqnum,
|
|
|
|
field->duration,
|
|
|
|
field->outer_duration,
|
2017-05-08 22:59:13 +00:00
|
|
|
(long long)field->timestamp.tv_sec,
|
2020-02-12 17:21:03 +00:00
|
|
|
field->timestamp.tv_nsec, field->count);
|
2016-06-23 16:45:36 +00:00
|
|
|
|
2016-08-04 16:49:53 +00:00
|
|
|
if (field->nmi_count) {
|
|
|
|
/*
|
|
|
|
* The generic sched_clock() is not NMI safe, thus
|
|
|
|
* we only record the count and not the time.
|
|
|
|
*/
|
|
|
|
if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK))
|
|
|
|
trace_seq_printf(s, " nmi-total:%llu",
|
|
|
|
field->nmi_total_ts);
|
|
|
|
trace_seq_printf(s, " nmi-count:%u",
|
|
|
|
field->nmi_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_seq_putc(s, '\n');
|
|
|
|
|
2016-06-23 16:45:36 +00:00
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum print_line_t
|
|
|
|
trace_hwlat_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct hwlat_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
2017-05-08 22:59:13 +00:00
|
|
|
trace_seq_printf(s, "%llu %lld %lld %09ld %u\n",
|
2016-06-23 16:45:36 +00:00
|
|
|
field->duration,
|
|
|
|
field->outer_duration,
|
2017-05-08 22:59:13 +00:00
|
|
|
(long long)field->timestamp.tv_sec,
|
2016-06-23 16:45:36 +00:00
|
|
|
field->timestamp.tv_nsec,
|
|
|
|
field->seqnum);
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_hwlat_funcs = {
|
|
|
|
.trace = trace_hwlat_print,
|
|
|
|
.raw = trace_hwlat_raw,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event trace_hwlat_event = {
|
|
|
|
.type = TRACE_HWLAT,
|
|
|
|
.funcs = &trace_hwlat_funcs,
|
|
|
|
};
|
|
|
|
|
trace: Add osnoise tracer
In the context of high-performance computing (HPC), the Operating System
Noise (*osnoise*) refers to the interference experienced by an application
due to activities inside the operating system. In the context of Linux,
NMIs, IRQs, SoftIRQs, and any other system thread can cause noise to the
system. Moreover, hardware-related jobs can also cause noise, for example,
via SMIs.
The osnoise tracer leverages the hwlat_detector by running a similar
loop with preemption, SoftIRQs and IRQs enabled, thus allowing all
the sources of *osnoise* during its execution. Using the same approach
of hwlat, osnoise takes note of the entry and exit point of any
source of interference, increasing a per-cpu interference counter. The
osnoise tracer also saves an interference counter for each source of
interference. The interference counter for NMI, IRQs, SoftIRQs, and
threads is increased anytime the tool observes these interferences' entry
events. When a noise happens without any interference from the operating
system level, the hardware noise counter increases, pointing to a
hardware-related noise. In this way, osnoise can account for any
source of interference. At the end of the period, the osnoise tracer
prints the sum of all noise, the max single noise, the percentage of CPU
available for the thread, and the counters for the noise sources.
Usage
Write the ASCII text "osnoise" into the current_tracer file of the
tracing system (generally mounted at /sys/kernel/tracing).
For example::
[root@f32 ~]# cd /sys/kernel/tracing/
[root@f32 tracing]# echo osnoise > current_tracer
It is possible to follow the trace by reading the trace file::
[root@f32 tracing]# cat trace
# tracer: osnoise
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth MAX
# || / SINGLE Interference counters:
# |||| RUNTIME NOISE % OF CPU NOISE +-----------------------------+
# TASK-PID CPU# |||| TIMESTAMP IN US IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD
# | | | |||| | | | | | | | | | |
<...>-859 [000] .... 81.637220: 1000000 190 99.98100 9 18 0 1007 18 1
<...>-860 [001] .... 81.638154: 1000000 656 99.93440 74 23 0 1006 16 3
<...>-861 [002] .... 81.638193: 1000000 5675 99.43250 202 6 0 1013 25 21
<...>-862 [003] .... 81.638242: 1000000 125 99.98750 45 1 0 1011 23 0
<...>-863 [004] .... 81.638260: 1000000 1721 99.82790 168 7 0 1002 49 41
<...>-864 [005] .... 81.638286: 1000000 263 99.97370 57 6 0 1006 26 2
<...>-865 [006] .... 81.638302: 1000000 109 99.98910 21 3 0 1006 18 1
<...>-866 [007] .... 81.638326: 1000000 7816 99.21840 107 8 0 1016 39 19
In addition to the regular trace fields (from TASK-PID to TIMESTAMP), the
tracer prints a message at the end of each period for each CPU that is
running an osnoise/CPU thread. The osnoise specific fields report:
- The RUNTIME IN US reports the amount of time in microseconds that
the osnoise thread kept looping reading the time.
- The NOISE IN US reports the sum of noise in microseconds observed
by the osnoise tracer during the associated runtime.
- The % OF CPU AVAILABLE reports the percentage of CPU available for
the osnoise thread during the runtime window.
- The MAX SINGLE NOISE IN US reports the maximum single noise observed
during the runtime window.
- The Interference counters display how many of each of the respective
interferences happened during the runtime window.
Note that the example above shows a high number of HW noise samples.
The reason is that this sample was taken on a virtual machine,
and the host interference is detected as hardware interference.
Tracer options
The tracer has a set of options inside the osnoise directory; they are:
- osnoise/cpus: CPUs at which an osnoise thread will execute.
- osnoise/period_us: the period of the osnoise thread.
- osnoise/runtime_us: how long an osnoise thread will look for noise.
- osnoise/stop_tracing_us: stop the system tracing if a single noise
higher than the configured value happens. Writing 0 disables this
option.
- osnoise/stop_tracing_total_us: stop the system tracing if total noise
higher than the configured value happens. Writing 0 disables this
option.
- tracing_threshold: the minimum delta between two time() reads to be
considered as noise, in us. When set to 0, the default value will
be used, which is currently 5 us.
Additional Tracing
In addition to the tracer, a set of tracepoints were added to
facilitate the identification of the osnoise source.
- osnoise:sample_threshold: printed anytime a noise is higher than
the configurable tolerance_ns.
- osnoise:nmi_noise: noise from NMI, including the duration.
- osnoise:irq_noise: noise from an IRQ, including the duration.
- osnoise:softirq_noise: noise from a SoftIRQ, including the
duration.
- osnoise:thread_noise: noise from a thread, including the duration.
Note that all the values are *net values*. For example, if while osnoise
is running, another thread preempts the osnoise thread, it will start a
thread_noise duration at the start. Then, an IRQ takes place, preempting
the thread_noise, starting an irq_noise. When the IRQ ends its execution,
it will compute its duration, and this duration will be subtracted from
the thread_noise, in such a way as to avoid the double accounting of the
IRQ execution. This logic is valid for all sources of noise.
Here is one example of the usage of these tracepoints::
osnoise/8-961 [008] d.h. 5789.857532: irq_noise: local_timer:236 start 5789.857529929 duration 1845 ns
osnoise/8-961 [008] dNh. 5789.858408: irq_noise: local_timer:236 start 5789.858404871 duration 2848 ns
migration/8-54 [008] d... 5789.858413: thread_noise: migration/8:54 start 5789.858409300 duration 3068 ns
osnoise/8-961 [008] .... 5789.858413: sample_threshold: start 5789.858404555 duration 8723 ns interferences 2
In this example, a noise sample of 8 microseconds was reported in the last
line, pointing to two interferences. Looking backward in the trace, the
two previous entries were about the migration thread running after a
timer IRQ execution. The first event is not part of the noise because
it took place one millisecond before.
It is worth noticing that the sum of the durations reported in the
tracepoints is smaller than the eight us reported in the sample_threshold.
The reason lies in the overhead of the entry and exit code that happens
before and after any interference execution. This justifies the dual
approach: measuring thread and tracing.
Link: https://lkml.kernel.org/r/e649467042d60e7b62714c9c6751a56299d15119.1624372313.git.bristot@redhat.com
Cc: Phil Auld <pauld@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kate Carcia <kcarcia@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>
Cc: Clark Willaims <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
[
Made the following functions static:
trace_irqentry_callback()
trace_irqexit_callback()
trace_intel_irqentry_callback()
trace_intel_irqexit_callback()
Added to include/trace.h:
osnoise_arch_register()
osnoise_arch_unregister()
Fixed define logic for LATENCY_FS_NOTIFY
Reported-by: kernel test robot <lkp@intel.com>
]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
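For example (paths assume tracefs mounted at /sys/kernel/tracing), stopping the
trace as soon as a single noise sample above 100 us is observed:
  [root@f32 tracing]# echo 100 > osnoise/stop_tracing_us
  [root@f32 tracing]# echo osnoise > current_tracer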
2021-06-22 14:42:27 +00:00
|
|
|
/* TRACE_OSNOISE */
|
|
|
|
static enum print_line_t
|
|
|
|
trace_osnoise_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct trace_entry *entry = iter->ent;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
struct osnoise_entry *field;
|
|
|
|
u64 ratio, ratio_dec;
|
|
|
|
u64 net_runtime;
|
|
|
|
|
|
|
|
trace_assign_type(field, entry);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* compute the available % of cpu time.
|
|
|
|
*/
|
|
|
|
net_runtime = field->runtime - field->noise;
|
|
|
|
ratio = net_runtime * 10000000;
|
|
|
|
do_div(ratio, field->runtime);
|
|
|
|
ratio_dec = do_div(ratio, 100000);
|
|
|
|
|
|
|
|
trace_seq_printf(s, "%llu %10llu %3llu.%05llu %7llu",
|
|
|
|
field->runtime,
|
|
|
|
field->noise,
|
|
|
|
ratio, ratio_dec,
|
|
|
|
field->max_sample);
|
|
|
|
|
|
|
|
trace_seq_printf(s, " %6u", field->hw_count);
|
|
|
|
trace_seq_printf(s, " %6u", field->nmi_count);
|
|
|
|
trace_seq_printf(s, " %6u", field->irq_count);
|
|
|
|
trace_seq_printf(s, " %6u", field->softirq_count);
|
|
|
|
trace_seq_printf(s, " %6u", field->thread_count);
|
|
|
|
|
|
|
|
trace_seq_putc(s, '\n');
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum print_line_t
|
|
|
|
trace_osnoise_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct osnoise_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
|
|
|
trace_seq_printf(s, "%lld %llu %llu %u %u %u %u %u\n",
|
|
|
|
field->runtime,
|
|
|
|
field->noise,
|
|
|
|
field->max_sample,
|
|
|
|
field->hw_count,
|
|
|
|
field->nmi_count,
|
|
|
|
field->irq_count,
|
|
|
|
field->softirq_count,
|
|
|
|
field->thread_count);
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_osnoise_funcs = {
|
|
|
|
.trace = trace_osnoise_print,
|
|
|
|
.raw = trace_osnoise_raw,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event trace_osnoise_event = {
|
|
|
|
.type = TRACE_OSNOISE,
|
|
|
|
.funcs = &trace_osnoise_funcs,
|
|
|
|
};
|
|
|
|
|
2021-06-22 14:42:28 +00:00
|
|
|
/* TRACE_TIMERLAT */
|
tracing/timerlat: Add user-space interface
Going a step further, we propose a way to use any user-space
workload as the task waiting for the timerlat timer. This is done
via a per-CPU file named osnoise/cpu$id/timerlat_fd.
The timerlat_fd allows one task to open it at a time. When a task reads
the file, the timerlat timer is armed to fire after the configured
osnoise/timerlat_period_us period. When the timer fires, it prints the IRQ
latency and
wakes up the user-space thread waiting in the timerlat_fd.
The thread then starts to run, executes the timerlat measurement, prints
the thread scheduling latency and returns to user-space.
When the thread rereads the timerlat_fd, the tracer will print the
user-ret(urn) latency, which is an additional metric.
This additional metric is also traced by the tracer and can be used, for
example, to measure the context switch overhead from kernel-to-user and
user-to-kernel, or the response time for an arbitrary execution in
user-space.
The tracer supports one thread per CPU, the thread must be pinned to
the CPU, and it cannot migrate while holding the timerlat_fd. The reason
is that the tracer is per CPU (nothing prohibits the tracer from
allowing migrations in the future). The tracer monitors the migration
of the thread and disables itself if a migration is detected.
The timerlat_fd is only available for opening/reading when timerlat
tracer is enabled, and NO_OSNOISE_WORKLOAD is set.
The simplest way to activate this feature from user-space is:
-------------------------------- %< -----------------------------------
int main(void)
{
char buffer[1024];
int timerlat_fd;
int retval;
long cpu = 0; /* place in CPU 0 */
cpu_set_t set;
CPU_ZERO(&set);
CPU_SET(cpu, &set);
if (sched_setaffinity(gettid(), sizeof(set), &set) == -1)
return 1;
snprintf(buffer, sizeof(buffer),
"/sys/kernel/tracing/osnoise/per_cpu/cpu%ld/timerlat_fd",
cpu);
timerlat_fd = open(buffer, O_RDONLY);
if (timerlat_fd < 0) {
printf("error opening %s: %s\n", buffer, strerror(errno));
exit(1);
}
for (;;) {
retval = read(timerlat_fd, buffer, 1024);
if (retval < 0)
break;
}
close(timerlat_fd);
exit(0);
}
-------------------------------- >% -----------------------------------
When timerlat is disabled while a workload is holding the timerlat_fd,
a SIGKILL will be sent to that thread.
Link: https://lkml.kernel.org/r/69fe66a863d2792ff4c3a149bf9e32e26468bb3a.1686063934.git.bristot@kernel.org
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: William White <chwhite@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2023-06-06 15:12:27 +00:00
|
|
|
|
|
|
|
static char *timerlat_lat_context[] = {"irq", "thread", "user-ret"};
|
2021-06-22 14:42:28 +00:00
|
|
|
static enum print_line_t
|
|
|
|
trace_timerlat_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct trace_entry *entry = iter->ent;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
struct timerlat_entry *field;
|
|
|
|
|
|
|
|
trace_assign_type(field, entry);
|
|
|
|
|
|
|
|
trace_seq_printf(s, "#%-5u context %6s timer_latency %9llu ns\n",
|
|
|
|
field->seqnum,
|
2023-06-06 15:12:27 +00:00
|
|
|
timerlat_lat_context[field->context],
|
2021-06-22 14:42:28 +00:00
|
|
|
field->timer_latency);
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum print_line_t
|
|
|
|
trace_timerlat_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct timerlat_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
|
|
|
trace_seq_printf(s, "%u %d %llu\n",
|
|
|
|
field->seqnum,
|
|
|
|
field->context,
|
|
|
|
field->timer_latency);
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_timerlat_funcs = {
|
|
|
|
.trace = trace_timerlat_print,
|
|
|
|
.raw = trace_timerlat_raw,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event trace_timerlat_event = {
|
|
|
|
.type = TRACE_TIMERLAT,
|
|
|
|
.funcs = &trace_timerlat_funcs,
|
|
|
|
};
|
|
|
|
|
2013-03-09 02:02:34 +00:00
|
|
|
/* TRACE_BPUTS */
|
|
|
|
static enum print_line_t
|
|
|
|
trace_bputs_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct trace_entry *entry = iter->ent;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
struct bputs_entry *field;
|
|
|
|
|
|
|
|
trace_assign_type(field, entry);
|
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
seq_print_ip_sym(s, field->ip, flags);
|
|
|
|
trace_seq_puts(s, ": ");
|
|
|
|
trace_seq_puts(s, field->str);
|
2013-03-09 02:02:34 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2013-03-09 02:02:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static enum print_line_t
|
|
|
|
trace_bputs_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct bputs_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_printf(s, ": %lx : ", field->ip);
|
|
|
|
trace_seq_puts(s, field->str);
|
2013-03-09 02:02:34 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2013-03-09 02:02:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_bputs_funcs = {
|
|
|
|
.trace = trace_bputs_print,
|
|
|
|
.raw = trace_bputs_raw,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event trace_bputs_event = {
|
|
|
|
.type = TRACE_BPUTS,
|
|
|
|
.funcs = &trace_bputs_funcs,
|
|
|
|
};
|
|
|
|
|
2009-03-12 17:24:49 +00:00
|
|
|
/* TRACE_BPRINT */
|
2009-03-06 16:21:47 +00:00
|
|
|
static enum print_line_t
|
2010-04-22 22:46:14 +00:00
|
|
|
trace_bprint_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2009-03-06 16:21:47 +00:00
|
|
|
{
|
|
|
|
struct trace_entry *entry = iter->ent;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
2009-03-12 17:24:49 +00:00
|
|
|
struct bprint_entry *field;
|
2009-03-06 16:21:47 +00:00
|
|
|
|
|
|
|
trace_assign_type(field, entry);
|
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
seq_print_ip_sym(s, field->ip, flags);
|
|
|
|
trace_seq_puts(s, ": ");
|
|
|
|
trace_seq_bprintf(s, field->fmt, field->buf);
|
2009-03-06 16:21:47 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2009-03-06 16:21:47 +00:00
|
|
|
}
|
|
|
|
|
2009-03-06 16:21:49 +00:00
|
|
|
|
2009-03-12 17:24:49 +00:00
|
|
|
static enum print_line_t
|
2010-04-22 22:46:14 +00:00
|
|
|
trace_bprint_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2009-03-06 16:21:47 +00:00
|
|
|
{
|
2009-03-12 17:24:49 +00:00
|
|
|
struct bprint_entry *field;
|
2009-03-06 16:21:47 +00:00
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
2009-03-06 16:21:49 +00:00
|
|
|
trace_assign_type(field, iter->ent);
|
2009-03-06 16:21:47 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
trace_seq_printf(s, ": %lx : ", field->ip);
|
|
|
|
trace_seq_bprintf(s, field->fmt, field->buf);
|
2009-03-06 16:21:47 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2009-03-06 16:21:47 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event_functions trace_bprint_funcs = {
|
|
|
|
.trace = trace_bprint_print,
|
|
|
|
.raw = trace_bprint_raw,
|
|
|
|
};
|
2009-03-06 16:21:49 +00:00
|
|
|
|
2009-03-12 17:24:49 +00:00
|
|
|
static struct trace_event trace_bprint_event = {
|
|
|
|
.type = TRACE_BPRINT,
|
2010-04-22 22:46:14 +00:00
|
|
|
.funcs = &trace_bprint_funcs,
|
2009-03-12 17:24:49 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* TRACE_PRINT */
|
|
|
|
static enum print_line_t trace_print_print(struct trace_iterator *iter,
|
2010-04-22 22:46:14 +00:00
|
|
|
int flags, struct trace_event *event)
|
2009-03-12 17:24:49 +00:00
|
|
|
{
|
|
|
|
struct print_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
2024-08-23 01:39:05 +00:00
|
|
|
unsigned long ip;
|
2009-03-12 17:24:49 +00:00
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
2024-08-23 01:39:05 +00:00
|
|
|
ip = field->ip + iter->tr->text_delta;
|
|
|
|
|
|
|
|
seq_print_ip_sym(s, ip, flags);
|
2024-03-04 22:43:41 +00:00
|
|
|
trace_seq_printf(s, ": %s", field->buf);
|
2009-03-12 17:24:49 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(s);
|
2009-03-12 17:24:49 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
2009-03-12 17:24:49 +00:00
|
|
|
{
|
|
|
|
struct print_entry *field;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
2024-03-04 22:43:41 +00:00
|
|
|
trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf);
|
2009-03-12 17:24:49 +00:00
|
|
|
|
2014-11-12 15:29:54 +00:00
|
|
|
return trace_handle_return(&iter->seq);
|
2009-03-12 17:24:49 +00:00
|
|
|
}
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event_functions trace_print_funcs = {
|
2009-03-06 16:21:49 +00:00
|
|
|
.trace = trace_print_print,
|
|
|
|
.raw = trace_print_raw,
|
2009-03-06 16:21:47 +00:00
|
|
|
};
|
|
|
|
|
2010-04-22 22:46:14 +00:00
|
|
|
static struct trace_event trace_print_event = {
|
|
|
|
.type = TRACE_PRINT,
|
|
|
|
.funcs = &trace_print_funcs,
|
|
|
|
};
|
|
|
|
|
2016-07-06 19:25:08 +00:00
|
|
|
static enum print_line_t trace_raw_data(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct raw_data_entry *field;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
|
|
|
trace_seq_printf(&iter->seq, "# %x buf:", field->id);
|
|
|
|
|
|
|
|
for (i = 0; i < iter->ent_size - offsetof(struct raw_data_entry, buf); i++)
|
|
|
|
trace_seq_printf(&iter->seq, " %02x",
|
|
|
|
(unsigned char)field->buf[i]);
|
|
|
|
|
|
|
|
trace_seq_putc(&iter->seq, '\n');
|
|
|
|
|
|
|
|
return trace_handle_return(&iter->seq);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_raw_data_funcs = {
|
|
|
|
.trace = trace_raw_data,
|
|
|
|
.raw = trace_raw_data,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event trace_raw_data_event = {
|
|
|
|
.type = TRACE_RAW_DATA,
|
|
|
|
.funcs = &trace_raw_data_funcs,
|
|
|
|
};
|
|
|
|
|
2021-04-15 18:18:50 +00:00
|
|
|
static enum print_line_t
|
|
|
|
trace_func_repeats_raw(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct func_repeats_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
|
|
|
trace_seq_printf(s, "%lu %lu %u %llu\n",
|
|
|
|
field->ip,
|
|
|
|
field->parent_ip,
|
|
|
|
field->count,
|
|
|
|
FUNC_REPEATS_GET_DELTA_TS(field));
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static enum print_line_t
|
|
|
|
trace_func_repeats_print(struct trace_iterator *iter, int flags,
|
|
|
|
struct trace_event *event)
|
|
|
|
{
|
|
|
|
struct func_repeats_entry *field;
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
|
|
|
|
trace_assign_type(field, iter->ent);
|
|
|
|
|
2024-06-12 23:19:46 +00:00
|
|
|
print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, flags);
|
2021-04-15 18:18:50 +00:00
|
|
|
trace_seq_printf(s, " (repeats: %u, last_ts:", field->count);
|
|
|
|
trace_print_time(s, iter,
|
|
|
|
iter->ts - FUNC_REPEATS_GET_DELTA_TS(field));
|
|
|
|
trace_seq_puts(s, ")\n");
|
|
|
|
|
|
|
|
return trace_handle_return(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct trace_event_functions trace_func_repeats_funcs = {
|
|
|
|
.trace = trace_func_repeats_print,
|
|
|
|
.raw = trace_func_repeats_raw,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct trace_event trace_func_repeats_event = {
|
|
|
|
.type = TRACE_FUNC_REPEATS,
|
|
|
|
.funcs = &trace_func_repeats_funcs,
|
|
|
|
};
|
2009-03-12 17:24:49 +00:00
|
|
|
|
2008-12-24 04:24:13 +00:00
|
|
|
static struct trace_event *events[] __initdata = {
|
|
|
|
&trace_fn_event,
|
|
|
|
&trace_ctx_event,
|
|
|
|
&trace_wake_event,
|
|
|
|
&trace_stack_event,
|
|
|
|
&trace_user_stack_event,
|
2013-03-09 02:02:34 +00:00
|
|
|
&trace_bputs_event,
|
2009-03-12 17:24:49 +00:00
|
|
|
&trace_bprint_event,
|
2008-12-24 04:24:13 +00:00
|
|
|
&trace_print_event,
|
2016-06-23 16:45:36 +00:00
|
|
|
&trace_hwlat_event,
|
2021-06-22 14:42:27 +00:00
|
|
|
&trace_osnoise_event,
|
2021-06-22 14:42:28 +00:00
|
|
|
&trace_timerlat_event,
|
2016-07-06 19:25:08 +00:00
|
|
|
&trace_raw_data_event,
|
2021-04-15 18:18:50 +00:00
|
|
|
&trace_func_repeats_event,
|
2008-12-24 04:24:13 +00:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2023-01-04 21:14:12 +00:00
|
|
|
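/* Register each default event so its output callbacks can be looked up by type. */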
__init int init_events(void)
|
2008-12-24 04:24:13 +00:00
|
|
|
{
|
|
|
|
struct trace_event *event;
|
|
|
|
int i, ret;
|
|
|
|
|
|
|
|
for (i = 0; events[i]; i++) {
|
|
|
|
event = events[i];
|
2015-05-05 13:39:12 +00:00
|
|
|
ret = register_trace_event(event);
|
2022-04-24 13:19:32 +00:00
|
|
|
WARN_ONCE(!ret, "event %d failed to register", event->type);
|
2008-12-24 04:24:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|