fgraph: Remove unnecessary disabling of interrupts and recursion

The function graph tracer disables interrupts and also prevents recursion
via NMIs in its recording code. There's no reason to do this today. That
disabling goes back to 2008, when the function graph tracer was first
introduced and recursion protection wasn't yet part of the code.

Today, there's no reason to disable interrupts or prevent the code from
recursing, as the infrastructure can easily handle it.
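
In short, the recording path can drop the irq-save and the
atomic_inc_return() recursion guard and instead disable preemption and do a
plain read of the per-CPU "disabled" counter. A condensed before/after
sketch of the entry path (simplified from the diff below; the retaddr case
is omitted):

	/* Old pattern: disable interrupts, use the counter as a recursion guard */
	local_irq_save(flags);
	data = per_cpu_ptr(tr->array_buffer.data, raw_smp_processor_id());
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1))
		ret = __trace_graph_entry(tr, trace, tracing_gen_ctx_flags(flags));
	atomic_dec(&data->disabled);
	local_irq_restore(flags);

	/* New pattern: only disable preemption, just check that tracing is enabled */
	preempt_disable_notrace();
	data = per_cpu_ptr(tr->array_buffer.data, raw_smp_processor_id());
	disabled = atomic_read(&data->disabled);
	if (likely(!disabled))
		ret = __trace_graph_entry(tr, trace, tracing_gen_ctx());
	preempt_enable_notrace();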

Before this change:

~# echo function_graph > /sys/kernel/tracing/current_tracer
~# perf stat -r 10 ./hackbench 10
Time: 4.240
Time: 4.236
Time: 4.106
Time: 4.014
Time: 4.314
Time: 3.830
Time: 4.063
Time: 4.323
Time: 3.763
Time: 3.727

 Performance counter stats for '/work/c/hackbench 10' (10 runs):

         33,937.20 msec task-clock                       #    7.008 CPUs utilized               ( +-  1.85% )
            18,220      context-switches                 #  536.874 /sec                        ( +-  6.41% )
               624      cpu-migrations                   #   18.387 /sec                        ( +-  9.07% )
            11,319      page-faults                      #  333.528 /sec                        ( +-  1.97% )
    76,657,643,617      cycles                           #    2.259 GHz                         ( +-  0.40% )
   141,403,302,768      instructions                     #    1.84  insn per cycle              ( +-  0.37% )
    25,518,463,888      branches                         #  751.932 M/sec                       ( +-  0.35% )
       156,151,050      branch-misses                    #    0.61% of all branches             ( +-  0.63% )

            4.8423 +- 0.0892 seconds time elapsed  ( +-  1.84% )

After this change:

~# echo function_graph > /sys/kernel/tracing/current_tracer
~# perf stat -r 10 ./hackbench 10
Time: 3.340
Time: 3.192
Time: 3.129
Time: 2.579
Time: 2.589
Time: 2.798
Time: 2.791
Time: 2.955
Time: 3.044
Time: 3.065

 Performance counter stats for './hackbench 10' (10 runs):

         24,416.30 msec task-clock                       #    6.996 CPUs utilized               ( +-  2.74% )
            16,764      context-switches                 #  686.590 /sec                        ( +-  5.85% )
               469      cpu-migrations                   #   19.208 /sec                        ( +-  6.14% )
            11,519      page-faults                      #  471.775 /sec                        ( +-  1.92% )
    53,895,628,450      cycles                           #    2.207 GHz                         ( +-  0.52% )
   105,552,664,638      instructions                     #    1.96  insn per cycle              ( +-  0.47% )
    17,808,672,667      branches                         #  729.376 M/sec                       ( +-  0.48% )
       133,075,435      branch-misses                    #    0.75% of all branches             ( +-  0.59% )

             3.490 +- 0.112 seconds time elapsed  ( +-  3.22% )

Also removed the unneeded "unlikely()" around the retaddr code.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: https://lore.kernel.org/20241223184941.204074053@goodmis.org
Fixes: 9cd2992f2d6c8 ("fgraph: Have set_graph_notrace only affect function_graph tracer") # Performance only
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

@@ -181,10 +181,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 	struct trace_array *tr = gops->private;
 	struct trace_array_cpu *data;
 	struct fgraph_times *ftimes;
-	unsigned long flags;
 	unsigned int trace_ctx;
 	long disabled;
-	int ret;
+	int ret = 0;
 	int cpu;
 
 	if (*task_var & TRACE_GRAPH_NOTRACE)
@@ -235,25 +234,21 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 	if (tracing_thresh)
 		return 1;
 
-	local_irq_save(flags);
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
-	disabled = atomic_inc_return(&data->disabled);
-	if (likely(disabled == 1)) {
-		trace_ctx = tracing_gen_ctx_flags(flags);
-		if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
-			     tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
+	disabled = atomic_read(&data->disabled);
+	if (likely(!disabled)) {
+		trace_ctx = tracing_gen_ctx();
+		if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+		    tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
 			unsigned long retaddr = ftrace_graph_top_ret_addr(current);
-
 			ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
-		} else
+		} else {
 			ret = __trace_graph_entry(tr, trace, trace_ctx);
-	} else {
-		ret = 0;
+		}
 	}
-
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 
 	return ret;
 }
@@ -320,7 +315,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
 	struct trace_array *tr = gops->private;
 	struct trace_array_cpu *data;
 	struct fgraph_times *ftimes;
-	unsigned long flags;
 	unsigned int trace_ctx;
 	long disabled;
 	int size;
@@ -341,16 +335,15 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
 
 	trace->calltime = ftimes->calltime;
 
-	local_irq_save(flags);
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
-	disabled = atomic_inc_return(&data->disabled);
-	if (likely(disabled == 1)) {
-		trace_ctx = tracing_gen_ctx_flags(flags);
+	disabled = atomic_read(&data->disabled);
+	if (likely(!disabled)) {
+		trace_ctx = tracing_gen_ctx();
 		__trace_graph_return(tr, trace, trace_ctx);
 	}
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 }
 
 static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,