perf script: Add capstone support for '-F +brstackdisasm'

Support capstone output for the '-F +brstackinsn' branch dump.

The new output is enabled with the new field 'brstackdisasm'.

This was possible before with --xed, but now also allow it for users
that don't have xed using the builtin capstone support.

Before:

  perf record -b emacs -Q --batch '()'
  perf script -F +brstackinsn
  ...
            emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
          00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [3]
          00007f0ab2d17105        insn: 73 51
          00007f0ab2d17107        insn: 48 89 c1
          00007f0ab2d1710a        insn: 48 39 ca
          00007f0ab2d1710d        insn: 73 96
          00007f0ab2d1710f        insn: 48 8d 04 11
          00007f0ab2d17113        insn: 48 d1 e8
          00007f0ab2d17116        insn: 49 8d 34 c1
          00007f0ab2d1711a        insn: 44 3a 06
          00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [6] 3.00 IPC
          00007f0ab2d17105        insn: 73 51                     # PRED 1 cycles [7] 1.00 IPC
          00007f0ab2d17158        insn: 48 8d 50 01
          00007f0ab2d1715c        insn: eb 92                     # PRED 1 cycles [8] 2.00 IPC
          00007f0ab2d170f0        insn: 48 39 ca
          00007f0ab2d170f3        insn: 73 b0                     # PRED 1 cycles [9] 2.00 IPC

After (perf must be compiled with capstone):

  perf script -F +brstackdisasm

  ...
             emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
          00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [3]
          00007f0ab2d17105        jae intel_check_word.constprop.0+0x128
          00007f0ab2d17107        movq %rax, %rcx
          00007f0ab2d1710a        cmpq %rcx, %rdx
          00007f0ab2d1710d        jae intel_check_word.constprop.0+0x75
          00007f0ab2d1710f        leaq (%rcx, %rdx), %rax
          00007f0ab2d17113        shrq $1, %rax
          00007f0ab2d17116        leaq (%r9, %rax, 8), %rsi
          00007f0ab2d1711a        cmpb (%rsi), %r8b
          00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [6] 3.00 IPC
          00007f0ab2d17105        jae intel_check_word.constprop.0+0x128  # PRED 1 cycles [7] 1.00 IPC
          00007f0ab2d17158        leaq 1(%rax), %rdx
          00007f0ab2d1715c        jmp intel_check_word.constprop.0+0xc0   # PRED 1 cycles [8] 2.00 IPC
          00007f0ab2d170f0        cmpq %rcx, %rdx
          00007f0ab2d170f3        jae intel_check_word.constprop.0+0x75   # PRED 1 cycles [9] 2.00 IPC

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/20240401210925.209671-3-ak@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Andi Kleen 2024-04-01 14:08:04 -07:00 committed by Arnaldo Carvalho de Melo
parent 38ab60132b
commit d812044688
5 changed files with 86 additions and 9 deletions

View File

@ -132,9 +132,9 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm,
brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm,
insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size,
code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat.
code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat,
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
@ -257,6 +257,9 @@ OPTIONS
cant know the next sequential instruction after an unconditional branch unless
you calculate that based on its length.
brstackdisasm acts like brstackinsn, but will print disassembled instructions if
perf is built with the capstone library.
The brstackoff field will print an offset into a specific dso/binary.
With the metric option perf script can compute metrics for

View File

@ -136,6 +136,7 @@ enum perf_output_field {
PERF_OUTPUT_RETIRE_LAT = 1ULL << 40,
PERF_OUTPUT_DSOFF = 1ULL << 41,
PERF_OUTPUT_DISASM = 1ULL << 42,
PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43,
};
struct perf_script {
@ -210,6 +211,7 @@ struct output_option {
{.str = "vcpu", .field = PERF_OUTPUT_VCPU},
{.str = "cgroup", .field = PERF_OUTPUT_CGROUP},
{.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT},
{.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM},
};
enum {
@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
"selected. Hence, no address to lookup the source line number.\n");
return -EINVAL;
}
if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set &&
if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
&& !allow_user_set &&
!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
"Hint: run 'perf record -b ...'\n");
@ -1162,6 +1165,20 @@ out:
return ret;
}
static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused,
struct perf_insn *x, uint64_t ip,
u8 *inbuf, int inlen, int *lenp)
{
#ifdef HAVE_LIBCAPSTONE_SUPPORT
if (PRINT_FIELD(BRSTACKDISASM)) {
const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp);
if (p)
return p;
}
#endif
return dump_insn(x, ip, inbuf, inlen, lenp);
}
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
int insn, FILE *fp, int *total_cycles,
@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
{
int ilen = 0;
int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
dump_insn(x, ip, inbuf, len, &ilen));
any_dump_insn(attr, x, ip, inbuf, len, &ilen));
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "ilen: %d\t", ilen);
@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
nr = max_blocks + 1;
x.thread = thread;
x.machine = machine;
x.cpu = sample->cpu;
printed += fprintf(fp, "%c", '\n');
@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
} else {
ilen = 0;
printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
dump_insn(&x, ip, buffer + off, len - off, &ilen));
any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen));
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "\tilen: %d", ilen);
printed += fprintf(fp, "\n");
@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
goto out;
ilen = 0;
printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
dump_insn(&x, sample->ip, buffer, len, &ilen));
any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen));
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "\tilen: %d", ilen);
printed += fprintf(fp, "\n");
@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
for (off = 0; off <= end - start; off += ilen) {
ilen = 0;
printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen));
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "\tilen: %d", ilen);
printed += fprintf(fp, "\n");
@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
printed += fprintf(fp, "\t\t");
printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al);
}
if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
return printed;
@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv)
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
"brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth,"
"brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth,"
"phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
"code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
parse_output_fields),

View File

@ -11,6 +11,7 @@ struct thread;
struct perf_insn {
/* Initialized by callers: */
struct thread *thread;
struct machine *machine;
u8 cpumode;
bool is64bit;
int cpu;

View File

@ -12,6 +12,7 @@
#include "machine.h"
#include "thread.h"
#include "print_insn.h"
#include "dump-insn.h"
#include "map.h"
#include "dso.h"
@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
return 0;
}
static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x)
{
struct addr_location al;
bool printed = false;
if (insn->detail && insn->detail->x86.op_count == 1) {
cs_x86_op *op = &insn->detail->x86.operands[0];
addr_location__init(&al);
if (op->type == X86_OP_IMM &&
thread__find_symbol(thread, x->cpumode, op->imm, &al) &&
al.sym &&
al.addr < al.sym->end) {
snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic,
al.sym->name, al.addr - al.sym->start, op->imm);
printed = true;
}
addr_location__exit(&al);
}
if (!printed)
snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str);
}
const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
u8 *inbuf, int inlen, int *lenp)
{
int ret;
int count;
cs_insn *insn;
csh cs_handle;
ret = capstone_init(x->machine, &cs_handle, x->is64bit);
if (ret < 0)
return NULL;
count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn);
if (count > 0) {
if (machine__normalized_is(x->machine, "x86"))
dump_insn_x86(x->thread, &insn[0], x);
else
snprintf(x->out, sizeof(x->out), "%s %s",
insn[0].mnemonic, insn[0].op_str);
*lenp = insn->size;
cs_free(insn, count);
} else {
return NULL;
}
return x->out;
}
static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
cs_insn *insn, FILE *fp)
{

View File

@ -8,9 +8,12 @@
struct perf_sample;
struct thread;
struct machine;
struct perf_insn;
size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
struct machine *machine, FILE *fp, struct addr_location *al);
size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
u8 *inbuf, int inlen, int *lenp);
#endif /* PERF_PRINT_INSN_H */