mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 10:46:33 +00:00
perf script: Add capstone support for '-F +brstackdisasm'
Support capstone output for the '-F +brstackinsn' branch dump. The new output is enabled with the new field 'brstackdisasm'. This was possible before with --xed, but now also allow it for users that don't have xed using the builtin capstone support. Before: perf record -b emacs -Q --batch '()' perf script -F +brstackinsn ... emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [3] 00007f0ab2d17105 insn: 73 51 00007f0ab2d17107 insn: 48 89 c1 00007f0ab2d1710a insn: 48 39 ca 00007f0ab2d1710d insn: 73 96 00007f0ab2d1710f insn: 48 8d 04 11 00007f0ab2d17113 insn: 48 d1 e8 00007f0ab2d17116 insn: 49 8d 34 c1 00007f0ab2d1711a insn: 44 3a 06 00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [6] 3.00 IPC 00007f0ab2d17105 insn: 73 51 # PRED 1 cycles [7] 1.00 IPC 00007f0ab2d17158 insn: 48 8d 50 01 00007f0ab2d1715c insn: eb 92 # PRED 1 cycles [8] 2.00 IPC 00007f0ab2d170f0 insn: 48 39 ca 00007f0ab2d170f3 insn: 73 b0 # PRED 1 cycles [9] 2.00 IPC After (perf must be compiled with capstone): perf script -F +brstackdisasm ... emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237: 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [3] 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 00007f0ab2d17107 movq %rax, %rcx 00007f0ab2d1710a cmpq %rcx, %rdx 00007f0ab2d1710d jae intel_check_word.constprop.0+0x75 00007f0ab2d1710f leaq (%rcx, %rdx), %rax 00007f0ab2d17113 shrq $1, %rax 00007f0ab2d17116 leaq (%r9, %rax, 8), %rsi 00007f0ab2d1711a cmpb (%rsi), %r8b 00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [6] 3.00 IPC 00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 # PRED 1 cycles [7] 1.00 IPC 00007f0ab2d17158 leaq 1(%rax), %rdx 00007f0ab2d1715c jmp intel_check_word.constprop.0+0xc0 # PRED 1 cycles [8] 2.00 IPC 00007f0ab2d170f0 cmpq %rcx, %rdx 00007f0ab2d170f3 jae intel_check_word.constprop.0+0x75 # PRED 1 cycles [9] 2.00 IPC Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Andi Kleen <ak@linux.intel.com> Link: https://lore.kernel.org/r/20240401210925.209671-3-ak@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
38ab60132b
commit
d812044688
@ -132,9 +132,9 @@ OPTIONS
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
|
||||
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
|
||||
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm,
|
||||
brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm,
|
||||
insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size,
|
||||
code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat.
|
||||
code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat,
|
||||
|
||||
Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
@ -257,6 +257,9 @@ OPTIONS
|
||||
can’t know the next sequential instruction after an unconditional branch unless
|
||||
you calculate that based on its length.
|
||||
|
||||
brstackdisasm acts like brstackinsn, but will print disassembled instructions if
|
||||
perf is built with the capstone library.
|
||||
|
||||
The brstackoff field will print an offset into a specific dso/binary.
|
||||
|
||||
With the metric option perf script can compute metrics for
|
||||
|
@ -136,6 +136,7 @@ enum perf_output_field {
|
||||
PERF_OUTPUT_RETIRE_LAT = 1ULL << 40,
|
||||
PERF_OUTPUT_DSOFF = 1ULL << 41,
|
||||
PERF_OUTPUT_DISASM = 1ULL << 42,
|
||||
PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43,
|
||||
};
|
||||
|
||||
struct perf_script {
|
||||
@ -210,6 +211,7 @@ struct output_option {
|
||||
{.str = "vcpu", .field = PERF_OUTPUT_VCPU},
|
||||
{.str = "cgroup", .field = PERF_OUTPUT_CGROUP},
|
||||
{.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT},
|
||||
{.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM},
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
|
||||
"selected. Hence, no address to lookup the source line number.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set &&
|
||||
if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
|
||||
&& !allow_user_set &&
|
||||
!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
|
||||
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
|
||||
"Hint: run 'perf record -b ...'\n");
|
||||
@ -1162,6 +1165,20 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused,
|
||||
struct perf_insn *x, uint64_t ip,
|
||||
u8 *inbuf, int inlen, int *lenp)
|
||||
{
|
||||
#ifdef HAVE_LIBCAPSTONE_SUPPORT
|
||||
if (PRINT_FIELD(BRSTACKDISASM)) {
|
||||
const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp);
|
||||
if (p)
|
||||
return p;
|
||||
}
|
||||
#endif
|
||||
return dump_insn(x, ip, inbuf, inlen, lenp);
|
||||
}
|
||||
|
||||
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
|
||||
struct perf_insn *x, u8 *inbuf, int len,
|
||||
int insn, FILE *fp, int *total_cycles,
|
||||
@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
|
||||
{
|
||||
int ilen = 0;
|
||||
int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
|
||||
dump_insn(x, ip, inbuf, len, &ilen));
|
||||
any_dump_insn(attr, x, ip, inbuf, len, &ilen));
|
||||
|
||||
if (PRINT_FIELD(BRSTACKINSNLEN))
|
||||
printed += fprintf(fp, "ilen: %d\t", ilen);
|
||||
@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
|
||||
nr = max_blocks + 1;
|
||||
|
||||
x.thread = thread;
|
||||
x.machine = machine;
|
||||
x.cpu = sample->cpu;
|
||||
|
||||
printed += fprintf(fp, "%c", '\n');
|
||||
@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
|
||||
} else {
|
||||
ilen = 0;
|
||||
printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
|
||||
dump_insn(&x, ip, buffer + off, len - off, &ilen));
|
||||
any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen));
|
||||
if (PRINT_FIELD(BRSTACKINSNLEN))
|
||||
printed += fprintf(fp, "\tilen: %d", ilen);
|
||||
printed += fprintf(fp, "\n");
|
||||
@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
|
||||
goto out;
|
||||
ilen = 0;
|
||||
printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
|
||||
dump_insn(&x, sample->ip, buffer, len, &ilen));
|
||||
any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen));
|
||||
if (PRINT_FIELD(BRSTACKINSNLEN))
|
||||
printed += fprintf(fp, "\tilen: %d", ilen);
|
||||
printed += fprintf(fp, "\n");
|
||||
@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
|
||||
for (off = 0; off <= end - start; off += ilen) {
|
||||
ilen = 0;
|
||||
printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
|
||||
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
|
||||
any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen));
|
||||
if (PRINT_FIELD(BRSTACKINSNLEN))
|
||||
printed += fprintf(fp, "\tilen: %d", ilen);
|
||||
printed += fprintf(fp, "\n");
|
||||
@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
|
||||
printed += fprintf(fp, "\t\t");
|
||||
printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al);
|
||||
}
|
||||
if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
|
||||
if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
|
||||
printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
|
||||
|
||||
return printed;
|
||||
@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv)
|
||||
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff,"
|
||||
"addr,symoff,srcline,period,iregs,uregs,brstack,"
|
||||
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
|
||||
"brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth,"
|
||||
"brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth,"
|
||||
"phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
|
||||
"code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
|
||||
parse_output_fields),
|
||||
|
@ -11,6 +11,7 @@ struct thread;
|
||||
struct perf_insn {
|
||||
/* Initialized by callers: */
|
||||
struct thread *thread;
|
||||
struct machine *machine;
|
||||
u8 cpumode;
|
||||
bool is64bit;
|
||||
int cpu;
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "machine.h"
|
||||
#include "thread.h"
|
||||
#include "print_insn.h"
|
||||
#include "dump-insn.h"
|
||||
#include "map.h"
|
||||
#include "dso.h"
|
||||
|
||||
@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x)
|
||||
{
|
||||
struct addr_location al;
|
||||
bool printed = false;
|
||||
|
||||
if (insn->detail && insn->detail->x86.op_count == 1) {
|
||||
cs_x86_op *op = &insn->detail->x86.operands[0];
|
||||
|
||||
addr_location__init(&al);
|
||||
if (op->type == X86_OP_IMM &&
|
||||
thread__find_symbol(thread, x->cpumode, op->imm, &al) &&
|
||||
al.sym &&
|
||||
al.addr < al.sym->end) {
|
||||
snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic,
|
||||
al.sym->name, al.addr - al.sym->start, op->imm);
|
||||
printed = true;
|
||||
}
|
||||
addr_location__exit(&al);
|
||||
}
|
||||
|
||||
if (!printed)
|
||||
snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str);
|
||||
}
|
||||
|
||||
const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
|
||||
u8 *inbuf, int inlen, int *lenp)
|
||||
{
|
||||
int ret;
|
||||
int count;
|
||||
cs_insn *insn;
|
||||
csh cs_handle;
|
||||
|
||||
ret = capstone_init(x->machine, &cs_handle, x->is64bit);
|
||||
if (ret < 0)
|
||||
return NULL;
|
||||
|
||||
count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn);
|
||||
if (count > 0) {
|
||||
if (machine__normalized_is(x->machine, "x86"))
|
||||
dump_insn_x86(x->thread, &insn[0], x);
|
||||
else
|
||||
snprintf(x->out, sizeof(x->out), "%s %s",
|
||||
insn[0].mnemonic, insn[0].op_str);
|
||||
*lenp = insn->size;
|
||||
cs_free(insn, count);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
return x->out;
|
||||
}
|
||||
|
||||
static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
|
||||
cs_insn *insn, FILE *fp)
|
||||
{
|
||||
|
@ -8,9 +8,12 @@
|
||||
struct perf_sample;
|
||||
struct thread;
|
||||
struct machine;
|
||||
struct perf_insn;
|
||||
|
||||
size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
|
||||
struct machine *machine, FILE *fp, struct addr_location *al);
|
||||
size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
|
||||
const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
|
||||
u8 *inbuf, int inlen, int *lenp);
|
||||
|
||||
#endif /* PERF_PRINT_INSN_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user