perf/urgent fixes and one improvement:

Fixes:
 
 - Fix prev/next_prio formatting for deadline tasks in libtraceevent (Daniel Bristot de Oliveira)
 
 - Robustify reading of build-ids from /sys/kernel/note (Arnaldo Carvalho de Melo)
 
 - Fix building some sample/bpf in Alpine Linux 3.4 (Arnaldo Carvalho de Melo)
 
 - Fix 'make install-bin' to install libtraceevent plugins (Arnaldo Carvalho de Melo)
 
 - Fix 'perf record --switch-output' documentation and comment (Jiri Olsa)
 
 - 'perf probe' fixes for cross arch probing (Masami Hiramatsu)
 
 Improvement:
 
 - Show total scheduling time in 'perf sched timehist' (Namhyumg Kim)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJYbS6KAAoJENZQFvNTUqpAWqkP/1xdvbDqeTA2YjcWG+ffQJye
 LULSNQpRShvBGpB3zsOzOebOtw0R5oWb7YAp4JFZNyMIk5qcnS9gVwdw9Qx1lc/W
 Gq8OuhI28Pr9AHE37eTJ1+12HI2aAOylwKFqUpEWMzoaUqBDRh5JV2MeVGVQXBRe
 YBJ4Qwk1a7Ng74uzN/pdiC6V7pjddPEoLZEDG5p35vrgtNAY1JGlX5rvNR7CO6nN
 72QHRnB4r1d9fgNGS5oO5zEU1q5+JN5phe+gDANdk/8pCqDegRjZpHnj5gcm71jr
 mEvbstmlV1XPZSA9aFVu2L5LZuv6asS02HjrbGydqO2DUTpDgrynHsjAuqqaTnwk
 kLGwA2I+Kyh8g155H+HrQANONe2TTOXRkBVWIoFBWkhd1JUS/p3GE2qvqx5N36sr
 U26d9i/Bk/pbOYOdRUFMc4lckZY2HrrRWj8dVr06qLr2rlbYoLv8hpqvdyHr9MJS
 Id2Yqqd1JVzBDU3/+GfdFDBjltVtpSG1mdc6LWy9IpXFMJoSOQNsrbjf+mgMadeV
 EbtY8MdDZ525EB8niv9E9Vfd56LiWTKCjg9Mc758+U+Ns50YlQFsDq2hBO2+jQ/k
 W3QNX+WJiLgxqQK8adveXoYgCSWPNVttIvg8Lj1KNk8Y//vUC1KSrvDfaKiROzLB
 iF0u5mLRAQ+Pfec6X5PP
 =o9KL
 -----END PGP SIGNATURE-----

Merge tag 'perf-urgent-for-mingo-4.10-20170104' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/urgent fixes and one improvement from Arnaldo Carvalho de Melo:

Fixes:

  - Fix prev/next_prio formatting for deadline tasks in libtraceevent (Daniel Bristot de Oliveira)

  - Robustify reading of build-ids from /sys/kernel/note (Arnaldo Carvalho de Melo)

  - Fix building some sample/bpf in Alpine Linux 3.4 (Arnaldo Carvalho de Melo)

  - Fix 'make install-bin' to install libtraceevent plugins (Arnaldo Carvalho de Melo)

  - Fix 'perf record --switch-output' documentation and comment (Jiri Olsa)

  - Fix 'perf probe' for cross arch probing (Masami Hiramatsu)

Improvement:

  - Show total scheduling time in 'perf sched timehist' (Namhyumg Kim)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ingo Molnar 2017-01-05 08:33:02 +01:00
commit 4e06d4f083
11 changed files with 108 additions and 47 deletions

View File

@ -4,7 +4,7 @@
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <net/ethernet.h>
#include <linux/if_ether.h>
#include <net/if.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>

View File

@ -9,7 +9,6 @@
#include <string.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>
#include <errno.h>

View File

@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p,
else
err = get_arg(p, opt, flags, (const char **)opt->value);
if (opt->set)
*(bool *)opt->set = true;
/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
if (opt->flags & PARSE_OPT_NOEMPTY) {
const char *val = *(const char **)opt->value;

View File

@ -137,6 +137,11 @@ struct option {
{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
.value = check_vtype(v, const char **), (a), .help = (h), \
.flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
.value = check_vtype(v, const char **), (a), .help = (h), \
.flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
.set = check_vtype(os, bool *)}
#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
#define OPT_DATE(s, l, v, h) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }

View File

@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s,
trace_seq_printf(s, "%lld ", val);
if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
trace_seq_printf(s, "[%lld] ", val);
trace_seq_printf(s, "[%d] ", (int) val);
if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
write_state(s, val);
@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s,
trace_seq_printf(s, "%lld", val);
if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
trace_seq_printf(s, " [%lld]", val);
trace_seq_printf(s, " [%d]", (int) val);
return 0;
}

View File

@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that
particular perf.data snapshot should be kept or not.
Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
The reason for the latter two is to reduce the data file switching
overhead. You can still switch them on with:
--switch-output --no-no-buildid --no-no-buildid-cache
--dry-run::
Parse options then exit. --dry-run can be used to detect errors in cmdline

View File

@ -704,9 +704,9 @@ install-tests: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
install-bin: install-tools install-tests
install-bin: install-tools install-tests install-traceevent-plugins
install: install-bin try-install-man install-traceevent-plugins
install: install-bin try-install-man
install-python_ext:
$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'

View File

@ -1405,7 +1405,7 @@ static bool dry_run;
* perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
* using pipes, etc.
*/
struct option __record_options[] = {
static struct option __record_options[] = {
OPT_CALLBACK('e', "event", &record.evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
* overhead. Still generate buildid if they are required
* explicitly using
*
* perf record --signal-trigger --no-no-buildid \
* perf record --switch-output --no-no-buildid \
* --no-no-buildid-cache
*
* Following code equals to:

View File

@ -209,6 +209,7 @@ struct perf_sched {
u64 skipped_samples;
const char *time_str;
struct perf_time_interval ptime;
struct perf_time_interval hist_time;
};
/* per thread run time data */
@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
timehist_print_sample(sched, sample, &al, thread, t);
out:
if (sched->hist_time.start == 0 && t >= ptime->start)
sched->hist_time.start = t;
if (ptime->end == 0 || t <= ptime->end)
sched->hist_time.end = t;
if (tr) {
/* time of this sched_switch event becomes last time task seen */
tr->last_time = sample->time;
@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched,
struct thread *t;
struct thread_runtime *r;
int i;
u64 hist_time = sched->hist_time.end - sched->hist_time.start;
memset(&totals, 0, sizeof(totals));
@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched,
totals.sched_count += r->run_stats.n;
printf(" CPU %2d idle for ", i);
print_sched_time(r->total_run_time, 6);
printf(" msec\n");
printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
} else
printf(" CPU %2d idle entire time window\n", i);
}
@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched,
printf("\n"
" Total number of unique tasks: %" PRIu64 "\n"
"Total number of context switches: %" PRIu64 "\n"
" Total run time (msec): ",
"Total number of context switches: %" PRIu64 "\n",
totals.task_count, totals.sched_count);
printf(" Total run time (msec): ");
print_sched_time(totals.total_run_time, 2);
printf("\n");
printf(" Total scheduling time (msec): ");
print_sched_time(hist_time, 2);
printf(" (x %d)\n", sched->max_cpu);
}
typedef int (*sched_handler)(struct perf_tool *tool,

View File

@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module)
/* A file path -- this is an offline module */
if (module && strchr(module, '/'))
return machine__findnew_module_map(host_machine, 0, module);
return dso__new_map(module);
if (!module)
module = "kernel";
@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
if (strncmp(pos->dso->short_name + 1, module,
pos->dso->short_name_len - 2) == 0 &&
module[pos->dso->short_name_len - 2] == '\0') {
map__get(pos);
return pos;
}
}
@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user)
return kernel_get_module_map(target);
}
static void put_target_map(struct map *map, bool user)
{
if (map && user) {
/* Only the user map needs to be released */
map__put(map);
}
}
static int convert_exec_to_group(const char *exec, char **result)
{
char *ptr1, *ptr2, *exec_copy;
@ -267,21 +259,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
return true;
}
/*
* NOTE:
* '.gnu.linkonce.this_module' section of kernel module elf directly
* maps to 'struct module' from linux/module.h. This section contains
* actual module name which will be used by kernel after loading it.
* But, we cannot use 'struct module' here since linux/module.h is not
* exposed to user-space. Offset of 'name' has remained same from long
* time, so hardcoding it here.
*/
#ifdef __LP64__
#define MOD_NAME_OFFSET 24
#else
#define MOD_NAME_OFFSET 12
#endif
/*
* @module can be module name of module file path. In case of path,
* inspect elf and find out what is actual module name.
@ -296,6 +273,7 @@ static char *find_module_name(const char *module)
Elf_Data *data;
Elf_Scn *sec;
char *mod_name = NULL;
int name_offset;
fd = open(module, O_RDONLY);
if (fd < 0)
@ -317,7 +295,21 @@ static char *find_module_name(const char *module)
if (!data || !data->d_buf)
goto ret_err;
mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
/*
* NOTE:
* '.gnu.linkonce.this_module' section of kernel module elf directly
* maps to 'struct module' from linux/module.h. This section contains
* actual module name which will be used by kernel after loading it.
* But, we cannot use 'struct module' here since linux/module.h is not
* exposed to user-space. Offset of 'name' has remained same from long
* time, so hardcoding it here.
*/
if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
name_offset = 12;
else /* expect ELFCLASS64 by default */
name_offset = 24;
mod_name = strdup((char *)data->d_buf + name_offset);
ret_err:
elf_end(elf);
@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
}
out:
put_target_map(map, uprobes);
map__put(map);
return ret;
}
@ -618,6 +610,51 @@ error:
return ret ? : -ENOENT;
}
/*
* Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
* and generate new symbols with suffixes such as .constprop.N or .isra.N
* etc. Since those symbols are not recorded in DWARF, we have to find
* correct generated symbols from offline ELF binary.
* For online kernel or uprobes we don't need this because those are
* rebased on _text, or already a section relative address.
*/
static int
post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
int ntevs, const char *pathname)
{
struct symbol *sym;
struct map *map;
unsigned long stext = 0;
u64 addr;
int i;
/* Prepare a map for offline binary */
map = dso__new_map(pathname);
if (!map || get_text_start_address(pathname, &stext) < 0) {
pr_warning("Failed to get ELF symbols for %s\n", pathname);
return -EINVAL;
}
for (i = 0; i < ntevs; i++) {
addr = tevs[i].point.address + tevs[i].point.offset - stext;
sym = map__find_symbol(map, addr);
if (!sym)
continue;
if (!strcmp(sym->name, tevs[i].point.symbol))
continue;
/* If we have no realname, use symbol for it */
if (!tevs[i].point.realname)
tevs[i].point.realname = tevs[i].point.symbol;
else
free(tevs[i].point.symbol);
tevs[i].point.symbol = strdup(sym->name);
tevs[i].point.offset = addr - sym->start;
}
map__put(map);
return 0;
}
static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
int ntevs, const char *exec)
{
@ -679,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
/* Skip post process if the target is an offline kernel */
if (symbol_conf.ignore_vmlinux_buildid)
return 0;
return post_process_offline_probe_trace_events(tevs, ntevs,
symbol_conf.vmlinux_name);
reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) {
@ -2869,7 +2907,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
}
out:
put_target_map(map, pev->uprobes);
map__put(map);
free(syms);
return ret;
@ -3362,10 +3400,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
return ret;
/* Get a symbol map */
if (user)
map = dso__new_map(target);
else
map = kernel_get_module_map(target);
map = get_target_map(target, user);
if (!map) {
pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
return -EINVAL;
@ -3397,9 +3432,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
}
end:
if (user) {
map__put(map);
}
map__put(map);
exit_probe_symbol_maps();
return ret;

View File

@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
break;
} else {
int n = namesz + descsz;
if (n > (int)sizeof(bf)) {
n = sizeof(bf);
pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
__func__, filename, nhdr.n_namesz, nhdr.n_descsz);
}
if (read(fd, bf, n) != n)
break;
}