tools/perf: Correctly calculate sample period for inherited SAMPLE_READ values

Sample period calculation in deliver_sample_value is updated to
calculate the per-thread period delta for events that are inherit +
PERF_SAMPLE_READ. When the sampling event has this configuration, the
read_format.id is used with the tid from the sample to lookup the
storage of the previously accumulated counter total before calculating
the delta. All existing valid configurations where read_format.value
represents some global value continue to use just the read_format.id to
locate the storage of the previously accumulated total.

perf_sample_id is modified to support tracking per-thread
values, along with the existing global per-id values. In the
per-thread case, values are stored in a hash by tid within the
perf_sample_id, and are dynamically allocated as the number is not known
ahead of time.

Signed-off-by: Ben Gainey <ben.gainey@arm.com>
Cc: james.clark@arm.com
Link: https://lore.kernel.org/r/20241001121505.1009685-2-ben.gainey@arm.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
Ben Gainey 2024-10-01 13:15:04 +01:00 committed by Namhyung Kim
parent ad321b19d2
commit 80c281fca2
3 changed files with 129 additions and 12 deletions

View File

@ -5,6 +5,7 @@
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <internal/evsel.h>
#include <linux/zalloc.h>
@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
int idx)
{
INIT_LIST_HEAD(&evsel->node);
INIT_LIST_HEAD(&evsel->per_stream_periods);
evsel->attr = *attr;
evsel->idx = idx;
evsel->leader = evsel;
@ -531,10 +533,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
/*
 * Release every id-related resource owned by the evsel: the sample_id
 * xyarray, the flat id array, and all dynamically allocated per-thread
 * period nodes accumulated on per_stream_periods.
 */
void perf_evsel__free_id(struct perf_evsel *evsel)
{
	struct perf_sample_id_period *cur, *tmp;

	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	zfree(&evsel->id);
	evsel->ids = 0;

	/* Unlink and free each per-tid accumulator node. */
	perf_evsel_for_each_per_thread_period_safe(evsel, tmp, cur) {
		list_del_init(&cur->node);
		free(cur);
	}
}
bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel)
{
return (evsel->attr.sample_type & PERF_SAMPLE_READ) &&
(evsel->attr.sample_type & PERF_SAMPLE_TID) &&
evsel->attr.inherit;
}
/*
 * Return the address of the accumulated period counter for this sample id.
 *
 * In the global (!per_thread) case this is simply &sid->period. In the
 * per-thread case the counter lives in a per-tid node hashed inside the
 * perf_sample_id; the node is created on first use and also linked onto
 * the owning evsel's per_stream_periods list so it can be freed later.
 * Returns NULL when a node would be needed but cannot be created (no
 * owning evsel, or allocation failure).
 */
u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread)
{
	struct perf_sample_id_period *node;
	struct hlist_head *bucket;

	if (!per_thread)
		return &sid->period;

	bucket = &sid->periods[hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS)];

	/* Fast path: this tid has been seen before. */
	hlist_for_each_entry(node, bucket, hnode) {
		if (node->tid == tid)
			return &node->period;
	}

	/* A new node needs an owning evsel to manage its lifetime. */
	if (sid->evsel == NULL)
		return NULL;

	node = zalloc(sizeof(*node));
	if (node == NULL)
		return NULL;

	INIT_LIST_HEAD(&node->node);
	node->tid = tid;
	list_add_tail(&node->node, &sid->evsel->per_stream_periods);
	hlist_add_head(&node->hnode, bucket);

	return &node->period;
}
void perf_counts_values__scale(struct perf_counts_values *count,

View File

@ -11,6 +11,32 @@
struct perf_thread_map;
struct xyarray;
/**
 * The per-thread accumulated period storage node.
 *
 * Linked onto the owning evsel's per_stream_periods list (for cleanup)
 * and into a perf_sample_id hash bucket (for lookup by tid).
 */
struct perf_sample_id_period {
	struct list_head node;
	struct hlist_node hnode;
	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
	u64 period;
	/* The TID that the value belongs to */
	u32 tid;
};
/**
 * perf_evsel_for_each_per_thread_period_safe - safely iterate through all the
 * per_stream_periods
 * @evsel: perf_evsel instance to iterate
 * @tmp: struct perf_sample_id_period temp iterator
 * @item: struct perf_sample_id_period iterator
 */
#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \
	list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node)

/* Size of the per-sample-id hash of per-thread period nodes. */
#define PERF_SAMPLE_ID__HLIST_BITS 4
#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS)
/*
* Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
* more than one entry in the evlist.
@ -34,8 +60,32 @@ struct perf_sample_id {
pid_t machine_pid;
struct perf_cpu vcpu;
/* Holds total ID period value for PERF_SAMPLE_READ processing. */
u64 period;
/*
* Per-thread, and global event counts are mutually exclusive:
* Whilst it is possible to combine events into a group with differing
* values of PERF_SAMPLE_READ, it is not valid to have inconsistent
* values for `inherit`. Therefore it is not possible to have a
* situation where a per-thread event is sampled as a global event;
* all !inherit groups are global, and all groups where the sampling
* event is inherit + PERF_SAMPLE_READ will be per-thread. Any event
* that is part of such a group that is inherit but not PERF_SAMPLE_READ
* will be read as per-thread. If such an event can also trigger a
* sample (such as with sample_period > 0) then it will not cause
* `read_format` to be included in its PERF_RECORD_SAMPLE, and
* therefore will not expose the per-thread group members as global.
*/
union {
/*
* Holds total ID period value for PERF_SAMPLE_READ processing
* (when period is not per-thread).
*/
u64 period;
/*
* Holds total ID period value for PERF_SAMPLE_READ processing
* (when period is per-thread).
*/
struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE];
};
};
struct perf_evsel {
@ -58,6 +108,10 @@ struct perf_evsel {
u32 ids;
struct perf_evsel *leader;
/* For events where the read_format value is per-thread rather than
* global, stores the per-thread cumulative period */
struct list_head per_stream_periods;
/* parse modifier helper */
int nr_members;
/*
@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
void perf_evsel__free_id(struct perf_evsel *evsel);
bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel);
u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid,
bool per_thread);
#endif /* __LIBPERF_INTERNAL_EVSEL_H */

View File

@ -1171,18 +1171,24 @@ static int deliver_sample_value(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
struct sample_read_value *v,
struct machine *machine)
struct machine *machine,
bool per_thread)
{
struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
struct evsel *evsel;
u64 *storage = NULL;
if (sid) {
sample->id = v->id;
sample->period = v->value - sid->period;
sid->period = v->value;
storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread);
}
if (!sid || sid->evsel == NULL) {
if (storage) {
sample->id = v->id;
sample->period = v->value - *storage;
*storage = v->value;
}
if (!storage || sid->evsel == NULL) {
++evlist->stats.nr_unknown_id;
return 0;
}
@ -1203,17 +1209,19 @@ static int deliver_sample_group(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine,
u64 read_format)
u64 read_format,
bool per_thread)
{
int ret = -EINVAL;
struct sample_read_value *v = sample->read.group.values;
if (tool->dont_split_sample_group)
return deliver_sample_value(evlist, tool, event, sample, v, machine);
return deliver_sample_value(evlist, tool, event, sample, v, machine,
per_thread);
sample_read_group__for_each(v, sample->read.group.nr, read_format) {
ret = deliver_sample_value(evlist, tool, event, sample, v,
machine);
machine, per_thread);
if (ret)
break;
}
@ -1228,6 +1236,7 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
/* We know evsel != NULL. */
u64 sample_type = evsel->core.attr.sample_type;
u64 read_format = evsel->core.attr.read_format;
bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core);
/* Standard sample delivery. */
if (!(sample_type & PERF_SAMPLE_READ))
@ -1236,10 +1245,11 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
/* For PERF_SAMPLE_READ we have either single or group mode. */
if (read_format & PERF_FORMAT_GROUP)
return deliver_sample_group(evlist, tool, event, sample,
machine, read_format);
machine, read_format, per_thread);
else
return deliver_sample_value(evlist, tool, event, sample,
&sample->read.one, machine);
&sample->read.one, machine,
per_thread);
}
static int machines__deliver_event(struct machines *machines,