perf_events: Fix races in group composition

Group siblings don't pin each other or the parent, so when we destroy
events we must make sure to clean up all cross referencing pointers.

In particular, for destruction of a group leader we must be able to
find all its siblings and remove their reference to it.

This means that detaching an event from its context must not detach it
from the group, otherwise we can end up failing to clear all pointers.

Solve this by clearly separating the attachment to a context and
attachment to a group, and keep the group composed until we destroy
the events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2010-05-27 15:47:49 +02:00 committed by Ingo Molnar
parent ac9721f3f5
commit 8a49542c05
2 changed files with 71 additions and 24 deletions

View File

@ -631,6 +631,9 @@ struct swevent_hlist {
struct rcu_head rcu_head;
};
#define PERF_ATTACH_CONTEXT 0x01
#define PERF_ATTACH_GROUP 0x02
/**
* struct perf_event - performance event kernel representation:
*/
@ -646,6 +649,7 @@ struct perf_event {
const struct pmu *pmu;
enum perf_event_active_state state;
unsigned int attach_state;
atomic64_t count;
/*

View File

@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{
struct perf_event *group_leader = event->group_leader;
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
event->attach_state |= PERF_ATTACH_CONTEXT;
/*
* Depending on whether it is a standalone or sibling event,
* add it straight to the context's event list, or to the group
* leader's sibling list:
* If we're a stand alone event or group leader, we go to the context
* list, group events are kept attached to the group so that
* perf_group_detach can, at all times, locate all siblings.
*/
if (group_leader == event) {
if (event->group_leader == event) {
struct list_head *list;
if (is_software_event(event))
@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
list = ctx_group_list(event, ctx);
list_add_tail(&event->group_entry, list);
} else {
if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
!is_software_event(event))
group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
}
list_add_rcu(&event->event_entry, &ctx->event_list);
@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_stat++;
}
/*
 * Attach an event to its group: set PERF_ATTACH_GROUP in attach_state and,
 * unless the event is its own group leader, append it to the leader's
 * sibling_list and bump the leader's nr_siblings count.
 */
static void perf_group_attach(struct perf_event *event)
{
struct perf_event *group_leader = event->group_leader;
/* Warn (once) if the event is already marked as attached to a group. */
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
event->attach_state |= PERF_ATTACH_GROUP;
/* An event that is its own group leader is not added to a sibling list. */
if (group_leader == event)
return;
/*
 * Clear the leader's PERF_GROUP_SOFTWARE flag when the new sibling is
 * not a software event (is_software_event() returns false for it).
 */
if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
!is_software_event(event))
group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
}
/*
* Remove an event from the lists for its context.
* Must be called with ctx->mutex and ctx->lock held.
@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
if (list_empty(&event->group_entry))
/*
* We can have double detach due to exit/hot-unplug + close.
*/
if (!(event->attach_state & PERF_ATTACH_CONTEXT))
return;
event->attach_state &= ~PERF_ATTACH_CONTEXT;
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
list_del_init(&event->group_entry);
list_del_rcu(&event->event_entry);
if (event->group_leader != event)
event->group_leader->nr_siblings--;
if (event->group_leader == event)
list_del_init(&event->group_entry);
update_group_times(event);
@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->state = PERF_EVENT_STATE_OFF;
}
static void
perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
static void perf_group_detach(struct perf_event *event)
{
struct perf_event *sibling, *tmp;
struct list_head *list = NULL;
/*
* We can have double detach due to exit/hot-unplug + close.
*/
if (!(event->attach_state & PERF_ATTACH_GROUP))
return;
event->attach_state &= ~PERF_ATTACH_GROUP;
/*
* If this is a sibling, remove it from its group.
*/
if (event->group_leader != event) {
list_del_init(&event->group_entry);
event->group_leader->nr_siblings--;
return;
}
if (!list_empty(&event->group_entry))
list = &event->group_entry;
/*
* If this was a group event with sibling events then
* upgrade the siblings to singleton events by adding them
* to the context list directly:
* to whatever list we are on.
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
struct list_head *list;
list = ctx_group_list(event, ctx);
list_move_tail(&sibling->group_entry, list);
if (list)
list_move_tail(&sibling->group_entry, list);
sibling->group_leader = sibling;
/* Inherit group flags from the previous leader */
@ -727,6 +762,7 @@ static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
list_add_event(event, ctx);
perf_group_attach(event);
event->tstamp_enabled = ctx->time;
event->tstamp_running = ctx->time;
event->tstamp_stopped = ctx->time;
@ -1894,8 +1930,8 @@ int perf_event_release_kernel(struct perf_event *event)
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
raw_spin_lock_irq(&ctx->lock);
perf_group_detach(event);
list_del_event(event, ctx);
perf_destroy_group(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex);
@ -5127,6 +5163,12 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
/*
* Drop the reference on the group_event after placing the
* new event on the sibling_list. This ensures destruction
* of the group leader will find the pointer to itself in
* perf_group_detach().
*/
fput_light(group_file, fput_needed);
fd_install(event_fd, event_file);
return event_fd;
@ -5448,6 +5490,7 @@ static void perf_free_event(struct perf_event *event,
fput(parent->filp);
perf_group_detach(event);
list_del_event(event, ctx);
free_event(event);
}