Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (synced 2025-01-06 05:02:31 +00:00)
perf: Optimize context reschedule for single PMU cases
Currently re-scheduling a context will reschedule all active PMUs for that
context, even if it is known only a single event is added.

Namhyung reported that changing this to only reschedule the affected PMU
when possible provides significant performance gains under certain
conditions.

Therefore, allow partial context reschedules for a specific PMU, that of
the event modified.

While the patch looks somewhat noisy, it mostly just propagates a new @pmu
argument through the callchain and modifies the epc loop to only pick the
'epc->pmu == @pmu' case.

Reported-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240807115549.920950699@infradead.org
This commit is contained in:
commit 2d17cf1abc (parent ea1992f36b)
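The patch below threads a new @pmu argument through the reschedule callchain: a NULL @pmu keeps the old behaviour of touching every PMU context in the perf_event_context, while a non-NULL @pmu restricts the work to the matching perf_event_pmu_context. The standalone C sketch below only illustrates that filtering rule; the struct names, the resched_ctx() helper and the PMU names in it are simplified stand-ins invented for the example, not the kernel's types or API.

/* Standalone sketch (not kernel code): simplified stand-ins for
 * struct perf_event_pmu_context / struct pmu, used only to show the
 * "NULL pmu means every PMU" filtering rule from this patch. */
#include <stdio.h>
#include <stddef.h>

struct pmu { const char *name; };

struct epc {                            /* stand-in for a per-PMU context */
        struct pmu *pmu;
};

/* A NULL @pmu keeps the old behaviour (touch every PMU context);
 * a non-NULL @pmu only touches the matching one. */
static void resched_ctx(struct epc *epcs, size_t n, struct pmu *pmu)
{
        for (size_t i = 0; i < n; i++) {
                if (pmu && epcs[i].pmu != pmu)
                        continue;       /* skip unaffected PMUs */
                printf("rescheduling events on %s\n", epcs[i].pmu->name);
        }
}

int main(void)
{
        struct pmu cpu = { "cpu" }, ibs = { "ibs_op" };
        struct epc epcs[] = { { &cpu }, { &ibs } };

        resched_ctx(epcs, 2, NULL);     /* old behaviour: both PMUs */
        resched_ctx(epcs, 2, &ibs);     /* new fast path: only ibs_op */
        return 0;
}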
@@ -685,30 +685,32 @@ do { \
 	___p; \
 })
 
+#define for_each_epc(_epc, _ctx, _pmu, _cgroup) \
+	list_for_each_entry(_epc, &((_ctx)->pmu_ctx_list), pmu_ctx_entry) \
+		if (_cgroup && !_epc->nr_cgroups) \
+			continue; \
+		else if (_pmu && _epc->pmu != _pmu) \
+			continue; \
+		else
+
 static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
+	for_each_epc(pmu_ctx, ctx, NULL, cgroup)
 		perf_pmu_disable(pmu_ctx->pmu);
-	}
 }
 
 static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
+	for_each_epc(pmu_ctx, ctx, NULL, cgroup)
 		perf_pmu_enable(pmu_ctx->pmu);
-	}
 }
 
-static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type);
-static void ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type);
+static void ctx_sched_out(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type);
+static void ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type);
 
 #ifdef CONFIG_CGROUP_PERF
 
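The new for_each_epc() macro above uses a common C idiom for filtering iterators: each skip condition ends in "continue;" and the chain ends in a bare "else", so the single statement the caller writes after the macro becomes the body of that final else and only runs for matching entries. The standalone program below demonstrates the same shape with a simplified, array-based FOR_EACH_MATCH macro; it is an illustration of the idiom under made-up names, not the kernel macro.

/* Illustration of the filter-macro shape used by for_each_epc(): skipped
 * entries fall through via "continue", the trailing "else" binds the
 * caller's statement as the loop body. FOR_EACH_MATCH and struct item are
 * invented for this example. */
#include <stdio.h>
#include <stddef.h>

struct item { int pmu_id; int nr_cgroups; };

#define FOR_EACH_MATCH(_it, _arr, _n, _pmu_id, _cgroup) \
        for (struct item *_it = (_arr); _it < (_arr) + (_n); _it++) \
                if ((_cgroup) && !_it->nr_cgroups) \
                        continue; \
                else if ((_pmu_id) && _it->pmu_id != (_pmu_id)) \
                        continue; \
                else

int main(void)
{
        struct item items[] = { { 1, 0 }, { 2, 1 }, { 2, 0 } };

        /* Only entries whose pmu_id matches 2 reach the loop body. */
        FOR_EACH_MATCH(it, items, 3, 2, 0)
                printf("matched pmu_id=%d nr_cgroups=%d\n",
                       it->pmu_id, it->nr_cgroups);
        return 0;
}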
@@ -865,7 +867,7 @@ static void perf_cgroup_switch(struct task_struct *task)
 		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 		perf_ctx_disable(&cpuctx->ctx, true);
 
-		ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
+		ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
 		/*
 		 * must not be done before ctxswout due
 		 * to update_cgrp_time_from_cpuctx() in
@@ -877,7 +879,7 @@ static void perf_cgroup_switch(struct task_struct *task)
 		 * perf_cgroup_set_timestamp() in ctx_sched_in()
 		 * to not have to pass task around
 		 */
-		ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
+		ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
 
 		perf_ctx_enable(&cpuctx->ctx, true);
 		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -2656,7 +2658,8 @@ static void add_event_to_ctx(struct perf_event *event,
 }
 
 static void task_ctx_sched_out(struct perf_event_context *ctx,
-			       enum event_type_t event_type)
+			       struct pmu *pmu,
+			       enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 
@@ -2666,18 +2669,19 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
 	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
 		return;
 
-	ctx_sched_out(ctx, event_type);
+	ctx_sched_out(ctx, pmu, event_type);
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
-				struct perf_event_context *ctx)
+				struct perf_event_context *ctx,
+				struct pmu *pmu)
 {
-	ctx_sched_in(&cpuctx->ctx, EVENT_PINNED);
+	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_PINNED);
 	if (ctx)
-		ctx_sched_in(ctx, EVENT_PINNED);
-	ctx_sched_in(&cpuctx->ctx, EVENT_FLEXIBLE);
+		ctx_sched_in(ctx, pmu, EVENT_PINNED);
+	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
 	if (ctx)
-		ctx_sched_in(ctx, EVENT_FLEXIBLE);
+		ctx_sched_in(ctx, pmu, EVENT_FLEXIBLE);
 }
 
 /*
@@ -2695,16 +2699,12 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
  * event_type is a bit mask of the types of events involved. For CPU events,
  * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
  */
-/*
- * XXX: ctx_resched() reschedule entire perf_event_context while adding new
- * event to the context or enabling existing event in the context. We can
- * probably optimize it by rescheduling only affected pmu_ctx.
- */
 static void ctx_resched(struct perf_cpu_context *cpuctx,
 			struct perf_event_context *task_ctx,
-			enum event_type_t event_type)
+			struct pmu *pmu, enum event_type_t event_type)
 {
 	bool cpu_event = !!(event_type & EVENT_CPU);
+	struct perf_event_pmu_context *epc;
 
 	/*
 	 * If pinned groups are involved, flexible groups also need to be
@@ -2715,10 +2715,14 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 
 	event_type &= EVENT_ALL;
 
-	perf_ctx_disable(&cpuctx->ctx, false);
+	for_each_epc(epc, &cpuctx->ctx, pmu, false)
+		perf_pmu_disable(epc->pmu);
+
 	if (task_ctx) {
-		perf_ctx_disable(task_ctx, false);
-		task_ctx_sched_out(task_ctx, event_type);
+		for_each_epc(epc, task_ctx, pmu, false)
+			perf_pmu_disable(epc->pmu);
+
+		task_ctx_sched_out(task_ctx, pmu, event_type);
 	}
 
 	/*
@@ -2729,15 +2733,19 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 	 *  - otherwise, do nothing more.
 	 */
 	if (cpu_event)
-		ctx_sched_out(&cpuctx->ctx, event_type);
+		ctx_sched_out(&cpuctx->ctx, pmu, event_type);
 	else if (event_type & EVENT_PINNED)
-		ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
+		ctx_sched_out(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
 
-	perf_event_sched_in(cpuctx, task_ctx);
+	perf_event_sched_in(cpuctx, task_ctx, pmu);
 
-	perf_ctx_enable(&cpuctx->ctx, false);
-	if (task_ctx)
-		perf_ctx_enable(task_ctx, false);
+	for_each_epc(epc, &cpuctx->ctx, pmu, false)
+		perf_pmu_enable(epc->pmu);
+
+	if (task_ctx) {
+		for_each_epc(epc, task_ctx, pmu, false)
+			perf_pmu_enable(epc->pmu);
+	}
 }
 
 void perf_pmu_resched(struct pmu *pmu)
@@ -2746,7 +2754,7 @@ void perf_pmu_resched(struct pmu *pmu)
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
 
 	perf_ctx_lock(cpuctx, task_ctx);
-	ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
+	ctx_resched(cpuctx, task_ctx, pmu, EVENT_ALL|EVENT_CPU);
 	perf_ctx_unlock(cpuctx, task_ctx);
 }
 
@@ -2802,9 +2810,10 @@ static int __perf_install_in_context(void *info)
 #endif
 
 	if (reprogram) {
-		ctx_sched_out(ctx, EVENT_TIME);
+		ctx_sched_out(ctx, NULL, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
-		ctx_resched(cpuctx, task_ctx, get_event_type(event));
+		ctx_resched(cpuctx, task_ctx, event->pmu_ctx->pmu,
+			    get_event_type(event));
 	} else {
 		add_event_to_ctx(event, ctx);
 	}
@@ -2948,7 +2957,7 @@ static void __perf_event_enable(struct perf_event *event,
 		return;
 
 	if (ctx->is_active)
-		ctx_sched_out(ctx, EVENT_TIME);
+		ctx_sched_out(ctx, NULL, EVENT_TIME);
 
 	perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
 	perf_cgroup_event_enable(event, ctx);
@@ -2957,7 +2966,7 @@ static void __perf_event_enable(struct perf_event *event,
 		return;
 
 	if (!event_filter_match(event)) {
-		ctx_sched_in(ctx, EVENT_TIME);
+		ctx_sched_in(ctx, NULL, EVENT_TIME);
 		return;
 	}
 
@@ -2966,7 +2975,7 @@ static void __perf_event_enable(struct perf_event *event,
 	 * then don't put it on unless the group is on.
 	 */
 	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
-		ctx_sched_in(ctx, EVENT_TIME);
+		ctx_sched_in(ctx, NULL, EVENT_TIME);
 		return;
 	}
 
@@ -2974,7 +2983,7 @@ static void __perf_event_enable(struct perf_event *event,
 	if (ctx->task)
 		WARN_ON_ONCE(task_ctx != ctx);
 
-	ctx_resched(cpuctx, task_ctx, get_event_type(event));
+	ctx_resched(cpuctx, task_ctx, event->pmu_ctx->pmu, get_event_type(event));
 }
 
 /*
@@ -3276,8 +3285,17 @@ static void __pmu_ctx_sched_out(struct perf_event_pmu_context *pmu_ctx,
 	perf_pmu_enable(pmu);
 }
 
+/*
+ * Be very careful with the @pmu argument since this will change ctx state.
+ * The @pmu argument works for ctx_resched(), because that is symmetric in
+ * ctx_sched_out() / ctx_sched_in() usage and the ctx state ends up invariant.
+ *
+ * However, if you were to be asymmetrical, you could end up with messed up
+ * state, eg. ctx->is_active cleared even though most EPCs would still actually
+ * be active.
+ */
 static void
-ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
+ctx_sched_out(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 	struct perf_event_pmu_context *pmu_ctx;
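The comment added above warns that @pmu-filtered scheduling must stay symmetric: ctx->is_active is context-wide state, while events live per perf_event_pmu_context. The toy program below models that invariant with made-up types and helpers (toy_ctx, toy_sched_out, toy_sched_in); it only illustrates how an unpaired partial sched_out would clear a context-wide flag while other PMU contexts still have active events, and is not kernel code.

/* Toy model (not kernel code) of the invariant described in the comment
 * above: a symmetric per-PMU out/in pair leaves the context-wide flag as it
 * was, an asymmetric out leaves it clear despite remaining active EPCs. */
#include <stdbool.h>
#include <stdio.h>

struct toy_ctx { bool is_active; int active_epcs; };

static void toy_sched_out(struct toy_ctx *ctx, int epcs_stopped)
{
        ctx->active_epcs -= epcs_stopped;
        ctx->is_active = false;          /* cleared for the whole context */
}

static void toy_sched_in(struct toy_ctx *ctx, int epcs_started)
{
        ctx->active_epcs += epcs_started;
        ctx->is_active = true;           /* set again for the whole context */
}

int main(void)
{
        struct toy_ctx ctx = { .is_active = true, .active_epcs = 3 };

        /* Symmetric per-PMU reschedule: out then in, state is invariant. */
        toy_sched_out(&ctx, 1);
        toy_sched_in(&ctx, 1);
        printf("symmetric:  is_active=%d active_epcs=%d\n",
               ctx.is_active, ctx.active_epcs);

        /* Asymmetric use: only the out half ran for one PMU, yet the
         * context-wide flag now claims nothing is active. */
        toy_sched_out(&ctx, 1);
        printf("asymmetric: is_active=%d active_epcs=%d\n",
               ctx.is_active, ctx.active_epcs);
        return 0;
}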
@@ -3331,11 +3349,8 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
 
 	is_active ^= ctx->is_active; /* changed bits */
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
+	for_each_epc(pmu_ctx, ctx, pmu, cgroup)
 		__pmu_ctx_sched_out(pmu_ctx, is_active);
-	}
 }
 
 /*
@@ -3579,7 +3594,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
 
 inside_switch:
 	perf_ctx_sched_task_cb(ctx, false);
-	task_ctx_sched_out(ctx, EVENT_ALL);
+	task_ctx_sched_out(ctx, NULL, EVENT_ALL);
 
 	perf_ctx_enable(ctx, false);
 	raw_spin_unlock(&ctx->lock);
@@ -3877,29 +3892,22 @@ static void pmu_groups_sched_in(struct perf_event_context *ctx,
 			   merge_sched_in, &can_add_hw);
 }
 
-static void ctx_groups_sched_in(struct perf_event_context *ctx,
-				struct perf_event_groups *groups,
-				bool cgroup)
+static void __pmu_ctx_sched_in(struct perf_event_pmu_context *pmu_ctx,
+			       enum event_type_t event_type)
 {
-	struct perf_event_pmu_context *pmu_ctx;
+	struct perf_event_context *ctx = pmu_ctx->ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
-		pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu);
-	}
-}
-
-static void __pmu_ctx_sched_in(struct perf_event_context *ctx,
-			       struct pmu *pmu)
-{
-	pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
+	if (event_type & EVENT_PINNED)
+		pmu_groups_sched_in(ctx, &ctx->pinned_groups, pmu_ctx->pmu);
+	if (event_type & EVENT_FLEXIBLE)
+		pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu_ctx->pmu);
 }
 
 static void
-ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
+ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+	struct perf_event_pmu_context *pmu_ctx;
 	int is_active = ctx->is_active;
 	bool cgroup = event_type & EVENT_CGROUP;
 
@@ -3935,12 +3943,16 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	if (is_active & EVENT_PINNED)
-		ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup);
+	if (is_active & EVENT_PINNED) {
+		for_each_epc(pmu_ctx, ctx, pmu, cgroup)
+			__pmu_ctx_sched_in(pmu_ctx, EVENT_PINNED);
+	}
 
 	/* Then walk through the lower prio flexible groups */
-	if (is_active & EVENT_FLEXIBLE)
-		ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup);
+	if (is_active & EVENT_FLEXIBLE) {
+		for_each_epc(pmu_ctx, ctx, pmu, cgroup)
+			__pmu_ctx_sched_in(pmu_ctx, EVENT_FLEXIBLE);
+	}
 }
 
 static void perf_event_context_sched_in(struct task_struct *task)
@@ -3983,10 +3995,10 @@ static void perf_event_context_sched_in(struct task_struct *task)
 	 */
 	if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) {
 		perf_ctx_disable(&cpuctx->ctx, false);
-		ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
+		ctx_sched_out(&cpuctx->ctx, NULL, EVENT_FLEXIBLE);
 	}
 
-	perf_event_sched_in(cpuctx, ctx);
+	perf_event_sched_in(cpuctx, ctx, NULL);
 
 	perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
 
@@ -4327,14 +4339,14 @@ static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc)
 		update_context_time(&cpuctx->ctx);
 		__pmu_ctx_sched_out(cpu_epc, EVENT_FLEXIBLE);
 		rotate_ctx(&cpuctx->ctx, cpu_event);
-		__pmu_ctx_sched_in(&cpuctx->ctx, pmu);
+		__pmu_ctx_sched_in(cpu_epc, EVENT_FLEXIBLE);
 	}
 
 	if (task_event)
 		rotate_ctx(task_epc->ctx, task_event);
 
 	if (task_event || (task_epc && cpu_event))
-		__pmu_ctx_sched_in(task_epc->ctx, pmu);
+		__pmu_ctx_sched_in(task_epc, EVENT_FLEXIBLE);
 
 	perf_pmu_enable(pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -4400,7 +4412,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 
 	cpuctx = this_cpu_ptr(&perf_cpu_context);
 	perf_ctx_lock(cpuctx, ctx);
-	ctx_sched_out(ctx, EVENT_TIME);
+	ctx_sched_out(ctx, NULL, EVENT_TIME);
 
 	list_for_each_entry(event, &ctx->event_list, event_entry) {
 		enabled |= event_enable_on_exec(event, ctx);
@@ -4412,9 +4424,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 	 */
 	if (enabled) {
 		clone_ctx = unclone_ctx(ctx);
-		ctx_resched(cpuctx, ctx, event_type);
+		ctx_resched(cpuctx, ctx, NULL, event_type);
 	} else {
-		ctx_sched_in(ctx, EVENT_TIME);
+		ctx_sched_in(ctx, NULL, EVENT_TIME);
 	}
 	perf_ctx_unlock(cpuctx, ctx);
 
@@ -13202,7 +13214,7 @@ static void perf_event_exit_task_context(struct task_struct *child)
 	 * in.
	 */
 	raw_spin_lock_irq(&child_ctx->lock);
-	task_ctx_sched_out(child_ctx, EVENT_ALL);
+	task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
 
 	/*
 	 * Now that the context is inactive, destroy the task <-> ctx relation
@@ -13751,7 +13763,7 @@ static void __perf_event_exit_context(void *__info)
 	struct perf_event *event;
 
 	raw_spin_lock(&ctx->lock);
-	ctx_sched_out(ctx, EVENT_TIME);
+	ctx_sched_out(ctx, NULL, EVENT_TIME);
 	list_for_each_entry(event, &ctx->event_list, event_entry)
 		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
 	raw_spin_unlock(&ctx->lock);