mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-17 22:05:08 +00:00
30f29bae91
The tool pointer (to a struct largely of function pointers) is passed around but is unchanged except at initialization. Change parameter and variable types to be const to lower the possibilities of what could happen with a tool. Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ian Rogers <irogers@google.com> Tested-by: Adrian Hunter <adrian.hunter@intel.com> Tested-by: Leo Yan <leo.yan@arm.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ilkka Koskinen <ilkka@os.amperecomputing.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.g.garry@oracle.com> Cc: Jonathan Cameron <jonathan.cameron@huawei.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linux.dev> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mike Leach <mike.leach@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Desaulniers <ndesaulniers@google.com> Cc: Nick Terrell <terrelln@fb.com> Cc: Oliver Upton <oliver.upton@linux.dev> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Sun Haiyong <sunhaiyong@loongson.cn> Cc: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Yanteng Si <siyanteng@loongson.cn> Cc: Yicong Yang <yangyicong@hisilicon.com> Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20240812204720.631678-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
263 lines
6.8 KiB
C
263 lines
6.8 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Benchmark synthesis of perf events such as at the start of a 'perf
|
|
* record'. Synthesis is done on the current process and the 'dummy' event
|
|
* handlers are invoked that support dump_trace but otherwise do nothing.
|
|
*
|
|
* Copyright 2019 Google LLC.
|
|
*/
|
|
#include <stdio.h>
|
|
#include "bench.h"
|
|
#include "../util/debug.h"
|
|
#include "../util/session.h"
|
|
#include "../util/stat.h"
|
|
#include "../util/synthetic-events.h"
|
|
#include "../util/target.h"
|
|
#include "../util/thread_map.h"
|
|
#include "../util/tool.h"
|
|
#include "../util/util.h"
|
|
#include <linux/atomic.h>
|
|
#include <linux/err.h>
|
|
#include <linux/time64.h>
|
|
#include <subcmd/parse-options.h>
|
|
|
|
static unsigned int min_threads = 1;
|
|
static unsigned int max_threads = UINT_MAX;
|
|
static unsigned int single_iterations = 10000;
|
|
static unsigned int multi_iterations = 10;
|
|
static bool run_st;
|
|
static bool run_mt;
|
|
|
|
static const struct option options[] = {
|
|
OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
|
|
OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
|
|
OPT_UINTEGER('m', "min-threads", &min_threads,
|
|
"Minimum number of threads in multithreaded bench"),
|
|
OPT_UINTEGER('M', "max-threads", &max_threads,
|
|
"Maximum number of threads in multithreaded bench"),
|
|
OPT_UINTEGER('i', "single-iterations", &single_iterations,
|
|
"Number of iterations used to compute single-threaded average"),
|
|
OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
|
|
"Number of iterations used to compute multi-threaded average"),
|
|
OPT_END()
|
|
};
|
|
|
|
static const char *const bench_usage[] = {
|
|
"perf bench internals synthesize <options>",
|
|
NULL
|
|
};
|
|
|
|
static atomic_t event_count;
|
|
|
|
static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
|
|
union perf_event *event __maybe_unused,
|
|
struct perf_sample *sample __maybe_unused,
|
|
struct machine *machine __maybe_unused)
|
|
{
|
|
atomic_inc(&event_count);
|
|
return 0;
|
|
}
|
|
|
|
static int do_run_single_threaded(struct perf_session *session,
|
|
struct perf_thread_map *threads,
|
|
struct target *target, bool data_mmap)
|
|
{
|
|
const unsigned int nr_threads_synthesize = 1;
|
|
struct timeval start, end, diff;
|
|
u64 runtime_us;
|
|
unsigned int i;
|
|
double time_average, time_stddev, event_average, event_stddev;
|
|
int err;
|
|
struct stats time_stats, event_stats;
|
|
|
|
init_stats(&time_stats);
|
|
init_stats(&event_stats);
|
|
|
|
for (i = 0; i < single_iterations; i++) {
|
|
atomic_set(&event_count, 0);
|
|
gettimeofday(&start, NULL);
|
|
err = __machine__synthesize_threads(&session->machines.host,
|
|
NULL,
|
|
target, threads,
|
|
process_synthesized_event,
|
|
true, data_mmap,
|
|
nr_threads_synthesize);
|
|
if (err)
|
|
return err;
|
|
|
|
gettimeofday(&end, NULL);
|
|
timersub(&end, &start, &diff);
|
|
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
|
|
update_stats(&time_stats, runtime_us);
|
|
update_stats(&event_stats, atomic_read(&event_count));
|
|
}
|
|
|
|
time_average = avg_stats(&time_stats);
|
|
time_stddev = stddev_stats(&time_stats);
|
|
printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
|
|
data_mmap ? "data " : "", time_average, time_stddev);
|
|
|
|
event_average = avg_stats(&event_stats);
|
|
event_stddev = stddev_stats(&event_stats);
|
|
printf(" Average num. events: %.3f (+- %.3f)\n",
|
|
event_average, event_stddev);
|
|
|
|
printf(" Average time per event %.3f usec\n",
|
|
time_average / event_average);
|
|
return 0;
|
|
}
|
|
|
|
static int run_single_threaded(void)
|
|
{
|
|
struct perf_session *session;
|
|
struct target target = {
|
|
.pid = "self",
|
|
};
|
|
struct perf_thread_map *threads;
|
|
int err;
|
|
|
|
perf_set_singlethreaded();
|
|
session = perf_session__new(NULL, NULL);
|
|
if (IS_ERR(session)) {
|
|
pr_err("Session creation failed.\n");
|
|
return PTR_ERR(session);
|
|
}
|
|
threads = thread_map__new_by_pid(getpid());
|
|
if (!threads) {
|
|
pr_err("Thread map creation failed.\n");
|
|
err = -ENOMEM;
|
|
goto err_out;
|
|
}
|
|
|
|
puts(
|
|
"Computing performance of single threaded perf event synthesis by\n"
|
|
"synthesizing events on the perf process itself:");
|
|
|
|
err = do_run_single_threaded(session, threads, &target, false);
|
|
if (err)
|
|
goto err_out;
|
|
|
|
err = do_run_single_threaded(session, threads, &target, true);
|
|
|
|
err_out:
|
|
if (threads)
|
|
perf_thread_map__put(threads);
|
|
|
|
perf_session__delete(session);
|
|
return err;
|
|
}
|
|
|
|
static int do_run_multi_threaded(struct target *target,
|
|
unsigned int nr_threads_synthesize)
|
|
{
|
|
struct timeval start, end, diff;
|
|
u64 runtime_us;
|
|
unsigned int i;
|
|
double time_average, time_stddev, event_average, event_stddev;
|
|
int err;
|
|
struct stats time_stats, event_stats;
|
|
struct perf_session *session;
|
|
|
|
init_stats(&time_stats);
|
|
init_stats(&event_stats);
|
|
for (i = 0; i < multi_iterations; i++) {
|
|
session = perf_session__new(NULL, NULL);
|
|
if (IS_ERR(session))
|
|
return PTR_ERR(session);
|
|
|
|
atomic_set(&event_count, 0);
|
|
gettimeofday(&start, NULL);
|
|
err = __machine__synthesize_threads(&session->machines.host,
|
|
NULL,
|
|
target, NULL,
|
|
process_synthesized_event,
|
|
true, false,
|
|
nr_threads_synthesize);
|
|
if (err) {
|
|
perf_session__delete(session);
|
|
return err;
|
|
}
|
|
|
|
gettimeofday(&end, NULL);
|
|
timersub(&end, &start, &diff);
|
|
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
|
|
update_stats(&time_stats, runtime_us);
|
|
update_stats(&event_stats, atomic_read(&event_count));
|
|
perf_session__delete(session);
|
|
}
|
|
|
|
time_average = avg_stats(&time_stats);
|
|
time_stddev = stddev_stats(&time_stats);
|
|
printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
|
|
time_average, time_stddev);
|
|
|
|
event_average = avg_stats(&event_stats);
|
|
event_stddev = stddev_stats(&event_stats);
|
|
printf(" Average num. events: %.3f (+- %.3f)\n",
|
|
event_average, event_stddev);
|
|
|
|
printf(" Average time per event %.3f usec\n",
|
|
time_average / event_average);
|
|
return 0;
|
|
}
|
|
|
|
static int run_multi_threaded(void)
|
|
{
|
|
struct target target = {
|
|
.cpu_list = "0"
|
|
};
|
|
unsigned int nr_threads_synthesize;
|
|
int err;
|
|
|
|
if (max_threads == UINT_MAX)
|
|
max_threads = sysconf(_SC_NPROCESSORS_ONLN);
|
|
|
|
puts(
|
|
"Computing performance of multi threaded perf event synthesis by\n"
|
|
"synthesizing events on CPU 0:");
|
|
|
|
for (nr_threads_synthesize = min_threads;
|
|
nr_threads_synthesize <= max_threads;
|
|
nr_threads_synthesize++) {
|
|
if (nr_threads_synthesize == 1)
|
|
perf_set_singlethreaded();
|
|
else
|
|
perf_set_multithreaded();
|
|
|
|
printf(" Number of synthesis threads: %u\n",
|
|
nr_threads_synthesize);
|
|
|
|
err = do_run_multi_threaded(&target, nr_threads_synthesize);
|
|
if (err)
|
|
return err;
|
|
}
|
|
perf_set_singlethreaded();
|
|
return 0;
|
|
}
|
|
|
|
int bench_synthesize(int argc, const char **argv)
|
|
{
|
|
int err = 0;
|
|
|
|
argc = parse_options(argc, argv, options, bench_usage, 0);
|
|
if (argc) {
|
|
usage_with_options(bench_usage, options);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/*
|
|
* If neither single threaded or multi-threaded are specified, default
|
|
* to running just single threaded.
|
|
*/
|
|
if (!run_st && !run_mt)
|
|
run_st = true;
|
|
|
|
if (run_st)
|
|
err = run_single_threaded();
|
|
|
|
if (!err && run_mt)
|
|
err = run_multi_threaded();
|
|
|
|
return err;
|
|
}
|