Kan Liang edf3ce0ed3 perf env: Find correct branch counter info on hybrid
No event is printed in the "Branch Counter" column on hybrid machines.

For example,

  $ perf record -e "{cpu_core/branch-instructions/pp,cpu_core/branches/}:S" -j any,counter
  $ perf report --total-cycles

  # Branch counter abbr list:
  # cpu_core/branch-instructions/pp = A
  # cpu_core/branches/ = B
  # '-' No event occurs
  # '+' Event occurrences may be lost due to branch counter saturated
  #
  # Sampled Cycles%  Sampled Cycles  Avg Cycles%  Avg Cycles  Branch Counter
  # ...............  ..............  ...........  ..........  ..............
            44.54%          727.1K        0.00%           1   |+   |+   |
            36.31%          592.7K        0.00%           2   |+   |+   |
            17.83%          291.1K        0.00%           1   |+   |+   |

The branch counter information (br_cntr_width and br_cntr_nr) in the
perf_env is retrieved from the CPU_PMU_CAPS. However, the CPU_PMU_CAPS
is not available on hybrid machines. Without the width information, the
number of occurrences of an event cannot be calculated.

For a hybrid machine, the caps information should be retrieved from the
PMU_CAPS, and stored in the perf_env->pmu_caps.

Add a perf_env__find_br_cntr_info() to return the correct branch counter
information from the corresponding fields.

Committer notes:

While testing I couldn't s ee those "Branch counter" columns enabled by
pressing 'B' on the TUI, after reporting it to the list Kan explained
the situation:

<quote Kan Liang>
For a hybrid client, the "Branch Counter" feature is only supported
starting from the just released Lunar Lake. Perf falls back to only
"ANY" on your Raptor Lake.

The "The branch counter is not available" message is expected.

Here is the 'perf evlist' result from my Lunar Lake machine,

  # perf evlist -v
  cpu_core/branch-instructions/pp: type: 4 (cpu_core), size: 136, config: 0xc4 (branch-instructions), { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|READ|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID|GROUP|LOST, disabled: 1, freq: 1, enable_on_exec: 1, precise_ip: 2, sample_id_all: 1, exclude_guest: 1, branch_sample_type: ANY|COUNTERS
  #
</quote>

Fixes: 6f9d8d1de2c61288 ("perf script: Add branch counters")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240909184201.553519-1-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-11 13:08:46 -03:00

199 lines
4.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_ENV_H
#define __PERF_ENV_H
#include <linux/types.h>
#include <linux/rbtree.h>
#include "cpumap.h"
#include "rwsem.h"
struct perf_cpu_map;
struct cpu_topology_map {
int socket_id;
int die_id;
int cluster_id;
int core_id;
};
struct cpu_cache_level {
u32 level;
u32 line_size;
u32 sets;
u32 ways;
char *type;
char *size;
char *map;
};
struct numa_node {
u32 node;
u64 mem_total;
u64 mem_free;
struct perf_cpu_map *map;
};
struct memory_node {
u64 node;
u64 size;
unsigned long *set;
};
struct hybrid_node {
char *pmu_name;
char *cpus;
};
struct pmu_caps {
int nr_caps;
unsigned int max_branches;
unsigned int br_cntr_nr;
unsigned int br_cntr_width;
char **caps;
char *pmu_name;
};
typedef const char *(arch_syscalls__strerrno_t)(int err);
arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch);
struct perf_env {
char *hostname;
char *os_release;
char *version;
char *arch;
int nr_cpus_online;
int nr_cpus_avail;
char *cpu_desc;
char *cpuid;
unsigned long long total_mem;
unsigned int msr_pmu_type;
unsigned int max_branches;
unsigned int br_cntr_nr;
unsigned int br_cntr_width;
int kernel_is_64_bit;
int nr_cmdline;
int nr_sibling_cores;
int nr_sibling_dies;
int nr_sibling_threads;
int nr_numa_nodes;
int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
int nr_cpu_pmu_caps;
int nr_hybrid_nodes;
int nr_pmus_with_caps;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
char **cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
u32 comp_ratio;
u32 comp_ver;
u32 comp_type;
u32 comp_level;
u32 comp_mmap_len;
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
struct hybrid_node *hybrid_nodes;
struct pmu_caps *pmu_caps;
#ifdef HAVE_LIBBPF_SUPPORT
/*
* bpf_info_lock protects bpf rbtrees. This is needed because the
* trees are accessed by different threads in perf-top
*/
struct {
struct rw_semaphore lock;
struct rb_root infos;
u32 infos_cnt;
struct rb_root btfs;
u32 btfs_cnt;
} bpf_progs;
#endif // HAVE_LIBBPF_SUPPORT
/* same reason as above (for perf-top) */
struct {
struct rw_semaphore lock;
struct rb_root tree;
} cgroups;
/* For fast cpu to numa node lookup via perf_env__numa_node */
int *numa_map;
int nr_numa_map;
/* For real clock time reference. */
struct {
u64 tod_ns;
u64 clockid_ns;
u64 clockid_res_ns;
int clockid;
/*
* enabled is valid for report mode, and is true if above
* values are set, it's set in process_clock_data
*/
bool enabled;
} clock;
arch_syscalls__strerrno_t *arch_strerrno;
};
enum perf_compress_type {
PERF_COMP_NONE = 0,
PERF_COMP_ZSTD,
PERF_COMP_MAX
};
struct bpf_prog_info_node;
struct btf_node;
extern struct perf_env perf_env;
void perf_env__exit(struct perf_env *env);
int perf_env__kernel_is_64_bit(struct perf_env *env);
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
int perf_env__read_cpuid(struct perf_env *env);
int perf_env__read_pmu_mappings(struct perf_env *env);
int perf_env__nr_pmu_mappings(struct perf_env *env);
const char *perf_env__pmu_mappings(struct perf_env *env);
int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
const char *perf_env__arch(struct perf_env *env);
const char *perf_env__arch_strerrno(struct perf_env *env, int err);
const char *perf_env__cpuid(struct perf_env *env);
const char *perf_env__raw_arch(struct perf_env *env);
int perf_env__nr_cpus_avail(struct perf_env *env);
void perf_env__init(struct perf_env *env);
void __perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
void perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
__u32 prog_id);
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id);
int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
const char *cap);
bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name);
void perf_env__find_br_cntr_info(struct perf_env *env,
unsigned int *nr,
unsigned int *width);
#endif /* __PERF_ENV_H */