Merge patch series "Assorted fixes in RISC-V PMU driver"

Atish Patra <atishp@rivosinc.com> says:

This series contains three fixes. The first is a new fix for the
invalid event data reported on lkml[2]. The last two are v3 of Samuel's
patch[1]. I added the RB/TB/Fixes tags and moved one unrelated change
to its own patch. I also changed an error message in the kvm vcpu_pmu
from pr_err to pr_debug to avoid redundant failure messages generated
by the boot-time querying of events implemented in the patch[1].

Here is the original cover letter for the patch[1]:

Before this patch:
$ perf list hw

List of pre-defined events (to be used in -e or -M):

  branch-instructions OR branches                    [Hardware event]
  branch-misses                                      [Hardware event]
  bus-cycles                                         [Hardware event]
  cache-misses                                       [Hardware event]
  cache-references                                   [Hardware event]
  cpu-cycles OR cycles                               [Hardware event]
  instructions                                       [Hardware event]
  ref-cycles                                         [Hardware event]
  stalled-cycles-backend OR idle-cycles-backend      [Hardware event]
  stalled-cycles-frontend OR idle-cycles-frontend    [Hardware event]

$ perf stat -ddd true

 Performance counter stats for 'true':

              4.36 msec task-clock                       #    0.744 CPUs utilized
                 1      context-switches                 #  229.325 /sec
                 0      cpu-migrations                   #    0.000 /sec
                38      page-faults                      #    8.714 K/sec
         4,375,694      cycles                           #    1.003 GHz                         (60.64%)
           728,945      instructions                     #    0.17  insn per cycle
            79,199      branches                         #   18.162 M/sec
            17,709      branch-misses                    #   22.36% of all branches
           181,734      L1-dcache-loads                  #   41.676 M/sec
             5,547      L1-dcache-load-misses            #    3.05% of all L1-dcache accesses
     <not counted>      LLC-loads                                                               (0.00%)
     <not counted>      LLC-load-misses                                                         (0.00%)
     <not counted>      L1-icache-loads                                                         (0.00%)
     <not counted>      L1-icache-load-misses                                                   (0.00%)
     <not counted>      dTLB-loads                                                              (0.00%)
     <not counted>      dTLB-load-misses                                                        (0.00%)
     <not counted>      iTLB-loads                                                              (0.00%)
     <not counted>      iTLB-load-misses                                                        (0.00%)
     <not counted>      L1-dcache-prefetches                                                    (0.00%)
     <not counted>      L1-dcache-prefetch-misses                                               (0.00%)

       0.005860375 seconds time elapsed

       0.000000000 seconds user
       0.010383000 seconds sys

After this patch:
$ perf list hw

List of pre-defined events (to be used in -e or -M):

  branch-instructions OR branches                    [Hardware event]
  branch-misses                                      [Hardware event]
  cache-misses                                       [Hardware event]
  cache-references                                   [Hardware event]
  cpu-cycles OR cycles                               [Hardware event]
  instructions                                       [Hardware event]

$ perf stat -ddd true

 Performance counter stats for 'true':

              5.16 msec task-clock                       #    0.848 CPUs utilized
                 1      context-switches                 #  193.817 /sec
                 0      cpu-migrations                   #    0.000 /sec
                37      page-faults                      #    7.171 K/sec
         5,183,625      cycles                           #    1.005 GHz
           961,696      instructions                     #    0.19  insn per cycle
            85,853      branches                         #   16.640 M/sec
            20,462      branch-misses                    #   23.83% of all branches
           243,545      L1-dcache-loads                  #   47.203 M/sec
             5,974      L1-dcache-load-misses            #    2.45% of all L1-dcache accesses
   <not supported>      LLC-loads
   <not supported>      LLC-load-misses
   <not supported>      L1-icache-loads
   <not supported>      L1-icache-load-misses
   <not supported>      dTLB-loads
            19,619      dTLB-load-misses
   <not supported>      iTLB-loads
             6,831      iTLB-load-misses
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       0.006085625 seconds time elapsed

       0.000000000 seconds user
       0.013022000 seconds sys
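
A quick way to confirm the new behaviour from userspace is to try opening
one of the generic hardware events directly: with the checks in place, an
event the platform cannot count is rejected when the event is opened rather
than being created and reporting meaningless data. The sketch below is only
illustrative and is not part of the series; the perf_event_open() wrapper
and the choice of stalled-cycles-backend are arbitrary.

/* Illustrative only: probe whether a generic hardware event can be opened. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/perf_event.h>

/* Thin wrapper; glibc does not provide perf_event_open() itself. */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
	attr.disabled = 1;

	/* Monitor the current task on any CPU. */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		printf("stalled-cycles-backend rejected: %s\n", strerror(errno));
	else
		printf("stalled-cycles-backend opened (fd %ld)\n", fd);
	return 0;
}

On firmware that cannot count backend stalls, the open is expected to fail
once the driver stops advertising the event; the exact errno depends on how
the mapping is rejected.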

[1] https://lore.kernel.org/linux-riscv/20240418014652.1143466-1-samuel.holland@sifive.com/
[2] https://lore.kernel.org/all/CC51D53B-846C-4D81-86FC-FBF969D0A0D6@pku.edu.cn/

* b4-shazam-merge:
  perf: RISC-V: Check standard event availability
  drivers/perf: riscv: Reset the counter to hpmevent mapping while starting cpus
  drivers/perf: riscv: Do not update the event data if uptodate

Link: https://lore.kernel.org/r/20240628-misc_perf_fixes-v4-0-e01cfddcf035@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
commit 210ac17ded
Committed by Palmer Dabbelt <palmer@rivosinc.com> on 2024-07-03 12:56:26 -07:00
3 changed files with 43 additions and 5 deletions

--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
 	event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
 	if (IS_ERR(event)) {
-		pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
+		pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
 		return PTR_ERR(event);
 	}

--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
 	unsigned long cmask;
 	u64 oldval, delta;
 
-	if (!rvpmu->ctr_read)
+	if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
 		return 0;
 
 	cmask = riscv_pmu_ctr_get_width_mask(event);

--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -20,6 +20,7 @@
 #include <linux/cpu_pm.h>
 #include <linux/sched/clock.h>
 #include <linux/soc/andes/irq.h>
+#include <linux/workqueue.h>
 
 #include <asm/errata_list.h>
 #include <asm/sbi.h>
@@ -114,7 +115,7 @@ struct sbi_pmu_event_data {
 	};
 };
 
-static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+static struct sbi_pmu_event_data pmu_hw_event_map[] = {
 	[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
 		SBI_PMU_HW_CPU_CYCLES,
 		SBI_PMU_EVENT_TYPE_HW, 0}},
@@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
 };
 
 #define C(x) PERF_COUNT_HW_CACHE_##x
-static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
 [PERF_COUNT_HW_CACHE_OP_MAX]
 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 [C(L1D)] = {
@@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M
 },
 };
 
+static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+			0, cmask, 0, edata->event_idx, 0, 0);
+	if (!ret.error) {
+		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+			  ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+	} else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
+		/* This event cannot be monitored by any counter */
+		edata->event_idx = -EINVAL;
+	}
+}
+
+static void pmu_sbi_check_std_events(struct work_struct *work)
+{
+	for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+		pmu_sbi_check_event(&pmu_hw_event_map[i]);
+
+	for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
+		for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
+			for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+				pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
+}
+
+static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);
+
 static int pmu_sbi_ctr_get_width(int idx)
 {
 	return pmu_ctr_list[idx].width;
@@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
 	u64 raw_config_val;
 	int ret;
 
+	/*
+	 * Ensure we are finished checking standard hardware events for
+	 * validity before allowing userspace to configure any events.
+	 */
+	flush_work(&check_std_events_work);
+
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
 		if (config >= PERF_COUNT_HW_MAX)
@@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
 	 * which may include counters that are not enabled yet.
 	 */
 	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
-		  0, pmu->cmask, 0, 0, 0, 0);
+		  0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
 }
 
 static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
@@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_unregister;
 
+	/* Asynchronously check which standard events are available */
+	schedule_work(&check_std_events_work);
+
 	return 0;
 
 out_unregister: