Paul Mackerras b6c295df31 KVM: PPC: Book3S HV: Accumulate timing information for real-mode code
This reads the timebase at various points in the real-mode guest
entry/exit code and uses that to accumulate total, minimum and
maximum time spent in those parts of the code.  Currently these
times are accumulated per vcpu in 5 parts of the code:

* rm_entry - time taken from the start of kvmppc_hv_entry() until
  just before entering the guest.
* rm_intr - time from when we take a hypervisor interrupt in the
  guest until we either re-enter the guest or decide to exit to the
  host.  This includes time spent handling hcalls in real mode.
* rm_exit - time from when we decide to exit the guest until the
  return from kvmppc_hv_entry().
* guest - time spend in the guest
* cede - time spent napping in real mode due to an H_CEDE hcall
  while other threads in the same vcore are active.

These times are exposed in debugfs in a directory per vcpu that
contains a file called "timings".  This file contains one line for
each of the 5 timings above, with the name followed by a colon and
4 numbers, which are the count (number of times the code has been
executed), the total time, the minimum time, and the maximum time,
all in nanoseconds.

The overhead of the extra code amounts to about 30ns for an hcall that
is handled in real mode (e.g. H_SET_DABR), which is about 25%.  Since
production environments may not wish to incur this overhead, the new
code is conditional on a new config symbol,
CONFIG_KVM_BOOK3S_HV_EXIT_TIMING.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-04-21 15:21:31 +02:00

219 lines
4.9 KiB
C

/*
* Common time prototypes and such for all ppc machines.
*
* Written by Cort Dougan (cort@cs.nmt.edu) to merge
* Paul Mackerras' version and mine for PReP and Pmac.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef __POWERPC_TIME_H
#define __POWERPC_TIME_H
#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/percpu.h>
#include <asm/processor.h>
/* time.c */
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
extern struct clock_event_device decrementer_clockevent;
struct rtc_time;
extern void to_tm(int tim, struct rtc_time * tm);
extern void GregorianDay(struct rtc_time *tm);
extern void tick_broadcast_ipi_handler(void);
extern void generic_calibrate_decr(void);
extern void set_dec_cpu6(unsigned int val);
/* Some sane defaults: 125 MHz timebase, 1GHz processor */
extern unsigned long ppc_proc_freq;
#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8)
extern unsigned long ppc_tb_freq;
#define DEFAULT_TB_FREQ 125000000UL
struct div_result {
u64 result_high;
u64 result_low;
};
/* Accessor functions for the timebase (RTC on 601) registers. */
/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
#ifdef CONFIG_6xx
#define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB))
#else
#define __USE_RTC() 0
#endif
#ifdef CONFIG_PPC64
/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */
#define get_tbl get_tb
#else
static inline unsigned long get_tbl(void)
{
#if defined(CONFIG_403GCX)
unsigned long tbl;
asm volatile("mfspr %0, 0x3dd" : "=r" (tbl));
return tbl;
#else
return mftbl();
#endif
}
static inline unsigned int get_tbu(void)
{
#ifdef CONFIG_403GCX
unsigned int tbu;
asm volatile("mfspr %0, 0x3dc" : "=r" (tbu));
return tbu;
#else
return mftbu();
#endif
}
#endif /* !CONFIG_PPC64 */
static inline unsigned int get_rtcl(void)
{
unsigned int rtcl;
asm volatile("mfrtcl %0" : "=r" (rtcl));
return rtcl;
}
static inline u64 get_rtc(void)
{
unsigned int hi, lo, hi2;
do {
asm volatile("mfrtcu %0; mfrtcl %1; mfrtcu %2"
: "=r" (hi), "=r" (lo), "=r" (hi2));
} while (hi2 != hi);
return (u64)hi * 1000000000 + lo;
}
static inline u64 get_vtb(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return mfvtb();
#endif
return 0;
}
#ifdef CONFIG_PPC64
static inline u64 get_tb(void)
{
return mftb();
}
#else /* CONFIG_PPC64 */
static inline u64 get_tb(void)
{
unsigned int tbhi, tblo, tbhi2;
do {
tbhi = get_tbu();
tblo = get_tbl();
tbhi2 = get_tbu();
} while (tbhi != tbhi2);
return ((u64)tbhi << 32) | tblo;
}
#endif /* !CONFIG_PPC64 */
static inline u64 get_tb_or_rtc(void)
{
return __USE_RTC() ? get_rtc() : get_tb();
}
static inline void set_tb(unsigned int upper, unsigned int lower)
{
mtspr(SPRN_TBWL, 0);
mtspr(SPRN_TBWU, upper);
mtspr(SPRN_TBWL, lower);
}
/* Accessor functions for the decrementer register.
* The 4xx doesn't even have a decrementer. I tried to use the
* generic timer interrupt code, which seems OK, with the 4xx PIT
* in auto-reload mode. The problem is PIT stops counting when it
* hits zero. If it would wrap, we could use it just like a decrementer.
*/
static inline unsigned int get_dec(void)
{
#if defined(CONFIG_40x)
return (mfspr(SPRN_PIT));
#else
return (mfspr(SPRN_DEC));
#endif
}
/*
* Note: Book E and 4xx processors differ from other PowerPC processors
* in when the decrementer generates its interrupt: on the 1 to 0
* transition for Book E/4xx, but on the 0 to -1 transition for others.
*/
static inline void set_dec(int val)
{
#if defined(CONFIG_40x)
mtspr(SPRN_PIT, val);
#elif defined(CONFIG_8xx_CPU6)
set_dec_cpu6(val - 1);
#else
#ifndef CONFIG_BOOKE
--val;
#endif
mtspr(SPRN_DEC, val);
#endif /* not 40x or 8xx_CPU6 */
}
static inline unsigned long tb_ticks_since(unsigned long tstamp)
{
if (__USE_RTC()) {
int delta = get_rtcl() - (unsigned int) tstamp;
return delta < 0 ? delta + 1000000000 : delta;
}
return get_tbl() - tstamp;
}
#define mulhwu(x,y) \
({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
#ifdef CONFIG_PPC64
#define mulhdu(x,y) \
({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
#else
extern u64 mulhdu(u64, u64);
#endif
extern void div128_by_32(u64 dividend_high, u64 dividend_low,
unsigned divisor, struct div_result *dr);
/* Used to store Processor Utilization register (purr) values */
struct cpu_usage {
u64 current_tb; /* Holds the current purr register values */
};
DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
extern void secondary_cpu_time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */