mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 12:12:05 +00:00
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull turbostat update from Len Brown: "Updates to the turbostat utility. Just one kernel dependency in this batch -- added a #define to msr-index.h" * 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: tools/power turbostat: correct dumped pkg-cstate-limit value tools/power turbostat: calculate TSC frequency from CPUID(0x15) on SKL tools/power turbostat: correct DRAM RAPL units on recent Xeon processors tools/power turbostat: Initial Skylake support tools/power turbostat: Use $(CURDIR) instead of $(PWD) and add support for O= option in Makefile tools/power turbostat: modprobe msr, if needed tools/power turbostat: dump MSR_TURBO_RATIO_LIMIT2 tools/power turbostat: use new MSR_TURBO_RATIO_LIMIT names x86 msr-index: define MSR_TURBO_RATIO_LIMIT,1,2 tools/power turbostat: label base frequency tools/power turbostat: update PERF_LIMIT_REASONS decoding tools/power turbostat: simplify default output
This commit is contained in:
commit
09d51602cf
@ -61,6 +61,9 @@
|
||||
#define MSR_OFFCORE_RSP_1 0x000001a7
|
||||
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
|
||||
#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae
|
||||
#define MSR_TURBO_RATIO_LIMIT 0x000001ad
|
||||
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
|
||||
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
|
||||
|
||||
#define MSR_LBR_SELECT 0x000001c8
|
||||
#define MSR_LBR_TOS 0x000001c9
|
||||
@ -165,6 +168,11 @@
|
||||
#define MSR_PP1_ENERGY_STATUS 0x00000641
|
||||
#define MSR_PP1_POLICY 0x00000642
|
||||
|
||||
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
|
||||
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
|
||||
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
|
||||
#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
|
||||
|
||||
#define MSR_CORE_C1_RES 0x00000660
|
||||
|
||||
#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
|
||||
|
@ -1,8 +1,12 @@
|
||||
CC = $(CROSS_COMPILE)gcc
|
||||
BUILD_OUTPUT := $(PWD)
|
||||
BUILD_OUTPUT := $(CURDIR)
|
||||
PREFIX := /usr
|
||||
DESTDIR :=
|
||||
|
||||
ifeq ("$(origin O)", "command line")
|
||||
BUILD_OUTPUT := $(O)
|
||||
endif
|
||||
|
||||
turbostat : turbostat.c
|
||||
CFLAGS += -Wall
|
||||
CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"'
|
||||
|
@ -20,9 +20,11 @@ upon its completion.
|
||||
The second method is to omit the command,
|
||||
and turbostat displays statistics every 5 seconds.
|
||||
The 5-second interval can be changed using the --interval option.
|
||||
|
||||
.PP
|
||||
Some information is not available on older processors.
|
||||
.SS Options
|
||||
Options can be specified with a single or double '-', and only as much of the option
|
||||
name as needed to disambiguate it from others is required. Note that options are case-sensitive.
|
||||
\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter.
|
||||
.PP
|
||||
\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter.
|
||||
@ -55,16 +57,20 @@ more than once may also enable internal turbostat debug information.
|
||||
The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit,
|
||||
displays the statistics gathered since it was forked.
|
||||
.PP
|
||||
.SH FIELD DESCRIPTIONS
|
||||
.SH DEFAULT FIELD DESCRIPTIONS
|
||||
.nf
|
||||
\fBPackage\fP processor package number.
|
||||
\fBCore\fP processor core number.
|
||||
\fBCPU\fP Linux CPU (logical processor) number.
|
||||
Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology.
|
||||
\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
|
||||
\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
|
||||
\fB%Busy\fP percent of the interval that the CPU retired instructions, a.k.a. % of time in "C0" state.
|
||||
\fBBzy_MHz\fP average clock rate while the CPU was busy (in "C0" state).
|
||||
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
|
||||
.fi
|
||||
.PP
|
||||
.SH DEBUG FIELD DESCRIPTIONS
|
||||
.nf
|
||||
\fBPackage\fP processor package number.
|
||||
\fBCore\fP processor core number.
|
||||
Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
|
||||
\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.
|
||||
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
|
||||
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
|
||||
@ -81,63 +87,76 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
|
||||
Without any parameters, turbostat displays statistics every 5 seconds.
|
||||
(override interval with "-i sec" option, or specify a command
|
||||
for turbostat to fork).
|
||||
.nf
|
||||
[root@hsw]# ./turbostat
|
||||
CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
|
||||
- 488 12.51 3898 3498
|
||||
0 0 0.01 3885 3498
|
||||
4 3897 99.99 3898 3498
|
||||
1 0 0.00 3861 3498
|
||||
5 0 0.00 3882 3498
|
||||
2 1 0.02 3894 3498
|
||||
6 2 0.06 3898 3498
|
||||
3 0 0.00 3849 3498
|
||||
7 0 0.00 3877 3498
|
||||
|
||||
.fi
|
||||
.SH DEBUG EXAMPLE
|
||||
The "--debug" option prints additional system information before measurements:
|
||||
|
||||
The first row of statistics is a summary for the entire system.
|
||||
For residency % columns, the summary is a weighted average.
|
||||
For Temperature columns, the summary is the column maximum.
|
||||
For Watts columns, the summary is a system total.
|
||||
Subsequent rows show per-CPU statistics.
|
||||
|
||||
.nf
|
||||
[root@ivy]# ./turbostat
|
||||
Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
|
||||
- - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00
|
||||
0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00
|
||||
0 4 1 0.07 1596 3492 0 0.79
|
||||
1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23
|
||||
1 5 5 0.28 1596 3492 0 0.95
|
||||
2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23
|
||||
2 6 2 0.10 1597 3492 0 0.97
|
||||
3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23
|
||||
3 7 5 0.31 1596 3492 0 0.33
|
||||
.fi
|
||||
.SH DEBUG EXAMPLE
|
||||
The "--debug" option prints additional system information before measurements:
|
||||
|
||||
.nf
|
||||
turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org>
|
||||
CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9)
|
||||
turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org>
|
||||
CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
|
||||
CPUID(6): APERF, DTS, PTM, EPB
|
||||
RAPL: 851 sec. Joule Counter Range, at 77 Watts
|
||||
cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300
|
||||
16 * 100 = 1600 MHz max efficiency
|
||||
RAPL: 3121 sec. Joule Counter Range, at 84 Watts
|
||||
cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300
|
||||
8 * 100 = 800 MHz max efficiency
|
||||
35 * 100 = 3500 MHz TSC frequency
|
||||
cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled)
|
||||
cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n)
|
||||
cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
|
||||
cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
|
||||
cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
|
||||
37 * 100 = 3700 MHz max turbo 4 active cores
|
||||
38 * 100 = 3800 MHz max turbo 3 active cores
|
||||
39 * 100 = 3900 MHz max turbo 2 active cores
|
||||
39 * 100 = 3900 MHz max turbo 1 active cores
|
||||
cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
|
||||
cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.)
|
||||
cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.)
|
||||
cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked)
|
||||
cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled)
|
||||
cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled)
|
||||
cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, )
|
||||
cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
|
||||
cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
|
||||
cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
|
||||
cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
|
||||
cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
|
||||
cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
|
||||
cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
|
||||
cpu0: MSR_PP0_POLICY: 0
|
||||
cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
|
||||
cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
|
||||
cpu0: MSR_PP1_POLICY: 0
|
||||
cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
|
||||
cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
|
||||
cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C)
|
||||
cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C)
|
||||
cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1)
|
||||
cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1)
|
||||
cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1)
|
||||
cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1)
|
||||
...
|
||||
cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
|
||||
cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C)
|
||||
cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
|
||||
cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
|
||||
cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
|
||||
cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
|
||||
Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
|
||||
- - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
|
||||
0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
|
||||
0 4 3897 99.98 3898 3498 0 0.02
|
||||
1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32
|
||||
1 5 0 0.00 3885 3498 0 0.21
|
||||
2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32
|
||||
2 6 2 0.06 3896 3498 0 0.80
|
||||
3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28
|
||||
3 7 0 0.00 3879 3498 0 0.04
|
||||
^C
|
||||
|
||||
.fi
|
||||
The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
|
||||
available at the minimum package voltage. The \fBTSC frequency\fP is the base
|
||||
@ -147,6 +166,9 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling.
|
||||
The remaining rows show what maximum turbo frequency is possible
|
||||
depending on the number of idle cores. Note that not all information is
|
||||
available on all processors.
|
||||
.PP
|
||||
The --debug option adds additional columns to the measurement output, including CPU idle power-state residency and processor temperature sensor readings.
|
||||
See the field definitions above.
|
||||
.SH FORK EXAMPLE
|
||||
If turbostat is invoked with a command, it will fork that command
|
||||
and output the statistics gathered when the command exits.
|
||||
@ -154,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
|
||||
until ^C while the other CPUs are mostly idle:
|
||||
|
||||
.nf
|
||||
root@ivy: turbostat cat /dev/zero > /dev/null
|
||||
root@hsw: turbostat cat /dev/zero > /dev/null
|
||||
^C
|
||||
Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
|
||||
- - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00
|
||||
0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00
|
||||
0 4 9 0.24 3829 3492 0 1.15
|
||||
1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36
|
||||
1 5 3880 99.82 3888 3492 0 0.18
|
||||
2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28
|
||||
2 6 12 0.32 3823 3492 0 0.89
|
||||
3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30
|
||||
3 7 4 0.11 3827 3492 0 0.94
|
||||
30.372243 sec
|
||||
CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
|
||||
- 482 12.51 3854 3498
|
||||
0 0 0.01 1960 3498
|
||||
4 0 0.00 2128 3498
|
||||
1 0 0.00 3003 3498
|
||||
5 3854 99.98 3855 3498
|
||||
2 0 0.01 3504 3498
|
||||
6 3 0.08 3884 3498
|
||||
3 0 0.00 2553 3498
|
||||
7 0 0.00 2126 3498
|
||||
10.783983 sec
|
||||
|
||||
.fi
|
||||
Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit
|
||||
while the other processors are generally in various states of idle.
|
||||
|
||||
Note that cpu1 and cpu5 are HT siblings within core1.
|
||||
As cpu5 is very busy, it prevents its sibling, cpu1,
|
||||
from entering a c-state deeper than c1.
|
||||
Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit.
|
||||
The first row shows the average MHz and %Busy across all the processors in the system.
|
||||
|
||||
Note that the Avg_MHz column reflects the total number of cycles executed
|
||||
divided by the measurement interval. If the %Busy column is 100%,
|
||||
|
@ -57,6 +57,7 @@ unsigned int do_pc3;
|
||||
unsigned int do_pc6;
|
||||
unsigned int do_pc7;
|
||||
unsigned int do_c8_c9_c10;
|
||||
unsigned int do_skl_residency;
|
||||
unsigned int do_slm_cstates;
|
||||
unsigned int use_c1_residency_msr;
|
||||
unsigned int has_aperf;
|
||||
@ -65,8 +66,6 @@ unsigned int units = 1000000; /* MHz etc */
|
||||
unsigned int genuine_intel;
|
||||
unsigned int has_invariant_tsc;
|
||||
unsigned int do_nhm_platform_info;
|
||||
unsigned int do_nhm_turbo_ratio_limit;
|
||||
unsigned int do_ivt_turbo_ratio_limit;
|
||||
unsigned int extra_msr_offset32;
|
||||
unsigned int extra_msr_offset64;
|
||||
unsigned int extra_delta_offset32;
|
||||
@ -84,11 +83,14 @@ unsigned int do_dts;
|
||||
unsigned int do_ptm;
|
||||
unsigned int tcc_activation_temp;
|
||||
unsigned int tcc_activation_temp_override;
|
||||
double rapl_power_units, rapl_energy_units, rapl_time_units;
|
||||
double rapl_power_units, rapl_time_units;
|
||||
double rapl_dram_energy_units, rapl_energy_units;
|
||||
double rapl_joule_counter_range;
|
||||
unsigned int do_core_perf_limit_reasons;
|
||||
unsigned int do_gfx_perf_limit_reasons;
|
||||
unsigned int do_ring_perf_limit_reasons;
|
||||
unsigned int crystal_hz;
|
||||
unsigned long long tsc_hz;
|
||||
|
||||
#define RAPL_PKG (1 << 0)
|
||||
/* 0x610 MSR_PKG_POWER_LIMIT */
|
||||
@ -101,18 +103,18 @@ unsigned int do_ring_perf_limit_reasons;
|
||||
#define RAPL_DRAM (1 << 3)
|
||||
/* 0x618 MSR_DRAM_POWER_LIMIT */
|
||||
/* 0x619 MSR_DRAM_ENERGY_STATUS */
|
||||
/* 0x61c MSR_DRAM_POWER_INFO */
|
||||
#define RAPL_DRAM_PERF_STATUS (1 << 4)
|
||||
/* 0x61b MSR_DRAM_PERF_STATUS */
|
||||
#define RAPL_DRAM_POWER_INFO (1 << 5)
|
||||
/* 0x61c MSR_DRAM_POWER_INFO */
|
||||
|
||||
#define RAPL_CORES (1 << 5)
|
||||
#define RAPL_CORES (1 << 6)
|
||||
/* 0x638 MSR_PP0_POWER_LIMIT */
|
||||
/* 0x639 MSR_PP0_ENERGY_STATUS */
|
||||
#define RAPL_CORE_POLICY (1 << 6)
|
||||
#define RAPL_CORE_POLICY (1 << 7)
|
||||
/* 0x63a MSR_PP0_POLICY */
|
||||
|
||||
|
||||
#define RAPL_GFX (1 << 7)
|
||||
#define RAPL_GFX (1 << 8)
|
||||
/* 0x640 MSR_PP1_POWER_LIMIT */
|
||||
/* 0x641 MSR_PP1_ENERGY_STATUS */
|
||||
/* 0x642 MSR_PP1_POLICY */
|
||||
@ -159,6 +161,10 @@ struct pkg_data {
|
||||
unsigned long long pc8;
|
||||
unsigned long long pc9;
|
||||
unsigned long long pc10;
|
||||
unsigned long long pkg_wtd_core_c0;
|
||||
unsigned long long pkg_any_core_c0;
|
||||
unsigned long long pkg_any_gfxe_c0;
|
||||
unsigned long long pkg_both_core_gfxe_c0;
|
||||
unsigned int package_id;
|
||||
unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
|
||||
unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
|
||||
@ -292,8 +298,7 @@ void print_header(void)
|
||||
if (has_aperf)
|
||||
outp += sprintf(outp, " Bzy_MHz");
|
||||
outp += sprintf(outp, " TSC_MHz");
|
||||
if (do_smi)
|
||||
outp += sprintf(outp, " SMI");
|
||||
|
||||
if (extra_delta_offset32)
|
||||
outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
|
||||
if (extra_delta_offset64)
|
||||
@ -302,6 +307,13 @@ void print_header(void)
|
||||
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
|
||||
if (extra_msr_offset64)
|
||||
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
|
||||
|
||||
if (!debug)
|
||||
goto done;
|
||||
|
||||
if (do_smi)
|
||||
outp += sprintf(outp, " SMI");
|
||||
|
||||
if (do_nhm_cstates)
|
||||
outp += sprintf(outp, " CPU%%c1");
|
||||
if (do_nhm_cstates && !do_slm_cstates)
|
||||
@ -316,6 +328,13 @@ void print_header(void)
|
||||
if (do_ptm)
|
||||
outp += sprintf(outp, " PkgTmp");
|
||||
|
||||
if (do_skl_residency) {
|
||||
outp += sprintf(outp, " Totl%%C0");
|
||||
outp += sprintf(outp, " Any%%C0");
|
||||
outp += sprintf(outp, " GFX%%C0");
|
||||
outp += sprintf(outp, " CPUGFX%%");
|
||||
}
|
||||
|
||||
if (do_pc2)
|
||||
outp += sprintf(outp, " Pkg%%pc2");
|
||||
if (do_pc3)
|
||||
@ -359,6 +378,7 @@ void print_header(void)
|
||||
outp += sprintf(outp, " time");
|
||||
|
||||
}
|
||||
done:
|
||||
outp += sprintf(outp, "\n");
|
||||
}
|
||||
|
||||
@ -396,6 +416,12 @@ int dump_counters(struct thread_data *t, struct core_data *c,
|
||||
|
||||
if (p) {
|
||||
outp += sprintf(outp, "package: %d\n", p->package_id);
|
||||
|
||||
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
|
||||
outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
|
||||
outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
|
||||
outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
|
||||
|
||||
outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
|
||||
if (do_pc3)
|
||||
outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
|
||||
@ -487,10 +513,6 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
/* TSC_MHz */
|
||||
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
|
||||
|
||||
/* SMI */
|
||||
if (do_smi)
|
||||
outp += sprintf(outp, "%8d", t->smi_count);
|
||||
|
||||
/* delta */
|
||||
if (extra_delta_offset32)
|
||||
outp += sprintf(outp, " %11llu", t->extra_delta32);
|
||||
@ -506,6 +528,13 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
if (extra_msr_offset64)
|
||||
outp += sprintf(outp, " 0x%016llx", t->extra_msr64);
|
||||
|
||||
if (!debug)
|
||||
goto done;
|
||||
|
||||
/* SMI */
|
||||
if (do_smi)
|
||||
outp += sprintf(outp, "%8d", t->smi_count);
|
||||
|
||||
if (do_nhm_cstates) {
|
||||
if (!skip_c1)
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
|
||||
@ -531,9 +560,18 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
|
||||
goto done;
|
||||
|
||||
/* PkgTmp */
|
||||
if (do_ptm)
|
||||
outp += sprintf(outp, "%8d", p->pkg_temp_c);
|
||||
|
||||
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
|
||||
if (do_skl_residency) {
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
|
||||
}
|
||||
|
||||
if (do_pc2)
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
|
||||
if (do_pc3)
|
||||
@ -565,7 +603,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
if (do_rapl & RAPL_GFX)
|
||||
outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
|
||||
if (do_rapl & RAPL_DRAM)
|
||||
outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float);
|
||||
outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
|
||||
if (do_rapl & RAPL_PKG_PERF_STATUS)
|
||||
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
|
||||
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
||||
@ -582,7 +620,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
p->energy_gfx * rapl_energy_units);
|
||||
if (do_rapl & RAPL_DRAM)
|
||||
outp += sprintf(outp, fmt8,
|
||||
p->energy_dram * rapl_energy_units);
|
||||
p->energy_dram * rapl_dram_energy_units);
|
||||
if (do_rapl & RAPL_PKG_PERF_STATUS)
|
||||
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
|
||||
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
||||
@ -636,6 +674,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
|
||||
void
|
||||
delta_package(struct pkg_data *new, struct pkg_data *old)
|
||||
{
|
||||
|
||||
if (do_skl_residency) {
|
||||
old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
|
||||
old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
|
||||
old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
|
||||
old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
|
||||
}
|
||||
old->pc2 = new->pc2 - old->pc2;
|
||||
if (do_pc3)
|
||||
old->pc3 = new->pc3 - old->pc3;
|
||||
@ -782,6 +827,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
|
||||
c->c7 = 0;
|
||||
c->core_temp_c = 0;
|
||||
|
||||
p->pkg_wtd_core_c0 = 0;
|
||||
p->pkg_any_core_c0 = 0;
|
||||
p->pkg_any_gfxe_c0 = 0;
|
||||
p->pkg_both_core_gfxe_c0 = 0;
|
||||
|
||||
p->pc2 = 0;
|
||||
if (do_pc3)
|
||||
p->pc3 = 0;
|
||||
@ -826,6 +876,13 @@ int sum_counters(struct thread_data *t, struct core_data *c,
|
||||
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
|
||||
return 0;
|
||||
|
||||
if (do_skl_residency) {
|
||||
average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
|
||||
average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
|
||||
average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
|
||||
average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
|
||||
}
|
||||
|
||||
average.packages.pc2 += p->pc2;
|
||||
if (do_pc3)
|
||||
average.packages.pc3 += p->pc3;
|
||||
@ -873,6 +930,13 @@ void compute_average(struct thread_data *t, struct core_data *c,
|
||||
average.cores.c6 /= topo.num_cores;
|
||||
average.cores.c7 /= topo.num_cores;
|
||||
|
||||
if (do_skl_residency) {
|
||||
average.packages.pkg_wtd_core_c0 /= topo.num_packages;
|
||||
average.packages.pkg_any_core_c0 /= topo.num_packages;
|
||||
average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
|
||||
average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
|
||||
}
|
||||
|
||||
average.packages.pc2 /= topo.num_packages;
|
||||
if (do_pc3)
|
||||
average.packages.pc3 /= topo.num_packages;
|
||||
@ -979,6 +1043,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
|
||||
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
|
||||
return 0;
|
||||
|
||||
if (do_skl_residency) {
|
||||
if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
|
||||
return -10;
|
||||
if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
|
||||
return -11;
|
||||
if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
|
||||
return -12;
|
||||
if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
|
||||
return -13;
|
||||
}
|
||||
if (do_pc3)
|
||||
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
|
||||
return -9;
|
||||
@ -1055,49 +1129,77 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
|
||||
#define PCL_6R 9 /* PC6 Retention */
|
||||
#define PCL__7 10 /* PC7 */
|
||||
#define PCL_7S 11 /* PC7 Shrink */
|
||||
#define PCLUNL 12 /* Unlimited */
|
||||
#define PCL__8 12 /* PC8 */
|
||||
#define PCL__9 13 /* PC9 */
|
||||
#define PCLUNL 14 /* Unlimited */
|
||||
|
||||
int pkg_cstate_limit = PCLUKN;
|
||||
char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
|
||||
"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"};
|
||||
"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
|
||||
|
||||
int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL};
|
||||
int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL};
|
||||
int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL};
|
||||
int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7};
|
||||
int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
|
||||
int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL};
|
||||
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
|
||||
void print_verbose_header(void)
|
||||
static void
|
||||
dump_nhm_platform_info(void)
|
||||
{
|
||||
unsigned long long msr;
|
||||
unsigned int ratio;
|
||||
|
||||
if (!do_nhm_platform_info)
|
||||
return;
|
||||
|
||||
get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
|
||||
|
||||
ratio = (msr >> 40) & 0xFF;
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
|
||||
ratio, bclk, ratio * bclk);
|
||||
|
||||
ratio = (msr >> 8) & 0xFF;
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
|
||||
ratio, bclk, ratio * bclk);
|
||||
|
||||
get_msr(0, MSR_IA32_POWER_CTL, &msr);
|
||||
fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
|
||||
msr, msr & 0x2 ? "EN" : "DIS");
|
||||
|
||||
if (!do_ivt_turbo_ratio_limit)
|
||||
goto print_nhm_turbo_ratio_limits;
|
||||
return;
|
||||
}
|
||||
|
||||
get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
|
||||
static void
|
||||
dump_hsw_turbo_ratio_limits(void)
|
||||
{
|
||||
unsigned long long msr;
|
||||
unsigned int ratio;
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
|
||||
get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
|
||||
|
||||
ratio = (msr >> 8) & 0xFF;
|
||||
if (ratio)
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
|
||||
ratio, bclk, ratio * bclk);
|
||||
|
||||
ratio = (msr >> 0) & 0xFF;
|
||||
if (ratio)
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
|
||||
ratio, bclk, ratio * bclk);
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_ivt_turbo_ratio_limits(void)
|
||||
{
|
||||
unsigned long long msr;
|
||||
unsigned int ratio;
|
||||
|
||||
get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
|
||||
|
||||
ratio = (msr >> 56) & 0xFF;
|
||||
if (ratio)
|
||||
@ -1138,30 +1240,18 @@ void print_verbose_header(void)
|
||||
if (ratio)
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
|
||||
ratio, bclk, ratio * bclk);
|
||||
return;
|
||||
}
|
||||
|
||||
print_nhm_turbo_ratio_limits:
|
||||
get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
|
||||
static void
|
||||
dump_nhm_turbo_ratio_limits(void)
|
||||
{
|
||||
unsigned long long msr;
|
||||
unsigned int ratio;
|
||||
|
||||
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
|
||||
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
|
||||
get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
|
||||
|
||||
fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
|
||||
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
|
||||
(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
|
||||
(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
|
||||
(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
|
||||
(msr & (1 << 15)) ? "" : "UN",
|
||||
(unsigned int)msr & 7,
|
||||
pkg_cstate_limit_strings[pkg_cstate_limit]);
|
||||
|
||||
if (!do_nhm_turbo_ratio_limit)
|
||||
return;
|
||||
|
||||
get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
|
||||
fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
|
||||
|
||||
ratio = (msr >> 56) & 0xFF;
|
||||
if (ratio)
|
||||
@ -1202,7 +1292,30 @@ void print_verbose_header(void)
|
||||
if (ratio)
|
||||
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
|
||||
ratio, bclk, ratio * bclk);
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_nhm_cst_cfg(void)
|
||||
{
|
||||
unsigned long long msr;
|
||||
|
||||
get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
|
||||
|
||||
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
|
||||
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
|
||||
|
||||
fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
|
||||
|
||||
fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
|
||||
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
|
||||
(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
|
||||
(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
|
||||
(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
|
||||
(msr & (1 << 15)) ? "" : "UN",
|
||||
(unsigned int)msr & 7,
|
||||
pkg_cstate_limit_strings[pkg_cstate_limit]);
|
||||
return;
|
||||
}
|
||||
|
||||
void free_all_buffers(void)
|
||||
@ -1483,7 +1596,8 @@ void check_dev_msr()
|
||||
struct stat sb;
|
||||
|
||||
if (stat("/dev/cpu/0/msr", &sb))
|
||||
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
|
||||
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
|
||||
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
|
||||
}
|
||||
|
||||
void check_permissions()
|
||||
@ -1573,6 +1687,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
|
||||
case 0x47: /* BDW */
|
||||
case 0x4F: /* BDX */
|
||||
case 0x56: /* BDX-DE */
|
||||
case 0x4E: /* SKL */
|
||||
case 0x5E: /* SKL */
|
||||
pkg_cstate_limits = hsw_pkg_cstate_limits;
|
||||
break;
|
||||
case 0x37: /* BYT */
|
||||
@ -1590,7 +1706,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
|
||||
}
|
||||
get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
|
||||
|
||||
pkg_cstate_limit = pkg_cstate_limits[msr & 0x7];
|
||||
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -1615,11 +1731,48 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
|
||||
|
||||
switch (model) {
|
||||
case 0x3E: /* IVB Xeon */
|
||||
case 0x3F: /* HSW Xeon */
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
|
||||
{
|
||||
if (!genuine_intel)
|
||||
return 0;
|
||||
|
||||
if (family != 6)
|
||||
return 0;
|
||||
|
||||
switch (model) {
|
||||
case 0x3F: /* HSW Xeon */
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dump_cstate_pstate_config_info(family, model)
|
||||
{
|
||||
if (!do_nhm_platform_info)
|
||||
return;
|
||||
|
||||
dump_nhm_platform_info();
|
||||
|
||||
if (has_hsw_turbo_ratio_limit(family, model))
|
||||
dump_hsw_turbo_ratio_limits();
|
||||
|
||||
if (has_ivt_turbo_ratio_limit(family, model))
|
||||
dump_ivt_turbo_ratio_limits();
|
||||
|
||||
if (has_nhm_turbo_ratio_limit(family, model))
|
||||
dump_nhm_turbo_ratio_limits();
|
||||
|
||||
dump_nhm_cst_cfg();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* print_epb()
|
||||
@ -1690,35 +1843,35 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
|
||||
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
|
||||
fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
|
||||
fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
|
||||
(msr & 1 << 0) ? "PROCHOT, " : "",
|
||||
(msr & 1 << 1) ? "ThermStatus, " : "",
|
||||
(msr & 1 << 2) ? "bit2, " : "",
|
||||
(msr & 1 << 4) ? "Graphics, " : "",
|
||||
(msr & 1 << 5) ? "Auto-HWP, " : "",
|
||||
(msr & 1 << 6) ? "VR-Therm, " : "",
|
||||
(msr & 1 << 8) ? "Amps, " : "",
|
||||
(msr & 1 << 9) ? "CorePwr, " : "",
|
||||
(msr & 1 << 10) ? "PkgPwrL1, " : "",
|
||||
(msr & 1 << 11) ? "PkgPwrL2, " : "",
|
||||
(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
|
||||
(msr & 1 << 13) ? "Transitions, " : "",
|
||||
(msr & 1 << 15) ? "bit15, " : "",
|
||||
(msr & 1 << 14) ? "bit14, " : "",
|
||||
(msr & 1 << 15) ? "bit15, " : "");
|
||||
(msr & 1 << 13) ? "Transitions, " : "",
|
||||
(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
|
||||
(msr & 1 << 11) ? "PkgPwrL2, " : "",
|
||||
(msr & 1 << 10) ? "PkgPwrL1, " : "",
|
||||
(msr & 1 << 9) ? "CorePwr, " : "",
|
||||
(msr & 1 << 8) ? "Amps, " : "",
|
||||
(msr & 1 << 6) ? "VR-Therm, " : "",
|
||||
(msr & 1 << 5) ? "Auto-HWP, " : "",
|
||||
(msr & 1 << 4) ? "Graphics, " : "",
|
||||
(msr & 1 << 2) ? "bit2, " : "",
|
||||
(msr & 1 << 1) ? "ThermStatus, " : "",
|
||||
(msr & 1 << 0) ? "PROCHOT, " : "");
|
||||
fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
|
||||
(msr & 1 << 16) ? "PROCHOT, " : "",
|
||||
(msr & 1 << 17) ? "ThermStatus, " : "",
|
||||
(msr & 1 << 18) ? "bit18, " : "",
|
||||
(msr & 1 << 20) ? "Graphics, " : "",
|
||||
(msr & 1 << 21) ? "Auto-HWP, " : "",
|
||||
(msr & 1 << 22) ? "VR-Therm, " : "",
|
||||
(msr & 1 << 24) ? "Amps, " : "",
|
||||
(msr & 1 << 25) ? "CorePwr, " : "",
|
||||
(msr & 1 << 26) ? "PkgPwrL1, " : "",
|
||||
(msr & 1 << 27) ? "PkgPwrL2, " : "",
|
||||
(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
|
||||
(msr & 1 << 29) ? "Transitions, " : "",
|
||||
(msr & 1 << 31) ? "bit31, " : "",
|
||||
(msr & 1 << 30) ? "bit30, " : "",
|
||||
(msr & 1 << 31) ? "bit31, " : "");
|
||||
(msr & 1 << 29) ? "Transitions, " : "",
|
||||
(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
|
||||
(msr & 1 << 27) ? "PkgPwrL2, " : "",
|
||||
(msr & 1 << 26) ? "PkgPwrL1, " : "",
|
||||
(msr & 1 << 25) ? "CorePwr, " : "",
|
||||
(msr & 1 << 24) ? "Amps, " : "",
|
||||
(msr & 1 << 22) ? "VR-Therm, " : "",
|
||||
(msr & 1 << 21) ? "Auto-HWP, " : "",
|
||||
(msr & 1 << 20) ? "Graphics, " : "",
|
||||
(msr & 1 << 18) ? "bit18, " : "",
|
||||
(msr & 1 << 17) ? "ThermStatus, " : "",
|
||||
(msr & 1 << 16) ? "PROCHOT, " : "");
|
||||
|
||||
}
|
||||
if (do_gfx_perf_limit_reasons) {
|
||||
@ -1784,6 +1937,25 @@ double get_tdp(model)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* rapl_dram_energy_units_probe()
|
||||
* Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
|
||||
*/
|
||||
static double
|
||||
rapl_dram_energy_units_probe(int model, double rapl_energy_units)
|
||||
{
|
||||
/* only called for genuine_intel, family 6 */
|
||||
|
||||
switch (model) {
|
||||
case 0x3F: /* HSX */
|
||||
case 0x4F: /* BDX */
|
||||
case 0x56: /* BDX-DE */
|
||||
return (rapl_dram_energy_units = 15.3 / 1000000);
|
||||
default:
|
||||
return (rapl_energy_units);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* rapl_probe()
|
||||
@ -1812,14 +1984,18 @@ void rapl_probe(unsigned int family, unsigned int model)
|
||||
case 0x47: /* BDW */
|
||||
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
|
||||
break;
|
||||
case 0x4E: /* SKL */
|
||||
case 0x5E: /* SKL */
|
||||
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
|
||||
break;
|
||||
case 0x3F: /* HSX */
|
||||
case 0x4F: /* BDX */
|
||||
case 0x56: /* BDX-DE */
|
||||
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
|
||||
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
|
||||
break;
|
||||
case 0x2D:
|
||||
case 0x3E:
|
||||
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
|
||||
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
|
||||
break;
|
||||
case 0x37: /* BYT */
|
||||
case 0x4D: /* AVN */
|
||||
@ -1839,6 +2015,8 @@ void rapl_probe(unsigned int family, unsigned int model)
|
||||
else
|
||||
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
|
||||
|
||||
rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
|
||||
|
||||
time_unit = msr >> 16 & 0xF;
|
||||
if (time_unit == 0)
|
||||
time_unit = 0xA;
|
||||
@ -2009,19 +2187,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
|
||||
((msr >> 48) & 1) ? "EN" : "DIS");
|
||||
}
|
||||
|
||||
if (do_rapl & RAPL_DRAM) {
|
||||
if (do_rapl & RAPL_DRAM_POWER_INFO) {
|
||||
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
|
||||
return -6;
|
||||
|
||||
|
||||
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
|
||||
cpu, msr,
|
||||
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
|
||||
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
|
||||
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
|
||||
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
|
||||
|
||||
|
||||
}
|
||||
if (do_rapl & RAPL_DRAM) {
|
||||
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
|
||||
return -9;
|
||||
fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
|
||||
@ -2090,6 +2267,8 @@ int has_snb_msrs(unsigned int family, unsigned int model)
|
||||
case 0x47: /* BDW */
|
||||
case 0x4F: /* BDX */
|
||||
case 0x56: /* BDX-DE */
|
||||
case 0x4E: /* SKL */
|
||||
case 0x5E: /* SKL */
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@ -2110,11 +2289,35 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
|
||||
switch (model) {
|
||||
case 0x45: /* HSW */
|
||||
case 0x3D: /* BDW */
|
||||
case 0x4E: /* SKL */
|
||||
case 0x5E: /* SKL */
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* SKL adds support for additional MSRS:
|
||||
*
|
||||
* MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
|
||||
* MSR_PKG_ANY_CORE_C0_RES 0x00000659
|
||||
* MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
|
||||
* MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
|
||||
*/
|
||||
int has_skl_msrs(unsigned int family, unsigned int model)
|
||||
{
|
||||
if (!genuine_intel)
|
||||
return 0;
|
||||
|
||||
switch (model) {
|
||||
case 0x4E: /* SKL */
|
||||
case 0x5E: /* SKL */
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int is_slm(unsigned int family, unsigned int model)
|
||||
{
|
||||
@ -2228,7 +2431,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
|
||||
|
||||
return 0;
|
||||
}
|
||||
void check_cpuid()
|
||||
void process_cpuid()
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx, max_level;
|
||||
unsigned int fms, family, model, stepping;
|
||||
@ -2294,6 +2497,41 @@ void check_cpuid()
|
||||
do_ptm ? "" : "No ",
|
||||
has_epb ? "" : "No ");
|
||||
|
||||
if (max_level > 0x15) {
|
||||
unsigned int eax_crystal;
|
||||
unsigned int ebx_tsc;
|
||||
|
||||
/*
|
||||
* CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
|
||||
*/
|
||||
eax_crystal = ebx_tsc = crystal_hz = edx = 0;
|
||||
__get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
|
||||
|
||||
if (ebx_tsc != 0) {
|
||||
|
||||
if (debug && (ebx != 0))
|
||||
fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
|
||||
eax_crystal, ebx_tsc, crystal_hz);
|
||||
|
||||
if (crystal_hz == 0)
|
||||
switch(model) {
|
||||
case 0x4E: /* SKL */
|
||||
case 0x5E: /* SKL */
|
||||
crystal_hz = 24000000; /* 24 MHz */
|
||||
break;
|
||||
default:
|
||||
crystal_hz = 0;
|
||||
}
|
||||
|
||||
if (crystal_hz) {
|
||||
tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
|
||||
if (debug)
|
||||
fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
|
||||
tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
|
||||
do_snb_cstates = has_snb_msrs(family, model);
|
||||
do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
|
||||
@ -2301,18 +2539,19 @@ void check_cpuid()
|
||||
do_pc6 = (pkg_cstate_limit >= PCL__6);
|
||||
do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
|
||||
do_c8_c9_c10 = has_hsw_msrs(family, model);
|
||||
do_skl_residency = has_skl_msrs(family, model);
|
||||
do_slm_cstates = is_slm(family, model);
|
||||
bclk = discover_bclk(family, model);
|
||||
|
||||
do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model);
|
||||
do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
|
||||
rapl_probe(family, model);
|
||||
perf_limit_reasons_probe(family, model);
|
||||
|
||||
if (debug)
|
||||
dump_cstate_pstate_config_info();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void help()
|
||||
{
|
||||
fprintf(stderr,
|
||||
@ -2428,14 +2667,14 @@ void topology_probe()
|
||||
if (debug > 1)
|
||||
fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
|
||||
max_core_id, topo.num_cores_per_pkg);
|
||||
if (!summary_only && topo.num_cores_per_pkg > 1)
|
||||
if (debug && !summary_only && topo.num_cores_per_pkg > 1)
|
||||
show_core = 1;
|
||||
|
||||
topo.num_packages = max_package_id + 1;
|
||||
if (debug > 1)
|
||||
fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
|
||||
max_package_id, topo.num_packages);
|
||||
if (!summary_only && topo.num_packages > 1)
|
||||
if (debug && !summary_only && topo.num_packages > 1)
|
||||
show_pkg = 1;
|
||||
|
||||
topo.num_threads_per_core = max_siblings;
|
||||
@ -2550,13 +2789,10 @@ void turbostat_init()
|
||||
{
|
||||
check_dev_msr();
|
||||
check_permissions();
|
||||
check_cpuid();
|
||||
process_cpuid();
|
||||
|
||||
setup_all_buffers();
|
||||
|
||||
if (debug)
|
||||
print_verbose_header();
|
||||
|
||||
if (debug)
|
||||
for_all_cpus(print_epb, ODD_COUNTERS);
|
||||
|
||||
@ -2634,7 +2870,7 @@ int get_and_dump_counters(void)
|
||||
}
|
||||
|
||||
void print_version() {
|
||||
fprintf(stderr, "turbostat version 4.1 10-Feb, 2015"
|
||||
fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
|
||||
" - Len Brown <lenb@kernel.org>\n");
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user