mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-11 08:18:47 +00:00
10d91611f4
Reimplement Book3S idle code in C, moving POWER7/8/9 implementation specific HV idle code to the powernv platform code. Book3S assembly stubs are kept in common code and used only to save the stack frame and non-volatile GPRs before executing architected idle instructions, and restoring the stack and reloading GPRs then returning to C after waking from idle. The complex logic dealing with threads and subcores, locking, SPRs, HMIs, timebase resync, etc., is all done in C which makes it more maintainable. This is not a strict translation to C code, there are some significant differences: - Idle wakeup no longer uses the ->cpu_restore call to reinit SPRs, but saves and restores them itself. - The optimisation where EC=ESL=0 idle modes did not have to save GPRs or change MSR is restored, because it's now simple to do. ESL=1 sleeps that do not lose GPRs can use this optimization too. - KVM secondary entry and cede is now more of a call/return style rather than branchy. nap_state_lost is not required because KVM always returns via NVGPR restoring path. - KVM secondary wakeup from offline sequence is moved entirely into the offline wakeup, which avoids a hwsync in the normal idle wakeup path. Performance measured with context switch ping-pong on different threads or cores, is possibly improved a small amount, 1-3% depending on stop state and core vs thread test for shallow states. Deep states it's in the noise compared with other latencies. KVM improvements: - Idle sleepers now always return to caller rather than branch out to KVM first. - This allows optimisations like very fast return to caller when no state has been lost. - KVM no longer requires nap_state_lost because it controls NVGPR save/restore itself on the way in and out. - The heavy idle wakeup KVM request check can be moved out of the normal host idle code and into the not-performance-critical offline code. - KVM nap code now returns from where it is called, which makes the flow a bit easier to follow.
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> [mpe: Squash the KVM changes in] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
106 lines
3.3 KiB
C
106 lines
3.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_POWERPC_CPUIDLE_H
|
|
#define _ASM_POWERPC_CPUIDLE_H
|
|
|
|
#ifdef CONFIG_PPC_POWERNV
|
|
/*
 * Per-thread state values used by powernv idle state management.
 *
 * These stay plain #defines: they are declared outside this header's
 * __ASSEMBLY__ guard, so an enum would not be usable here.
 */
#define PNV_THREAD_RUNNING	0
#define PNV_THREAD_NAP		1
#define PNV_THREAD_SLEEP	2
#define PNV_THREAD_WINKLE	3
|
|
|
|
/*
 * Core idle state word used by powernv idle on POWER8.
 *
 * Field layout, from the masks defined below:
 *
 *   bit  28     - lock bit. Synchronizes updates to the state, as well as
 *                 parts of the sleep/wake code (see kernel/idle_book3s.S).
 *   bits 16-19  - winkle count. A core does not lose complete state until
 *                 every thread is in winkle, so this field counts the
 *                 number of threads in winkle. A small window of false
 *                 positives around sleep/wake is okay, so long as there
 *                 are no false negatives.
 *   bits  8-15  - thread winkle bits. Once the winkle count reaches 8 (the
 *                 COUNT_ALL_BIT becomes set) these are set, and indicate
 *                 which threads have not yet woken from the winkle state.
 *   bits  0-7   - per-thread idle state. A thread's bit is cleared before
 *                 it executes an idle instruction (nap/sleep/winkle).
 */

/* Lock bit: bit number and the corresponding mask. */
#define NR_PNV_CORE_IDLE_LOCK_BIT		28
#define PNV_CORE_IDLE_LOCK_BIT			(1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)

/* Winkle count: field position, unit increment and field mask. */
#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT	16
#define PNV_CORE_IDLE_WINKLE_COUNT		0x00010000
#define PNV_CORE_IDLE_WINKLE_COUNT_BITS		0x000F0000

/* Threads still to wake from winkle: field position and mask. */
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT	8
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS	0x0000FF00

/* Per-thread idle state bits. */
#define PNV_CORE_IDLE_THREAD_BITS		0x000000FF
|
|
|
|
/*
 * ============================ NOTE =================================
 * Older firmware populates only the RL field in psscr_val and sets
 * psscr_mask to 0xf. On such firmware, the kernel fills in the remaining
 * PSSCR fields with the following defaults:
 *
 *   - ESL and EC bits are set to 1, so wakeup from any stop state will
 *     be at vector 0x100.
 *
 *   - MTL and PSLL are set to the maximum value allowed by the ISA,
 *     i.e. 15.
 *
 *   - The Transition Rate, TR, is set to the maximum value 3.
 */
#define PSSCR_HV_DEFAULT_VAL						\
	(PSSCR_ESL | PSSCR_EC | PSSCR_PSLL_MASK | PSSCR_TR_MASK |	\
	 PSSCR_MTL_MASK)

/* Every field covered by the default value, plus the RL field. */
#define PSSCR_HV_DEFAULT_MASK	(PSSCR_HV_DEFAULT_VAL | PSSCR_RL_MASK)
|
|
/* Shift amounts that bring the masked EC / ESL bit down to bit 0. */
#define PSSCR_EC_SHIFT		20
#define PSSCR_ESL_SHIFT		21

/*
 * Field extractors for a PSSCR value @x.
 * EC and ESL are masked and shifted down; RL is only masked with
 * PSSCR_RL_MASK and not shifted.
 */
#define GET_PSSCR_EC(x)		(((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
#define GET_PSSCR_ESL(x)	(((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)
#define GET_PSSCR_RL(x)		((x) & PSSCR_RL_MASK)
|
|
|
|
/*
 * Error codes describing why a PSSCR value is considered invalid; they
 * are the values report_invalid_psscr_val() switches on.
 *
 * The negative values are parenthesized so each macro always expands to
 * a single primary expression, whatever operators surround its use.
 */
#define ERR_EC_ESL_MISMATCH		(-1)
#define ERR_DEEP_STATE_ESL_MISMATCH	(-2)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#define PNV_IDLE_NAME_LEN 16
|
|
struct pnv_idle_states_t {
|
|
char name[PNV_IDLE_NAME_LEN];
|
|
u32 latency_ns;
|
|
u32 residency_ns;
|
|
u64 psscr_val;
|
|
u64 psscr_mask;
|
|
u32 flags;
|
|
bool valid;
|
|
};
|
|
|
|
extern struct pnv_idle_states_t *pnv_idle_states;
|
|
extern int nr_pnv_idle_states;
|
|
|
|
unsigned long pnv_cpu_offline(unsigned int cpu);
|
|
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
|
|
/*
 * report_invalid_psscr_val - warn about a rejected PSSCR value.
 * @psscr_val: the offending PSSCR value, printed as 16 hex digits.
 * @err: reason code, ERR_EC_ESL_MISMATCH or ERR_DEEP_STATE_ESL_MISMATCH.
 *
 * Emits one pr_warn() line describing the failure. Any other @err value
 * is ignored and nothing is logged.
 */
static inline void report_invalid_psscr_val(u64 psscr_val, int err)
{
	switch (err) {
	case ERR_EC_ESL_MISMATCH:
		/* Terminate with '\n' so the record is complete on its own. */
		pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal\n",
			psscr_val);
		break;
	case ERR_DEEP_STATE_ESL_MISMATCH:
		pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state\n",
			psscr_val);
		break;
	default:
		/* Not a code this helper knows how to describe. */
		break;
	}
}
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#endif
|