Rafael J. Wysocki 124cf9117c PM / sleep: Make it possible to quiesce timers during suspend-to-idle
The efficiency of suspend-to-idle depends on being able to keep CPUs
in the deepest available idle states for as much time as possible.
Ideally, they should only be brought out of idle by system wakeup
interrupts.

However, timer interrupts occurring periodically prevent that from
happening and it is not practical to chase all of the "misbehaving"
timers in a whack-a-mole fashion.  A much more effective approach is
to suspend the local ticks for all CPUs and the entire timekeeping
along the lines of what is done during full suspend, which also
helps to keep suspend-to-idle and full suspend reasonably similar.

The idea is to suspend the local tick on each CPU executing
cpuidle_enter_freeze() and to make the last of them suspend the
entire timekeeping.  That should prevent timer interrupts from
triggering until an IO interrupt wakes up one of the CPUs.  It
needs to be done with interrupts disabled on all of the CPUs,
though, because otherwise the suspended clocksource might be
accessed by an interrupt handler which might lead to fatal
consequences.

Unfortunately, the existing ->enter callbacks provided by cpuidle
drivers generally cannot be used for implementing that, because some
of them re-enable interrupts temporarily and some idle entry methods
cause interrupts to be re-enabled automatically on exit.  Also some
of these callbacks manipulate local clock event devices of the CPUs
which really shouldn't be done after suspending their ticks.

To overcome that difficulty, introduce a new cpuidle state callback,
->enter_freeze, that will be guaranteed (1) to keep interrupts
disabled all the time (and return with interrupts disabled) and (2)
not to touch the CPU timer devices.  Modify cpuidle_enter_freeze() to
look for the deepest available idle state with ->enter_freeze present
and to make the CPU execute that callback with suspended tick (and the
last of the online CPUs to execute it with suspended timekeeping).

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
2015-02-15 19:40:09 +01:00

234 lines
7.2 KiB
C

/*
* cpuidle.h - a generic framework for CPU idle power management
*
* (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
* Shaohua Li <shaohua.li@intel.com>
* Adam Belay <abelay@novell.com>
*
* This code is licenced under the GPL.
*/
#ifndef _LINUX_CPUIDLE_H
#define _LINUX_CPUIDLE_H
#include <linux/percpu.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#define CPUIDLE_STATE_MAX 10
#define CPUIDLE_NAME_LEN 16
#define CPUIDLE_DESC_LEN 32
struct module;
struct cpuidle_device;
struct cpuidle_driver;
/****************************
* CPUIDLE DEVICE INTERFACE *
****************************/
struct cpuidle_state_usage {
unsigned long long disable;
unsigned long long usage;
unsigned long long time; /* in US */
};
struct cpuidle_state {
char name[CPUIDLE_NAME_LEN];
char desc[CPUIDLE_DESC_LEN];
unsigned int flags;
unsigned int exit_latency; /* in US */
int power_usage; /* in mW */
unsigned int target_residency; /* in US */
bool disabled; /* disabled on all CPUs */
int (*enter) (struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index);
int (*enter_dead) (struct cpuidle_device *dev, int index);
/*
* CPUs execute ->enter_freeze with the local tick or entire timekeeping
* suspended, so it must not re-enable interrupts at any point (even
* temporarily) or attempt to change states of clock event devices.
*/
void (*enter_freeze) (struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index);
};
/* Idle State Flags */
#define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */
#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */
#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
struct cpuidle_device_kobj;
struct cpuidle_state_kobj;
struct cpuidle_driver_kobj;
struct cpuidle_device {
unsigned int registered:1;
unsigned int enabled:1;
unsigned int cpu;
int last_residency;
int state_count;
struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
struct cpuidle_driver_kobj *kobj_driver;
struct cpuidle_device_kobj *kobj_dev;
struct list_head device_list;
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
int safe_state_index;
cpumask_t coupled_cpus;
struct cpuidle_coupled *coupled;
#endif
};
DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
/**
* cpuidle_get_last_residency - retrieves the last state's residency time
* @dev: the target CPU
*/
static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
{
return dev->last_residency;
}
/****************************
* CPUIDLE DRIVER INTERFACE *
****************************/
struct cpuidle_driver {
const char *name;
struct module *owner;
int refcnt;
/* used by the cpuidle framework to setup the broadcast timer */
unsigned int bctimer:1;
/* states array must be ordered in decreasing power consumption */
struct cpuidle_state states[CPUIDLE_STATE_MAX];
int state_count;
int safe_state_index;
/* the driver handles the cpus in cpumask */
struct cpumask *cpumask;
};
#ifdef CONFIG_CPU_IDLE
extern void disable_cpuidle(void);
extern int cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
extern struct cpuidle_driver *cpuidle_get_driver(void);
extern struct cpuidle_driver *cpuidle_driver_ref(void);
extern void cpuidle_driver_unref(void);
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
extern int cpuidle_register_device(struct cpuidle_device *dev);
extern void cpuidle_unregister_device(struct cpuidle_device *dev);
extern int cpuidle_register(struct cpuidle_driver *drv,
const struct cpumask *const coupled_cpus);
extern void cpuidle_unregister(struct cpuidle_driver *drv);
extern void cpuidle_pause_and_lock(void);
extern void cpuidle_resume_and_unlock(void);
extern void cpuidle_pause(void);
extern void cpuidle_resume(void);
extern int cpuidle_enable_device(struct cpuidle_device *dev);
extern void cpuidle_disable_device(struct cpuidle_device *dev);
extern int cpuidle_play_dead(void);
extern void cpuidle_enter_freeze(void);
extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
#else
static inline void disable_cpuidle(void) { }
static inline int cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{return -ENODEV; }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
{return -ENODEV; }
static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { }
static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
{return -ENODEV; }
static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; }
static inline void cpuidle_driver_unref(void) {}
static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
static inline int cpuidle_register_device(struct cpuidle_device *dev)
{return -ENODEV; }
static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { }
static inline int cpuidle_register(struct cpuidle_driver *drv,
const struct cpumask *const coupled_cpus)
{return -ENODEV; }
static inline void cpuidle_unregister(struct cpuidle_driver *drv) { }
static inline void cpuidle_pause_and_lock(void) { }
static inline void cpuidle_resume_and_unlock(void) { }
static inline void cpuidle_pause(void) { }
static inline void cpuidle_resume(void) { }
static inline int cpuidle_enable_device(struct cpuidle_device *dev)
{return -ENODEV; }
static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
static inline int cpuidle_play_dead(void) {return -ENODEV; }
static inline void cpuidle_enter_freeze(void) { }
static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
struct cpuidle_device *dev) {return NULL; }
#endif
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);
#else
static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
{
}
#endif
/******************************
* CPUIDLE GOVERNOR INTERFACE *
******************************/
struct cpuidle_governor {
char name[CPUIDLE_NAME_LEN];
struct list_head governor_list;
unsigned int rating;
int (*enable) (struct cpuidle_driver *drv,
struct cpuidle_device *dev);
void (*disable) (struct cpuidle_driver *drv,
struct cpuidle_device *dev);
int (*select) (struct cpuidle_driver *drv,
struct cpuidle_device *dev);
void (*reflect) (struct cpuidle_device *dev, int index);
struct module *owner;
};
#ifdef CONFIG_CPU_IDLE
extern int cpuidle_register_governor(struct cpuidle_governor *gov);
#else
static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
{return 0;}
#endif
#ifdef CONFIG_ARCH_HAS_CPU_RELAX
#define CPUIDLE_DRIVER_STATE_START 1
#else
#define CPUIDLE_DRIVER_STATE_START 0
#endif
#endif /* _LINUX_CPUIDLE_H */