linux-next/include/linux/clockchips.h
Thomas Gleixner 6ff7041dbf hrtimer: Fix migration expiry check
The timer migration expiry check should prevent the migration of a
timer to another CPU when the timer expires before the next event is
scheduled on the other CPU. Migrating the timer might delay it because
we can not reprogram the clock event device on the other CPU. But the
code implementing that check has two flaws:

- for !HIGHRES the check compares the expiry value with the clock
  events device expiry value which is wrong for CLOCK_REALTIME based
  timers.

- the check is racy. It holds the hrtimer base lock of the target CPU,
  but the clock event device expiry value can be modified
  nevertheless, e.g. by an timer interrupt firing.

The !HIGHRES case is easy to fix as we can enqueue the timer on the
cpu which was selected by the load balancer. It runs the idle
balancing code once per jiffy anyway. So the maximum delay for the
timer is the same as when we keep the tick on the current cpu going.

In the HIGHRES case we can get the next expiry value from the hrtimer
cpu_base of the target CPU and serialize the update with the cpu_base
lock. This moves the lock section in hrtimer_interrupt() so we can set
next_event to KTIME_MAX while we are handling the expired timers and
set it to the next expiry value after we handled the timers under the
base lock. While the expired timers are processed timer migration is
blocked because the expiry time of the timer is always <= KTIME_MAX.

Also remove the now useless clockevents_get_next_event() function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2009-07-10 17:32:55 +02:00

146 lines
4.2 KiB
C

/* linux/include/linux/clockchips.h
*
* This file contains the structure definitions for clockchips.
*
* If you are not a clockchip, or the time of day code, you should
* not be including this file!
*/
#ifndef _LINUX_CLOCKCHIPS_H
#define _LINUX_CLOCKCHIPS_H
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#include <linux/clocksource.h>
#include <linux/cpumask.h>
#include <linux/ktime.h>
#include <linux/notifier.h>
struct clock_event_device;
/* Clock event mode commands */
enum clock_event_mode {
CLOCK_EVT_MODE_UNUSED = 0,
CLOCK_EVT_MODE_SHUTDOWN,
CLOCK_EVT_MODE_PERIODIC,
CLOCK_EVT_MODE_ONESHOT,
CLOCK_EVT_MODE_RESUME,
};
/* Clock event notification values */
enum clock_event_nofitiers {
CLOCK_EVT_NOTIFY_ADD,
CLOCK_EVT_NOTIFY_BROADCAST_ON,
CLOCK_EVT_NOTIFY_BROADCAST_OFF,
CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
CLOCK_EVT_NOTIFY_SUSPEND,
CLOCK_EVT_NOTIFY_RESUME,
CLOCK_EVT_NOTIFY_CPU_DYING,
CLOCK_EVT_NOTIFY_CPU_DEAD,
};
/*
* Clock event features
*/
#define CLOCK_EVT_FEAT_PERIODIC 0x000001
#define CLOCK_EVT_FEAT_ONESHOT 0x000002
/*
* x86(64) specific misfeatures:
*
* - Clockevent source stops in C3 State and needs broadcast support.
* - Local APIC timer is used as a dummy device.
*/
#define CLOCK_EVT_FEAT_C3STOP 0x000004
#define CLOCK_EVT_FEAT_DUMMY 0x000008
/**
* struct clock_event_device - clock event device descriptor
* @name: ptr to clock event name
* @features: features
* @max_delta_ns: maximum delta value in ns
* @min_delta_ns: minimum delta value in ns
* @mult: nanosecond to cycles multiplier
* @shift: nanoseconds to cycles divisor (power of two)
* @rating: variable to rate clock event devices
* @irq: IRQ number (only for non CPU local devices)
* @cpumask: cpumask to indicate for which CPUs this device works
* @set_next_event: set next event function
* @set_mode: set mode function
* @event_handler: Assigned by the framework to be called by the low
* level handler of the event source
* @broadcast: function to broadcast events
* @list: list head for the management code
* @mode: operating mode assigned by the management code
* @next_event: local storage for the next event in oneshot mode
*/
struct clock_event_device {
const char *name;
unsigned int features;
unsigned long max_delta_ns;
unsigned long min_delta_ns;
unsigned long mult;
int shift;
int rating;
int irq;
const struct cpumask *cpumask;
int (*set_next_event)(unsigned long evt,
struct clock_event_device *);
void (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
void (*event_handler)(struct clock_event_device *);
void (*broadcast)(const struct cpumask *mask);
struct list_head list;
enum clock_event_mode mode;
ktime_t next_event;
};
/*
* Calculate a multiplication factor for scaled math, which is used to convert
* nanoseconds based values to clock ticks:
*
* clock_ticks = (nanoseconds * factor) >> shift.
*
* div_sc is the rearranged equation to calculate a factor from a given clock
* ticks / nanoseconds ratio:
*
* factor = (clock_ticks << shift) / nanoseconds
*/
static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
int shift)
{
uint64_t tmp = ((uint64_t)ticks) << shift;
do_div(tmp, nsec);
return (unsigned long) tmp;
}
/* Clock event layer functions */
extern unsigned long clockevent_delta2ns(unsigned long latch,
struct clock_event_device *evt);
extern void clockevents_register_device(struct clock_event_device *dev);
extern void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new);
extern void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode);
extern int clockevents_register_notifier(struct notifier_block *nb);
extern int clockevents_program_event(struct clock_event_device *dev,
ktime_t expires, ktime_t now);
extern void clockevents_handle_noop(struct clock_event_device *dev);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void clockevents_notify(unsigned long reason, void *arg);
#else
# define clockevents_notify(reason, arg) do { } while (0)
#endif
#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */
#define clockevents_notify(reason, arg) do { } while (0)
#endif
#endif