linux-stable/kernel/time/posix-timers.c

1548 lines
42 KiB
C
Raw Normal View History

time: Add SPDX license identifiers Update the time(r) core files files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Philippe Ombredanne, Kate Stewart and myself. The data has been created with two independent license scanners and manual inspection. The following files do not contain any direct license information and have been omitted from the big initial SPDX changes: timeconst.bc: The .bc files were not touched time.c, timer.c, timekeeping.c: Licence was deduced from EXPORT_SYMBOL_GPL As those files do not contain direct license references they fall under the project license, i.e. GPL V2 only. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Ingo Molnar <mingo@kernel.org> Acked-by: John Stultz <john.stultz@linaro.org> Acked-by: Corey Minyard <cminyard@mvista.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Kate Stewart <kstewart@linuxfoundation.org> Cc: Philippe Ombredanne <pombredanne@nexb.com> Cc: Russell King <rmk+kernel@armlinux.org.uk> Cc: Richard Cochran <richardcochran@gmail.com> Cc: Nicolas Pitre <nicolas.pitre@linaro.org> Cc: David Riley <davidriley@chromium.org> Cc: Colin Cross <ccross@android.com> Cc: Mark Brown <broonie@kernel.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Link: https://lkml.kernel.org/r/20181031182252.879109557@linutronix.de
2018-10-31 18:21:09 +00:00
// SPDX-License-Identifier: GPL-2.0+
/*
* 2002-10-15 Posix Clocks & timers
* by George Anzinger george@mvista.com
* Copyright (C) 2002 2003 by MontaVista Software.
*
* 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
* Copyright (C) 2004 Boris Hu
*
* These are all the functions necessary to implement POSIX clocks & timers
*/
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/export.h>
#include <linux/hashtable.h>
#include <linux/compat.h>
#include <linux/nospec.h>
#include <linux/time_namespace.h>
#include "timekeeping.h"
#include "posix-timers.h"
static struct kmem_cache *posix_timers_cache;
/*
* Timers are managed in a hash table for lockless lookup. The hash key is
* constructed from current::signal and the timer ID and the timer is
* matched against current::signal and the timer ID when walking the hash
* bucket list.
*
* This allows checkpoint/restore to reconstruct the exact timer IDs for
* a process.
*/
static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
static DEFINE_SPINLOCK(hash_lock);
static const struct k_clock * const posix_clocks[];
static const struct k_clock *clockid_to_kclock(const clockid_t id);
static const struct k_clock clock_realtime, clock_monotonic;
/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
#define lock_timer(tid, flags) \
({ struct k_itimer *__timr; \
__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \
__timr; \
})
static int hash(struct signal_struct *sig, unsigned int nr)
{
return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
}
static struct k_itimer *__posix_timers_find(struct hlist_head *head,
struct signal_struct *sig,
timer_t id)
{
struct k_itimer *timer;
hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) {
/* timer->it_signal can be set concurrently */
if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id))
return timer;
}
return NULL;
}
static struct k_itimer *posix_timer_by_id(timer_t id)
{
struct signal_struct *sig = current->signal;
struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
return __posix_timers_find(head, sig, id);
}
static int posix_timer_add(struct k_itimer *timer)
{
struct signal_struct *sig = current->signal;
struct hlist_head *head;
unsigned int cnt, id;
/*
* FIXME: Replace this by a per signal struct xarray once there is
* a plan to handle the resulting CRIU regression gracefully.
*/
for (cnt = 0; cnt <= INT_MAX; cnt++) {
spin_lock(&hash_lock);
id = sig->next_posix_timer_id;
/* Write the next ID back. Clamp it to the positive space */
sig->next_posix_timer_id = (id + 1) & INT_MAX;
head = &posix_timers_hashtable[hash(sig, id)];
if (!__posix_timers_find(head, sig, id)) {
hlist_add_head_rcu(&timer->t_hash, head);
spin_unlock(&hash_lock);
return id;
}
spin_unlock(&hash_lock);
}
/* POSIX return code when no timer ID could be allocated */
return -EAGAIN;
}
static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
{
spin_unlock_irqrestore(&timr->it_lock, flags);
}
static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_real_ts64(tp);
return 0;
}
static ktime_t posix_get_realtime_ktime(clockid_t which_clock)
{
return ktime_get_real();
}
static int posix_clock_realtime_set(const clockid_t which_clock,
const struct timespec64 *tp)
{
return do_sys_settimeofday64(tp, NULL);
}
static int posix_clock_realtime_adj(const clockid_t which_clock,
struct __kernel_timex *t)
{
return do_adjtimex(t);
}
static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_ts64(tp);
timens_add_monotonic(tp);
return 0;
}
static ktime_t posix_get_monotonic_ktime(clockid_t which_clock)
{
return ktime_get();
}
static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_raw_ts64(tp);
timens_add_monotonic(tp);
return 0;
}
static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_coarse_real_ts64(tp);
return 0;
}
static int posix_get_monotonic_coarse(clockid_t which_clock,
struct timespec64 *tp)
{
ktime_get_coarse_ts64(tp);
timens_add_monotonic(tp);
return 0;
}
static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp)
{
*tp = ktime_to_timespec64(KTIME_LOW_RES);
return 0;
}
static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_boottime_ts64(tp);
timens_add_boottime(tp);
return 0;
}
static ktime_t posix_get_boottime_ktime(const clockid_t which_clock)
{
return ktime_get_boottime();
}
static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_clocktai_ts64(tp);
return 0;
}
static ktime_t posix_get_tai_ktime(clockid_t which_clock)
{
return ktime_get_clocktai();
}
static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
{
tp->tv_sec = 0;
tp->tv_nsec = hrtimer_resolution;
return 0;
}
static __init int init_posix_timers(void)
{
posix_timers_cache = kmem_cache_create("posix_timers_cache",
memcg: enable accounting for posix_timers_cache slab A program may create multiple interval timers using timer_create(). For each timer the kernel preallocates a "queued real-time signal", Consequently, the number of timers is limited by the RLIMIT_SIGPENDING resource limit. The allocated object is quite small, ~250 bytes, but even the default signal limits allow to consume up to 100 megabytes per user. It makes sense to account for them to limit the host's memory consumption from inside the memcg-limited container. Link: https://lkml.kernel.org/r/57795560-025c-267c-6b1a-dea852d95530@virtuozzo.com Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Shakeel Butt <shakeelb@google.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Andrei Vagin <avagin@gmail.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bp@suse.de> Cc: Christian Brauner <christian.brauner@ubuntu.com> Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "J. Bruce Fields" <bfields@fieldses.org> Cc: Jeff Layton <jlayton@kernel.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: Jiri Slaby <jirislaby@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kirill Tkhai <ktkhai@virtuozzo.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Roman Gushchin <guro@fb.com> Cc: Serge Hallyn <serge@hallyn.com> Cc: Tejun Heo <tj@kernel.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Yutian Yang <nglaive@gmail.com> Cc: Zefan Li <lizefan.x@bytedance.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-09-02 21:55:39 +00:00
sizeof(struct k_itimer), 0,
SLAB_PANIC | SLAB_ACCOUNT, NULL);
return 0;
}
__initcall(init_posix_timers);
/*
* The siginfo si_overrun field and the return value of timer_getoverrun(2)
* are of type int. Clamp the overrun value to INT_MAX
*/
static inline int timer_overrun_to_int(struct k_itimer *timr)
{
if (timr->it_overrun_last > (s64)INT_MAX)
return INT_MAX;
return (int)timr->it_overrun_last;
}
static void common_hrtimer_rearm(struct k_itimer *timr)
{
struct hrtimer *timer = &timr->it.real.timer;
timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
timr->it_interval);
hrtimer_restart(timer);
}
static bool __posixtimer_deliver_signal(struct kernel_siginfo *info, struct k_itimer *timr)
{
guard(spinlock)(&timr->it_lock);
/*
* Check if the timer is still alive or whether it got modified
* since the signal was queued. In either case, don't rearm and
* drop the signal.
*/
if (timr->it_signal_seq != timr->it_sigqueue_seq || WARN_ON_ONCE(!timr->it_signal))
return false;
if (!timr->it_interval || WARN_ON_ONCE(timr->it_status != POSIX_TIMER_REQUEUE_PENDING))
return true;
timr->kclock->timer_rearm(timr);
timr->it_status = POSIX_TIMER_ARMED;
timr->it_overrun_last = timr->it_overrun;
timr->it_overrun = -1LL;
++timr->it_signal_seq;
info->si_overrun = timer_overrun_to_int(timr);
return true;
}
/*
* This function is called from the signal delivery code. It decides
* whether the signal should be dropped and rearms interval timers. The
* timer can be unconditionally accessed as there is a reference held on
* it.
*/
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
{
struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq);
bool ret;
/*
* Release siglock to ensure proper locking order versus
* timr::it_lock. Keep interrupts disabled.
*/
spin_unlock(&current->sighand->siglock);
ret = __posixtimer_deliver_signal(info, timr);
/* Drop the reference which was acquired when the signal was queued */
posixtimer_putref(timr);
spin_lock(&current->sighand->siglock);
return ret;
}
void posix_timer_queue_signal(struct k_itimer *timr)
{
lockdep_assert_held(&timr->it_lock);
timr->it_status = timr->it_interval ? POSIX_TIMER_REQUEUE_PENDING : POSIX_TIMER_DISARMED;
posixtimer_send_sigqueue(timr);
}
/*
* This function gets called when a POSIX.1b interval timer expires from
* the HRTIMER interrupt (soft interrupt on RT kernels).
*
* Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI
* based timers.
*/
static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
{
struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
guard(spinlock_irqsave)(&timr->it_lock);
posix_timer_queue_signal(timr);
return HRTIMER_NORESTART;
}
static struct pid *good_sigevent(sigevent_t * event)
{
struct pid *pid = task_tgid(current);
struct task_struct *rtn;
switch (event->sigev_notify) {
case SIGEV_SIGNAL | SIGEV_THREAD_ID:
pid = find_vpid(event->sigev_notify_thread_id);
rtn = pid_task(pid, PIDTYPE_PID);
if (!rtn || !same_thread_group(rtn, current))
return NULL;
fallthrough;
case SIGEV_SIGNAL:
case SIGEV_THREAD:
if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
return NULL;
fallthrough;
case SIGEV_NONE:
return pid;
default:
return NULL;
}
}
static struct k_itimer *alloc_posix_timer(void)
{
struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
if (!tmr)
return tmr;
if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) {
kmem_cache_free(posix_timers_cache, tmr);
return NULL;
}
rcuref_init(&tmr->rcuref, 1);
return tmr;
}
void posixtimer_free_timer(struct k_itimer *tmr)
{
put_pid(tmr->it_pid);
if (tmr->sigq.ucounts)
dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING);
kfree_rcu(tmr, rcu);
}
static void posix_timer_unhash_and_free(struct k_itimer *tmr)
{
spin_lock(&hash_lock);
hlist_del_rcu(&tmr->t_hash);
spin_unlock(&hash_lock);
posixtimer_putref(tmr);
}
static int common_timer_create(struct k_itimer *new_timer)
{
hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
return 0;
}
/* Create a POSIX.1b interval timer. */
static int do_timer_create(clockid_t which_clock, struct sigevent *event,
timer_t __user *created_timer_id)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct k_itimer *new_timer;
int error, new_timer_id;
if (!kc)
return -EINVAL;
if (!kc->timer_create)
return -EOPNOTSUPP;
new_timer = alloc_posix_timer();
if (unlikely(!new_timer))
return -EAGAIN;
spin_lock_init(&new_timer->it_lock);
/*
* Add the timer to the hash table. The timer is not yet valid
* because new_timer::it_signal is still NULL. The timer id is also
* not yet visible to user space.
*/
new_timer_id = posix_timer_add(new_timer);
if (new_timer_id < 0) {
posixtimer_free_timer(new_timer);
return new_timer_id;
}
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
new_timer->kclock = kc;
new_timer->it_overrun = -1LL;
if (event) {
rcu_read_lock();
new_timer->it_pid = get_pid(good_sigevent(event));
rcu_read_unlock();
if (!new_timer->it_pid) {
error = -EINVAL;
goto out;
}
new_timer->it_sigev_notify = event->sigev_notify;
new_timer->sigq.info.si_signo = event->sigev_signo;
new_timer->sigq.info.si_value = event->sigev_value;
} else {
new_timer->it_sigev_notify = SIGEV_SIGNAL;
new_timer->sigq.info.si_signo = SIGALRM;
memset(&new_timer->sigq.info.si_value, 0, sizeof(sigval_t));
new_timer->sigq.info.si_value.sival_int = new_timer->it_id;
new_timer->it_pid = get_pid(task_tgid(current));
}
if (new_timer->it_sigev_notify & SIGEV_THREAD_ID)
new_timer->it_pid_type = PIDTYPE_PID;
else
new_timer->it_pid_type = PIDTYPE_TGID;
new_timer->sigq.info.si_tid = new_timer->it_id;
new_timer->sigq.info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT;
goto out;
}
/*
* After succesful copy out, the timer ID is visible to user space
* now but not yet valid because new_timer::signal is still NULL.
*
* Complete the initialization with the clock specific create
* callback.
*/
error = kc->timer_create(new_timer);
if (error)
goto out;
spin_lock_irq(&current->sighand->siglock);
/* This makes the timer valid in the hash table */
WRITE_ONCE(new_timer->it_signal, current->signal);
hlist_add_head(&new_timer->list, &current->signal->posix_timers);
spin_unlock_irq(&current->sighand->siglock);
/*
* After unlocking sighand::siglock @new_timer is subject to
* concurrent removal and cannot be touched anymore
*/
return 0;
out:
posix_timer_unhash_and_free(new_timer);
return error;
}
SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
struct sigevent __user *, timer_event_spec,
timer_t __user *, created_timer_id)
{
if (timer_event_spec) {
sigevent_t event;
if (copy_from_user(&event, timer_event_spec, sizeof (event)))
return -EFAULT;
return do_timer_create(which_clock, &event, created_timer_id);
}
return do_timer_create(which_clock, NULL, created_timer_id);
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
struct compat_sigevent __user *, timer_event_spec,
timer_t __user *, created_timer_id)
{
if (timer_event_spec) {
sigevent_t event;
if (get_compat_sigevent(&event, timer_event_spec))
return -EFAULT;
return do_timer_create(which_clock, &event, created_timer_id);
}
return do_timer_create(which_clock, NULL, created_timer_id);
}
#endif
static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
{
struct k_itimer *timr;
/*
* timer_t could be any type >= int and we want to make sure any
* @timer_id outside positive int range fails lookup.
*/
if ((unsigned long long)timer_id > INT_MAX)
return NULL;
/*
* The hash lookup and the timers are RCU protected.
*
* Timers are added to the hash in invalid state where
* timr::it_signal == NULL. timer::it_signal is only set after the
* rest of the initialization succeeded.
*
* Timer destruction happens in steps:
* 1) Set timr::it_signal to NULL with timr::it_lock held
* 2) Release timr::it_lock
* 3) Remove from the hash under hash_lock
* 4) Put the reference count.
*
* The reference count might not drop to zero if timr::sigq is
* queued. In that case the signal delivery or flush will put the
* last reference count.
*
* When the reference count reaches zero, the timer is scheduled
* for RCU removal after the grace period.
*
* Holding rcu_read_lock() accross the lookup ensures that
* the timer cannot be freed.
*
* The lookup validates locklessly that timr::it_signal ==
* current::it_signal and timr::it_id == @timer_id. timr::it_id
* can't change, but timr::it_signal becomes NULL during
* destruction.
*/
rcu_read_lock();
timr = posix_timer_by_id(timer_id);
if (timr) {
spin_lock_irqsave(&timr->it_lock, *flags);
/*
* Validate under timr::it_lock that timr::it_signal is
* still valid. Pairs with #1 above.
*/
if (timr->it_signal == current->signal) {
rcu_read_unlock();
return timr;
}
spin_unlock_irqrestore(&timr->it_lock, *flags);
}
rcu_read_unlock();
return NULL;
}
static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now)
{
struct hrtimer *timer = &timr->it.real.timer;
return __hrtimer_expires_remaining_adjusted(timer, now);
}
static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now)
{
struct hrtimer *timer = &timr->it.real.timer;
return hrtimer_forward(timer, now, timr->it_interval);
}
/*
* Get the time remaining on a POSIX.1b interval timer.
*
* Two issues to handle here:
*
* 1) The timer has a requeue pending. The return value must appear as
* if the timer has been requeued right now.
*
* 2) The timer is a SIGEV_NONE timer. These timers are never enqueued
* into the hrtimer queue and therefore never expired. Emulate expiry
* here taking #1 into account.
*/
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{
const struct k_clock *kc = timr->kclock;
ktime_t now, remaining, iv;
bool sig_none;
sig_none = timr->it_sigev_notify == SIGEV_NONE;
iv = timr->it_interval;
/* interval timer ? */
if (iv) {
cur_setting->it_interval = ktime_to_timespec64(iv);
} else if (timr->it_status == POSIX_TIMER_DISARMED) {
/*
* SIGEV_NONE oneshot timers are never queued and therefore
* timr->it_status is always DISARMED. The check below
* vs. remaining time will handle this case.
*
* For all other timers there is nothing to update here, so
* return.
*/
if (!sig_none)
return;
}
now = kc->clock_get_ktime(timr->it_clock);
/*
* If this is an interval timer and either has requeue pending or
* is a SIGEV_NONE timer move the expiry time forward by intervals,
* so expiry is > now.
*/
if (iv && timr->it_status != POSIX_TIMER_ARMED)
timr->it_overrun += kc->timer_forward(timr, now);
remaining = kc->timer_remaining(timr, now);
/*
* As @now is retrieved before a possible timer_forward() and
* cannot be reevaluated by the compiler @remaining is based on the
* same @now value. Therefore @remaining is consistent vs. @now.
*
* Consequently all interval timers, i.e. @iv > 0, cannot have a
* remaining time <= 0 because timer_forward() guarantees to move
* them forward so that the next timer expiry is > @now.
*/
if (remaining <= 0) {
/*
* A single shot SIGEV_NONE timer must return 0, when it is
* expired! Timers which have a real signal delivery mode
* must return a remaining time greater than 0 because the
* signal has not yet been delivered.
*/
if (!sig_none)
cur_setting->it_value.tv_nsec = 1;
} else {
cur_setting->it_value = ktime_to_timespec64(remaining);
}
}
static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting)
{
const struct k_clock *kc;
struct k_itimer *timr;
unsigned long flags;
int ret = 0;
timr = lock_timer(timer_id, &flags);
if (!timr)
return -EINVAL;
memset(setting, 0, sizeof(*setting));
kc = timr->kclock;
if (WARN_ON_ONCE(!kc || !kc->timer_get))
ret = -EINVAL;
else
kc->timer_get(timr, setting);
unlock_timer(timr, flags);
return ret;
}
/* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct __kernel_itimerspec __user *, setting)
{
struct itimerspec64 cur_setting;
int ret = do_timer_gettime(timer_id, &cur_setting);
if (!ret) {
if (put_itimerspec64(&cur_setting, setting))
ret = -EFAULT;
}
return ret;
}
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
struct old_itimerspec32 __user *, setting)
{
struct itimerspec64 cur_setting;
int ret = do_timer_gettime(timer_id, &cur_setting);
if (!ret) {
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (put_old_itimerspec32(&cur_setting, setting))
ret = -EFAULT;
}
return ret;
}
#endif
/**
* sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer
* @timer_id: The timer ID which identifies the timer
*
* The "overrun count" of a timer is one plus the number of expiration
* intervals which have elapsed between the first expiry, which queues the
* signal and the actual signal delivery. On signal delivery the "overrun
* count" is calculated and cached, so it can be returned directly here.
*
* As this is relative to the last queued signal the returned overrun count
* is meaningless outside of the signal delivery path and even there it
* does not accurately reflect the current state when user space evaluates
* it.
*
* Returns:
* -EINVAL @timer_id is invalid
* 1..INT_MAX The number of overruns related to the last delivered signal
*/
SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
{
struct k_itimer *timr;
unsigned long flags;
int overrun;
timr = lock_timer(timer_id, &flags);
if (!timr)
return -EINVAL;
overrun = timer_overrun_to_int(timr);
unlock_timer(timr, flags);
return overrun;
}
static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
bool absolute, bool sigev_none)
{
struct hrtimer *timer = &timr->it.real.timer;
enum hrtimer_mode mode;
mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
/*
* Posix magic: Relative CLOCK_REALTIME timers are not affected by
* clock modifications, so they become CLOCK_MONOTONIC based under the
* hood. See hrtimer_init(). Update timr->kclock, so the generic
* functions which use timr->kclock->clock_get_*() work.
*
* Note: it_clock stays unmodified, because the next timer_set() might
* use ABSTIME, so it needs to switch back.
*/
if (timr->it_clock == CLOCK_REALTIME)
timr->kclock = absolute ? &clock_realtime : &clock_monotonic;
hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
timr->it.real.timer.function = posix_timer_fn;
if (!absolute)
expires = ktime_add_safe(expires, timer->base->get_time());
hrtimer_set_expires(timer, expires);
if (!sigev_none)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
{
return hrtimer_try_to_cancel(&timr->it.real.timer);
}
static void common_timer_wait_running(struct k_itimer *timer)
{
hrtimer_cancel_wait_running(&timer->it.real.timer);
}
/*
* On PREEMPT_RT this prevents priority inversion and a potential livelock
* against the ksoftirqd thread in case that ksoftirqd gets preempted while
* executing a hrtimer callback.
*
* See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this
* just results in a cpu_relax().
*
* For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is
* just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this
* prevents spinning on an eventually scheduled out task and a livelock
* when the task which tries to delete or disarm the timer has preempted
* the task which runs the expiry in task work context.
*/
static struct k_itimer *timer_wait_running(struct k_itimer *timer,
unsigned long *flags)
{
const struct k_clock *kc = READ_ONCE(timer->kclock);
timer_t timer_id = READ_ONCE(timer->it_id);
/* Prevent kfree(timer) after dropping the lock */
rcu_read_lock();
unlock_timer(timer, *flags);
posix-cpu-timers: Implement the missing timer_wait_running callback For some unknown reason the introduction of the timer_wait_running callback missed to fixup posix CPU timers, which went unnoticed for almost four years. Marco reported recently that the WARN_ON() in timer_wait_running() triggers with a posix CPU timer test case. Posix CPU timers have two execution models for expiring timers depending on CONFIG_POSIX_CPU_TIMERS_TASK_WORK: 1) If not enabled, the expiry happens in hard interrupt context so spin waiting on the remote CPU is reasonably time bound. Implement an empty stub function for that case. 2) If enabled, the expiry happens in task work before returning to user space or guest mode. The expired timers are marked as firing and moved from the timer queue to a local list head with sighand lock held. Once the timers are moved, sighand lock is dropped and the expiry happens in fully preemptible context. That means the expiring task can be scheduled out, migrated, interrupted etc. So spin waiting on it is more than suboptimal. The timer wheel has a timer_wait_running() mechanism for RT, which uses a per CPU timer-base expiry lock which is held by the expiry code and the task waiting for the timer function to complete blocks on that lock. This does not work in the same way for posix CPU timers as there is no timer base and expiry for process wide timers can run on any task belonging to that process, but the concept of waiting on an expiry lock can be used too in a slightly different way: - Add a mutex to struct posix_cputimers_work. This struct is per task and used to schedule the expiry task work from the timer interrupt. - Add a task_struct pointer to struct cpu_timer which is used to store a the task which runs the expiry. That's filled in when the task moves the expired timers to the local expiry list. That's not affecting the size of the k_itimer union as there are bigger union members already - Let the task take the expiry mutex around the expiry function - Let the waiter acquire a task reference with rcu_read_lock() held and block on the expiry mutex This avoids spin-waiting on a task which might not even be on a CPU and works nicely for RT too. Fixes: ec8f954a40da ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT") Reported-by: Marco Elver <elver@google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Marco Elver <elver@google.com> Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87zg764ojw.ffs@tglx
2023-04-17 13:37:55 +00:00
/*
* kc->timer_wait_running() might drop RCU lock. So @timer
* cannot be touched anymore after the function returns!
*/
if (!WARN_ON_ONCE(!kc->timer_wait_running))
kc->timer_wait_running(timer);
rcu_read_unlock();
/* Relock the timer. It might be not longer hashed. */
return lock_timer(timer_id, flags);
}
/*
* Set up the new interval and reset the signal delivery data
*/
void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
{
if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
else
timer->it_interval = 0;
/* Reset overrun accounting */
timer->it_overrun_last = 0;
timer->it_overrun = -1LL;
}
/* Set a POSIX.1b interval timer. */
int common_timer_set(struct k_itimer *timr, int flags,
struct itimerspec64 *new_setting,
struct itimerspec64 *old_setting)
{
const struct k_clock *kc = timr->kclock;
bool sigev_none;
ktime_t expires;
if (old_setting)
common_timer_get(timr, old_setting);
/*
* Careful here. On SMP systems the timer expiry function could be
* active and spinning on timr->it_lock.
*/
if (kc->timer_try_to_cancel(timr) < 0)
return TIMER_RETRY;
timr->it_status = POSIX_TIMER_DISARMED;
posix_timer_set_common(timr, new_setting);
/* Keep timer disarmed when it_value is zero */
if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
return 0;
expires = timespec64_to_ktime(new_setting->it_value);
if (flags & TIMER_ABSTIME)
expires = timens_ktime_to_host(timr->it_clock, expires);
sigev_none = timr->it_sigev_notify == SIGEV_NONE;
kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
if (!sigev_none)
timr->it_status = POSIX_TIMER_ARMED;
return 0;
}
static int do_timer_settime(timer_t timer_id, int tmr_flags,
struct itimerspec64 *new_spec64,
struct itimerspec64 *old_spec64)
{
const struct k_clock *kc;
struct k_itimer *timr;
unsigned long flags;
int error;
if (!timespec64_valid(&new_spec64->it_interval) ||
!timespec64_valid(&new_spec64->it_value))
return -EINVAL;
if (old_spec64)
memset(old_spec64, 0, sizeof(*old_spec64));
timr = lock_timer(timer_id, &flags);
retry:
if (!timr)
return -EINVAL;
if (old_spec64)
old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);
/* Prevent signal delivery and rearming. */
timr->it_signal_seq++;
kc = timr->kclock;
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
else
error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);
if (error == TIMER_RETRY) {
// We already got the old time...
old_spec64 = NULL;
/* Unlocks and relocks the timer if it still exists */
timr = timer_wait_running(timr, &flags);
goto retry;
}
unlock_timer(timr, flags);
return error;
}
/* Set a POSIX.1b interval timer */
SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
const struct __kernel_itimerspec __user *, new_setting,
struct __kernel_itimerspec __user *, old_setting)
{
struct itimerspec64 new_spec, old_spec, *rtn;
int error = 0;
if (!new_setting)
return -EINVAL;
if (get_itimerspec64(&new_spec, new_setting))
return -EFAULT;
rtn = old_setting ? &old_spec : NULL;
error = do_timer_settime(timer_id, flags, &new_spec, rtn);
if (!error && old_setting) {
if (put_itimerspec64(&old_spec, old_setting))
error = -EFAULT;
}
return error;
}
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
struct old_itimerspec32 __user *, new,
struct old_itimerspec32 __user *, old)
{
struct itimerspec64 new_spec, old_spec;
struct itimerspec64 *rtn = old ? &old_spec : NULL;
int error = 0;
if (!new)
return -EINVAL;
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (get_old_itimerspec32(&new_spec, new))
return -EFAULT;
error = do_timer_settime(timer_id, flags, &new_spec, rtn);
if (!error && old) {
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (put_old_itimerspec32(&old_spec, old))
error = -EFAULT;
}
return error;
}
#endif
int common_timer_del(struct k_itimer *timer)
{
const struct k_clock *kc = timer->kclock;
if (kc->timer_try_to_cancel(timer) < 0)
return TIMER_RETRY;
timer->it_status = POSIX_TIMER_DISARMED;
return 0;
}
/*
* If the deleted timer is on the ignored list, remove it and
* drop the associated reference.
*/
static inline void posix_timer_cleanup_ignored(struct k_itimer *tmr)
{
if (!hlist_unhashed(&tmr->ignored_list)) {
hlist_del_init(&tmr->ignored_list);
posixtimer_putref(tmr);
}
}
static inline int timer_delete_hook(struct k_itimer *timer)
{
const struct k_clock *kc = timer->kclock;
/* Prevent signal delivery and rearming. */
timer->it_signal_seq++;
if (WARN_ON_ONCE(!kc || !kc->timer_del))
return -EINVAL;
return kc->timer_del(timer);
}
/* Delete a POSIX.1b interval timer. */
SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
{
struct k_itimer *timer;
unsigned long flags;
timer = lock_timer(timer_id, &flags);
retry_delete:
if (!timer)
return -EINVAL;
if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
/* Unlocks and relocks the timer if it still exists */
timer = timer_wait_running(timer, &flags);
goto retry_delete;
}
spin_lock(&current->sighand->siglock);
hlist_del(&timer->list);
posix_timer_cleanup_ignored(timer);
/*
* A concurrent lookup could check timer::it_signal lockless. It
* will reevaluate with timer::it_lock held and observe the NULL.
signal: Queue ignored posixtimers on ignore list Queue posixtimers which have their signal ignored on the ignored list: 1) When the timer fires and the signal has SIG_IGN set 2) When SIG_IGN is installed via sigaction() and a timer signal is already queued This only happens when the signal is for a valid timer, which delivered the signal in periodic mode. One-shot timer signals are correctly dropped. Due to the lock order constraints (sighand::siglock nests inside timer::lock) the signal code cannot access any of the timer fields which are relevant to make this decision, e.g. timer::it_status. This is addressed by establishing a protection scheme which requires to lock both locks on the timer side for modifying decision fields in the timer struct and therefore makes it possible for the signal delivery to evaluate with only sighand:siglock being held: 1) Move the NULLification of timer->it_signal into the sighand::siglock protected section of timer_delete() and check timer::it_signal in the code path which determines whether the signal is dropped or queued on the ignore list. This ensures that a deleted timer cannot be moved onto the ignore list, which would prevent it from being freed on exit() as it is not longer in the process' posix timer list. If the timer got moved to the ignored list before deletion then it is removed from the ignored list under sighand lock in timer_delete(). 2) Provide a new timer::it_sig_periodic flag, which gets set in the signal queue path with both timer and sighand locks held if the timer is actually in periodic mode at expiry time. The ignore list code checks this flag under sighand::siglock and drops the signal when it is not set. If it is set, then the signal is moved to the ignored list independent of the actual state of the timer. When the signal is un-ignored later then the signal is moved back to the signal queue. On signal delivery the posix timer side decides about dropping the signal if the timer was re-armed, dis-armed or deleted based on the signal sequence counter check. If the thread/process exits then not yet delivered signals are discarded which means the reference of the timer containing the sigqueue is dropped and frees the timer. This is way cheaper than requiring all code paths to lock sighand::siglock of the target thread/process on any modification of timer::it_status or going all the way and removing pending signals from the signal queues on every rearm, disarm or delete operation. So the protection scheme here is that on the timer side both timer::lock and sighand::siglock have to be held for modifying timer::it_signal timer::it_sig_periodic which means that on the signal side holding sighand::siglock is enough to evaluate these fields. In posixtimer_deliver_signal() holding timer::lock is sufficient to do the sequence validation against timer::it_signal_seq because a concurrent expiry is waiting on timer::lock to be released. This completes the SIG_IGN handling and such timers are not longer self rearmed which avoids pointless wakeups. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/all/20241105064214.120756416@linutronix.de
2024-11-05 08:14:54 +00:00
*
* It must be written with siglock held so that the signal code
* observes timer->it_signal == NULL in do_sigaction(SIG_IGN),
* which prevents it from moving a pending signal of a deleted
* timer to the ignore list.
*/
WRITE_ONCE(timer->it_signal, NULL);
signal: Queue ignored posixtimers on ignore list Queue posixtimers which have their signal ignored on the ignored list: 1) When the timer fires and the signal has SIG_IGN set 2) When SIG_IGN is installed via sigaction() and a timer signal is already queued This only happens when the signal is for a valid timer, which delivered the signal in periodic mode. One-shot timer signals are correctly dropped. Due to the lock order constraints (sighand::siglock nests inside timer::lock) the signal code cannot access any of the timer fields which are relevant to make this decision, e.g. timer::it_status. This is addressed by establishing a protection scheme which requires to lock both locks on the timer side for modifying decision fields in the timer struct and therefore makes it possible for the signal delivery to evaluate with only sighand:siglock being held: 1) Move the NULLification of timer->it_signal into the sighand::siglock protected section of timer_delete() and check timer::it_signal in the code path which determines whether the signal is dropped or queued on the ignore list. This ensures that a deleted timer cannot be moved onto the ignore list, which would prevent it from being freed on exit() as it is not longer in the process' posix timer list. If the timer got moved to the ignored list before deletion then it is removed from the ignored list under sighand lock in timer_delete(). 2) Provide a new timer::it_sig_periodic flag, which gets set in the signal queue path with both timer and sighand locks held if the timer is actually in periodic mode at expiry time. The ignore list code checks this flag under sighand::siglock and drops the signal when it is not set. If it is set, then the signal is moved to the ignored list independent of the actual state of the timer. When the signal is un-ignored later then the signal is moved back to the signal queue. On signal delivery the posix timer side decides about dropping the signal if the timer was re-armed, dis-armed or deleted based on the signal sequence counter check. If the thread/process exits then not yet delivered signals are discarded which means the reference of the timer containing the sigqueue is dropped and frees the timer. This is way cheaper than requiring all code paths to lock sighand::siglock of the target thread/process on any modification of timer::it_status or going all the way and removing pending signals from the signal queues on every rearm, disarm or delete operation. So the protection scheme here is that on the timer side both timer::lock and sighand::siglock have to be held for modifying timer::it_signal timer::it_sig_periodic which means that on the signal side holding sighand::siglock is enough to evaluate these fields. In posixtimer_deliver_signal() holding timer::lock is sufficient to do the sequence validation against timer::it_signal_seq because a concurrent expiry is waiting on timer::lock to be released. This completes the SIG_IGN handling and such timers are not longer self rearmed which avoids pointless wakeups. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/all/20241105064214.120756416@linutronix.de
2024-11-05 08:14:54 +00:00
spin_unlock(&current->sighand->siglock);
unlock_timer(timer, flags);
posix_timer_unhash_and_free(timer);
return 0;
}
/*
* Delete a timer if it is armed, remove it from the hash and schedule it
* for RCU freeing.
*/
static void itimer_delete(struct k_itimer *timer)
{
unsigned long flags;
/*
* irqsave is required to make timer_wait_running() work.
*/
spin_lock_irqsave(&timer->it_lock, flags);
retry_delete:
/*
* Even if the timer is not longer accessible from other tasks
* it still might be armed and queued in the underlying timer
* mechanism. Worse, that timer mechanism might run the expiry
* function concurrently.
*/
if (timer_delete_hook(timer) == TIMER_RETRY) {
/*
* Timer is expired concurrently, prevent livelocks
* and pointless spinning on RT.
*
* timer_wait_running() drops timer::it_lock, which opens
* the possibility for another task to delete the timer.
*
* That's not possible here because this is invoked from
* do_exit() only for the last thread of the thread group.
* So no other task can access and delete that timer.
*/
if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
return;
goto retry_delete;
}
hlist_del(&timer->list);
posix_timer_cleanup_ignored(timer);
/*
* Setting timer::it_signal to NULL is technically not required
* here as nothing can access the timer anymore legitimately via
* the hash table. Set it to NULL nevertheless so that all deletion
* paths are consistent.
*/
WRITE_ONCE(timer->it_signal, NULL);
spin_unlock_irqrestore(&timer->it_lock, flags);
posix_timer_unhash_and_free(timer);
}
/*
* Invoked from do_exit() when the last thread of a thread group exits.
* At that point no other task can access the timers of the dying
* task anymore.
*/
void exit_itimers(struct task_struct *tsk)
{
struct hlist_head timers;
if (hlist_empty(&tsk->signal->posix_timers))
return;
/* Protect against concurrent read via /proc/$PID/timers */
spin_lock_irq(&tsk->sighand->siglock);
hlist_move_list(&tsk->signal->posix_timers, &timers);
spin_unlock_irq(&tsk->sighand->siglock);
/* The timers are not longer accessible via tsk::signal */
while (!hlist_empty(&timers))
itimer_delete(hlist_entry(timers.first, struct k_itimer, list));
/*
* There should be no timers on the ignored list. itimer_delete() has
* mopped them up.
*/
if (!WARN_ON_ONCE(!hlist_empty(&tsk->signal->ignored_posix_timers)))
return;
hlist_move_list(&tsk->signal->ignored_posix_timers, &timers);
while (!hlist_empty(&timers)) {
posix_timer_cleanup_ignored(hlist_entry(timers.first, struct k_itimer,
ignored_list));
}
}
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
const struct __kernel_timespec __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 new_tp;
if (!kc || !kc->clock_set)
return -EINVAL;
if (get_timespec64(&new_tp, tp))
return -EFAULT;
/*
* Permission checks have to be done inside the clock specific
* setter callback.
*/
return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
struct __kernel_timespec __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 kernel_tp;
int error;
if (!kc)
return -EINVAL;
error = kc->clock_get_timespec(which_clock, &kernel_tp);
if (!error && put_timespec64(&kernel_tp, tp))
error = -EFAULT;
return error;
}
int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
if (!kc)
return -EINVAL;
if (!kc->clock_adj)
return -EOPNOTSUPP;
return kc->clock_adj(which_clock, ktx);
}
SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
struct __kernel_timex __user *, utx)
{
struct __kernel_timex ktx;
int err;
if (copy_from_user(&ktx, utx, sizeof(ktx)))
return -EFAULT;
err = do_clock_adjtime(which_clock, &ktx);
if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
return -EFAULT;
return err;
}
/**
* sys_clock_getres - Get the resolution of a clock
* @which_clock: The clock to get the resolution for
* @tp: Pointer to a a user space timespec64 for storage
*
* POSIX defines:
*
* "The clock_getres() function shall return the resolution of any
* clock. Clock resolutions are implementation-defined and cannot be set by
* a process. If the argument res is not NULL, the resolution of the
* specified clock shall be stored in the location pointed to by res. If
* res is NULL, the clock resolution is not returned. If the time argument
* of clock_settime() is not a multiple of res, then the value is truncated
* to a multiple of res."
*
* Due to the various hardware constraints the real resolution can vary
* wildly and even change during runtime when the underlying devices are
* replaced. The kernel also can use hardware devices with different
* resolutions for reading the time and for arming timers.
*
* The kernel therefore deviates from the POSIX spec in various aspects:
*
* 1) The resolution returned to user space
*
* For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI,
* CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW
* the kernel differentiates only two cases:
*
* I) Low resolution mode:
*
* When high resolution timers are disabled at compile or runtime
* the resolution returned is nanoseconds per tick, which represents
* the precision at which timers expire.
*
* II) High resolution mode:
*
* When high resolution timers are enabled the resolution returned
* is always one nanosecond independent of the actual resolution of
* the underlying hardware devices.
*
* For CLOCK_*_ALARM the actual resolution depends on system
* state. When system is running the resolution is the same as the
* resolution of the other clocks. During suspend the actual
* resolution is the resolution of the underlying RTC device which
* might be way less precise than the clockevent device used during
* running state.
*
* For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution
* returned is always nanoseconds per tick.
*
* For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution
* returned is always one nanosecond under the assumption that the
* underlying scheduler clock has a better resolution than nanoseconds
* per tick.
*
* For dynamic POSIX clocks (PTP devices) the resolution returned is
* always one nanosecond.
*
* 2) Affect on sys_clock_settime()
*
* The kernel does not truncate the time which is handed in to
* sys_clock_settime(). The kernel internal timekeeping is always using
* nanoseconds precision independent of the clocksource device which is
* used to read the time from. The resolution of that device only
* affects the presicion of the time returned by sys_clock_gettime().
*
* Returns:
* 0 Success. @tp contains the resolution
* -EINVAL @which_clock is not a valid clock ID
* -EFAULT Copying the resolution to @tp faulted
* -ENODEV Dynamic POSIX clock is not backed by a device
* -EOPNOTSUPP Dynamic POSIX clock does not support getres()
*/
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
struct __kernel_timespec __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 rtn_tp;
int error;
if (!kc)
return -EINVAL;
error = kc->clock_getres(which_clock, &rtn_tp);
if (!error && tp && put_timespec64(&rtn_tp, tp))
error = -EFAULT;
return error;
}
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
struct old_timespec32 __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 ts;
if (!kc || !kc->clock_set)
return -EINVAL;
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (get_old_timespec32(&ts, tp))
return -EFAULT;
return kc->clock_set(which_clock, &ts);
}
SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
struct old_timespec32 __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 ts;
int err;
if (!kc)
return -EINVAL;
err = kc->clock_get_timespec(which_clock, &ts);
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (!err && put_old_timespec32(&ts, tp))
err = -EFAULT;
return err;
}
SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
struct old_timex32 __user *, utp)
{
struct __kernel_timex ktx;
int err;
err = get_old_timex32(&ktx, utp);
if (err)
return err;
err = do_clock_adjtime(which_clock, &ktx);
if (err >= 0 && put_old_timex32(utp, &ktx))
return -EFAULT;
return err;
}
SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
struct old_timespec32 __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 ts;
int err;
if (!kc)
return -EINVAL;
err = kc->clock_getres(which_clock, &ts);
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (!err && tp && put_old_timespec32(&ts, tp))
return -EFAULT;
return err;
}
#endif
/*
* sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI
*/
static int common_nsleep(const clockid_t which_clock, int flags,
const struct timespec64 *rqtp)
{
ktime_t texp = timespec64_to_ktime(*rqtp);
return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
hrtimer: fix *rmtp handling in hrtimer_nanosleep() Spotted by Pavel Emelyanov and Alexey Dobriyan. hrtimer_nanosleep() sets restart_block->arg1 = rmtp, but this rmtp points to the local variable which lives in the caller's stack frame. This means that if sys_restart_syscall() actually happens and it is interrupted as well, we don't update the user-space variable, but write into the already dead stack frame. Introduced by commit 04c227140fed77587432667a574b14736a06dd7f hrtimer: Rework hrtimer_nanosleep to make sys_compat_nanosleep easier Change the callers to pass "__user *rmtp" to hrtimer_nanosleep(), and change hrtimer_nanosleep() to use copy_to_user() to actually update *rmtp. Small problem remains. man 2 nanosleep states that *rtmp should be written if nanosleep() was interrupted (it says nothing whether it is OK to update *rmtp if nanosleep returns 0), but (with or without this patch) we can dirty *rem even if nanosleep() returns 0. NOTE: this patch doesn't change compat_sys_nanosleep(), because it has other bugs. Fixed by the next patch. Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Alexey Dobriyan <adobriyan@sw.ru> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Cc: Pavel Emelyanov <xemul@sw.ru> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Toyo Abe <toyoa@mvista.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> include/linux/hrtimer.h | 2 - kernel/hrtimer.c | 51 +++++++++++++++++++++++++----------------------- kernel/posix-timers.c | 14 +------------ 3 files changed, 30 insertions(+), 37 deletions(-)
2008-02-01 14:29:05 +00:00
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
/*
* sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME
*
* Absolute nanosleeps for these clocks are time-namespace adjusted.
*/
static int common_nsleep_timens(const clockid_t which_clock, int flags,
const struct timespec64 *rqtp)
{
ktime_t texp = timespec64_to_ktime(*rqtp);
if (flags & TIMER_ABSTIME)
texp = timens_ktime_to_host(which_clock, texp);
return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
const struct __kernel_timespec __user *, rqtp,
struct __kernel_timespec __user *, rmtp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 t;
if (!kc)
return -EINVAL;
if (!kc->nsleep)
return -EOPNOTSUPP;
if (get_timespec64(&t, rqtp))
return -EFAULT;
if (!timespec64_valid(&t))
return -EINVAL;
if (flags & TIMER_ABSTIME)
rmtp = NULL;
2023-01-05 13:44:03 +00:00
current->restart_block.fn = do_no_restart_syscall;
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
return kc->nsleep(which_clock, flags, &t);
}
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
struct old_timespec32 __user *, rqtp,
struct old_timespec32 __user *, rmtp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 t;
if (!kc)
return -EINVAL;
if (!kc->nsleep)
return -EOPNOTSUPP;
y2038: globally rename compat_time to old_time32 Christoph Hellwig suggested a slightly different path for handling backwards compatibility with the 32-bit time_t based system calls: Rather than simply reusing the compat_sys_* entry points on 32-bit architectures unchanged, we get rid of those entry points and the compat_time types by renaming them to something that makes more sense on 32-bit architectures (which don't have a compat mode otherwise), and then share the entry points under the new name with the 64-bit architectures that use them for implementing the compatibility. The following types and interfaces are renamed here, and moved from linux/compat_time.h to linux/time32.h: old new --- --- compat_time_t old_time32_t struct compat_timeval struct old_timeval32 struct compat_timespec struct old_timespec32 struct compat_itimerspec struct old_itimerspec32 ns_to_compat_timeval() ns_to_old_timeval32() get_compat_itimerspec64() get_old_itimerspec32() put_compat_itimerspec64() put_old_itimerspec32() compat_get_timespec64() get_old_timespec32() compat_put_timespec64() put_old_timespec32() As we already have aliases in place, this patch addresses only the instances that are relevant to the system call interface in particular, not those that occur in device drivers and other modules. Those will get handled separately, while providing the 64-bit version of the respective interfaces. I'm not renaming the timex, rusage and itimerval structures, as we are still debating what the new interface will look like, and whether we will need a replacement at all. This also doesn't change the names of the syscall entry points, which can be done more easily when we actually switch over the 32-bit architectures to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix. Suggested-by: Christoph Hellwig <hch@infradead.org> Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-07-13 10:52:28 +00:00
if (get_old_timespec32(&t, rqtp))
return -EFAULT;
if (!timespec64_valid(&t))
return -EINVAL;
if (flags & TIMER_ABSTIME)
rmtp = NULL;
2023-01-05 13:44:03 +00:00
current->restart_block.fn = do_no_restart_syscall;
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
return kc->nsleep(which_clock, flags, &t);
}
#endif
static const struct k_clock clock_realtime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_realtime_timespec,
.clock_get_ktime = posix_get_realtime_ktime,
.clock_set = posix_clock_realtime_set,
.clock_adj = posix_clock_realtime_adj,
.nsleep = common_nsleep,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
.timer_del = common_timer_del,
.timer_rearm = common_hrtimer_rearm,
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
.timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock clock_monotonic = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_monotonic_timespec,
.clock_get_ktime = posix_get_monotonic_ktime,
.nsleep = common_nsleep_timens,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
.timer_del = common_timer_del,
.timer_rearm = common_hrtimer_rearm,
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
.timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock clock_monotonic_raw = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_monotonic_raw,
};
static const struct k_clock clock_realtime_coarse = {
.clock_getres = posix_get_coarse_res,
.clock_get_timespec = posix_get_realtime_coarse,
};
static const struct k_clock clock_monotonic_coarse = {
.clock_getres = posix_get_coarse_res,
.clock_get_timespec = posix_get_monotonic_coarse,
};
static const struct k_clock clock_tai = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_ktime = posix_get_tai_ktime,
.clock_get_timespec = posix_get_tai_timespec,
.nsleep = common_nsleep,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
.timer_del = common_timer_del,
.timer_rearm = common_hrtimer_rearm,
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
.timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME Revert commits 92af4dcb4e1c ("tracing: Unify the "boot" and "mono" tracing clocks") 127bfa5f4342 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior") 7250a4047aa6 ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior") d6c7270e913d ("timekeeping: Remove boot time specific code") f2d6fdbfd238 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior") d6ed449afdb3 ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock") 72199320d49d ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock") As stated in the pull request for the unification of CLOCK_MONOTONIC and CLOCK_BOOTTIME, it was clear that we might have to revert the change. As reported by several folks systemd and other applications rely on the documented behaviour of CLOCK_MONOTONIC on Linux and break with the above changes. After resume daemons time out and other timeout related issues are observed. Rafael compiled this list: * systemd kills daemons on resume, after >WatchdogSec seconds of suspending (Genki Sky). [Verified that that's because systemd uses CLOCK_MONOTONIC and expects it to not include the suspend time.] * systemd-journald misbehaves after resume: systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal corrupted or uncleanly shut down, renaming and replacing. (Mike Galbraith). * NetworkManager reports "networking disabled" and networking is broken after resume 50% of the time (Pavel). [May be because of systemd.] * MATE desktop dims the display and starts the screensaver right after system resume (Pavel). * Full system hang during resume (me). [May be due to systemd or NM or both.] That happens on debian and open suse systems. It's sad, that these problems were neither catched in -next nor by those folks who expressed interest in this change. Reported-by: Rafael J. Wysocki <rjw@rjwysocki.net> Reported-by: Genki Sky <sky@genki.is>, Reported-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kevin Easton <kevin@guarana.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Salyzyn <salyzyn@android.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Petr Mladek <pmladek@suse.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org>
2018-04-25 13:33:38 +00:00
static const struct k_clock clock_boottime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_ktime = posix_get_boottime_ktime,
.clock_get_timespec = posix_get_boottime_timespec,
.nsleep = common_nsleep_timens,
Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME Revert commits 92af4dcb4e1c ("tracing: Unify the "boot" and "mono" tracing clocks") 127bfa5f4342 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior") 7250a4047aa6 ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior") d6c7270e913d ("timekeeping: Remove boot time specific code") f2d6fdbfd238 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior") d6ed449afdb3 ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock") 72199320d49d ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock") As stated in the pull request for the unification of CLOCK_MONOTONIC and CLOCK_BOOTTIME, it was clear that we might have to revert the change. As reported by several folks systemd and other applications rely on the documented behaviour of CLOCK_MONOTONIC on Linux and break with the above changes. After resume daemons time out and other timeout related issues are observed. Rafael compiled this list: * systemd kills daemons on resume, after >WatchdogSec seconds of suspending (Genki Sky). [Verified that that's because systemd uses CLOCK_MONOTONIC and expects it to not include the suspend time.] * systemd-journald misbehaves after resume: systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal corrupted or uncleanly shut down, renaming and replacing. (Mike Galbraith). * NetworkManager reports "networking disabled" and networking is broken after resume 50% of the time (Pavel). [May be because of systemd.] * MATE desktop dims the display and starts the screensaver right after system resume (Pavel). * Full system hang during resume (me). [May be due to systemd or NM or both.] That happens on debian and open suse systems. It's sad, that these problems were neither catched in -next nor by those folks who expressed interest in this change. Reported-by: Rafael J. Wysocki <rjw@rjwysocki.net> Reported-by: Genki Sky <sky@genki.is>, Reported-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kevin Easton <kevin@guarana.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Salyzyn <salyzyn@android.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Petr Mladek <pmladek@suse.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org>
2018-04-25 13:33:38 +00:00
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
.timer_del = common_timer_del,
.timer_rearm = common_hrtimer_rearm,
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
.timer_wait_running = common_timer_wait_running,
Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME Revert commits 92af4dcb4e1c ("tracing: Unify the "boot" and "mono" tracing clocks") 127bfa5f4342 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior") 7250a4047aa6 ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior") d6c7270e913d ("timekeeping: Remove boot time specific code") f2d6fdbfd238 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior") d6ed449afdb3 ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock") 72199320d49d ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock") As stated in the pull request for the unification of CLOCK_MONOTONIC and CLOCK_BOOTTIME, it was clear that we might have to revert the change. As reported by several folks systemd and other applications rely on the documented behaviour of CLOCK_MONOTONIC on Linux and break with the above changes. After resume daemons time out and other timeout related issues are observed. Rafael compiled this list: * systemd kills daemons on resume, after >WatchdogSec seconds of suspending (Genki Sky). [Verified that that's because systemd uses CLOCK_MONOTONIC and expects it to not include the suspend time.] * systemd-journald misbehaves after resume: systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal corrupted or uncleanly shut down, renaming and replacing. (Mike Galbraith). * NetworkManager reports "networking disabled" and networking is broken after resume 50% of the time (Pavel). [May be because of systemd.] * MATE desktop dims the display and starts the screensaver right after system resume (Pavel). * Full system hang during resume (me). [May be due to systemd or NM or both.] That happens on debian and open suse systems. It's sad, that these problems were neither catched in -next nor by those folks who expressed interest in this change. Reported-by: Rafael J. Wysocki <rjw@rjwysocki.net> Reported-by: Genki Sky <sky@genki.is>, Reported-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kevin Easton <kevin@guarana.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Salyzyn <salyzyn@android.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Petr Mladek <pmladek@suse.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org>
2018-04-25 13:33:38 +00:00
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock * const posix_clocks[] = {
[CLOCK_REALTIME] = &clock_realtime,
[CLOCK_MONOTONIC] = &clock_monotonic,
[CLOCK_PROCESS_CPUTIME_ID] = &clock_process,
[CLOCK_THREAD_CPUTIME_ID] = &clock_thread,
[CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw,
[CLOCK_REALTIME_COARSE] = &clock_realtime_coarse,
[CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse,
Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME Revert commits 92af4dcb4e1c ("tracing: Unify the "boot" and "mono" tracing clocks") 127bfa5f4342 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior") 7250a4047aa6 ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior") d6c7270e913d ("timekeeping: Remove boot time specific code") f2d6fdbfd238 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior") d6ed449afdb3 ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock") 72199320d49d ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock") As stated in the pull request for the unification of CLOCK_MONOTONIC and CLOCK_BOOTTIME, it was clear that we might have to revert the change. As reported by several folks systemd and other applications rely on the documented behaviour of CLOCK_MONOTONIC on Linux and break with the above changes. After resume daemons time out and other timeout related issues are observed. Rafael compiled this list: * systemd kills daemons on resume, after >WatchdogSec seconds of suspending (Genki Sky). [Verified that that's because systemd uses CLOCK_MONOTONIC and expects it to not include the suspend time.] * systemd-journald misbehaves after resume: systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal corrupted or uncleanly shut down, renaming and replacing. (Mike Galbraith). * NetworkManager reports "networking disabled" and networking is broken after resume 50% of the time (Pavel). [May be because of systemd.] * MATE desktop dims the display and starts the screensaver right after system resume (Pavel). * Full system hang during resume (me). [May be due to systemd or NM or both.] That happens on debian and open suse systems. It's sad, that these problems were neither catched in -next nor by those folks who expressed interest in this change. Reported-by: Rafael J. Wysocki <rjw@rjwysocki.net> Reported-by: Genki Sky <sky@genki.is>, Reported-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kevin Easton <kevin@guarana.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Salyzyn <salyzyn@android.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Petr Mladek <pmladek@suse.com> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org>
2018-04-25 13:33:38 +00:00
[CLOCK_BOOTTIME] = &clock_boottime,
[CLOCK_REALTIME_ALARM] = &alarm_clock,
[CLOCK_BOOTTIME_ALARM] = &alarm_clock,
[CLOCK_TAI] = &clock_tai,
};
static const struct k_clock *clockid_to_kclock(const clockid_t id)
{
clockid_t idx = id;
if (id < 0) {
return (id & CLOCKFD_MASK) == CLOCKFD ?
&clock_posix_dynamic : &clock_posix_cpu;
}
if (id >= ARRAY_SIZE(posix_clocks))
return NULL;
return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))];
}