linux-stable/include/linux/sched/cputime.h
Frederic Weisbecker e7f2be115f sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full
getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime
than the actual time.

task_cputime_adjusted() snapshots utime and stime and then adjusts their
sum to match the scheduler-maintained cputime.sum_exec_runtime.
Unfortunately in nohz_full, sum_exec_runtime is only updated once per
second in the worst case, causing a discrepancy against utime and stime
that can be updated anytime by the reader using vtime.

To fix this situation, perform an update of cputime.sum_exec_runtime
when the cputime snapshot reports the task as actually running while
the tick is disabled. The related overhead is then contained within the
relevant situations.

Reported-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Acked-by: Phil Auld <pauld@redhat.com>
Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org
2021-12-02 15:08:22 +01:00

193 lines
5.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_CPUTIME_H
#define _LINUX_SCHED_CPUTIME_H
#include <linux/sched/signal.h>
/*
* cputime accounting APIs:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm/cputime.h>
/*
 * Generic nanosecond conversion built on top of cputime_to_usecs().
 * Only defined when <asm/cputime.h> has not already supplied its own
 * cputime_to_nsecs().
 */
#ifndef cputime_to_nsecs
# define cputime_to_nsecs(ct)	(cputime_to_usecs(ct) * NSEC_PER_USEC)
#endif
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/*
 * With CONFIG_VIRT_CPU_ACCOUNTING_GEN these two accessors are only
 * declared here and implemented elsewhere.
 *
 * NOTE(review): the meaning of task_cputime()'s bool result is not visible
 * in this header - confirm against the out-of-line definition.
 */
bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime);
u64 task_gtime(struct task_struct *t);
#else
/*
 * task_cputime - read a task's user and system time.
 *
 * @t:     task to read from.
 * @utime: receives t->utime.
 * @stime: receives t->stime.
 *
 * Variant used when CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set: the values
 * are copied straight out of the task structure.
 *
 * Return: always false in this variant.
 */
static inline bool task_cputime(struct task_struct *t,
				u64 *utime, u64 *stime)
{
	*utime = t->utime;
	*stime = t->stime;

	return false;
}
/*
 * task_gtime - read a task's gtime counter.
 *
 * @t: task to read from.
 *
 * Variant used when CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set.
 *
 * Return: the current value of t->gtime.
 */
static inline u64 task_gtime(struct task_struct *t)
{
	return t->gtime;
}
#endif
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
/*
 * task_cputime_scaled - read a task's scaled user and system time.
 *
 * @t:           task to read from.
 * @utimescaled: receives t->utimescaled.
 * @stimescaled: receives t->stimescaled.
 *
 * Variant for CONFIG_ARCH_HAS_SCALED_CPUTIME, where the task structure
 * carries dedicated scaled counters.
 */
static inline void task_cputime_scaled(struct task_struct *t,
				       u64 *utimescaled,
				       u64 *stimescaled)
{
	*utimescaled = t->utimescaled;
	*stimescaled = t->stimescaled;
}
#else
/*
 * task_cputime_scaled - read a task's scaled user and system time.
 *
 * @t:           task to read from.
 * @utimescaled: receives the user time reported by task_cputime().
 * @stimescaled: receives the system time reported by task_cputime().
 *
 * Variant used without CONFIG_ARCH_HAS_SCALED_CPUTIME: the request is
 * simply forwarded to task_cputime(), whose bool result is not used.
 */
static inline void task_cputime_scaled(struct task_struct *t,
				       u64 *utimescaled,
				       u64 *stimescaled)
{
	task_cputime(t, utimescaled, stimescaled);
}
#endif
/*
 * Adjusted cputime accessors; declared here, implemented elsewhere.
 *
 * NOTE(review): judging by the names, @ut and @st receive the adjusted
 * user and system time, and cputime_adjust() is the shared helper working
 * on a (task_cputime, prev_cputime) pair - confirm against the definitions.
 */
void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
		    u64 *ut, u64 *st);
/*
* Thread group CPU time accounting.
*/
/*
 * NOTE(review): presumably fills @times with the accumulated CPU time of
 * @tsk's whole thread group - confirm against the definition.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
/*
 * NOTE(review): presumably stores a sample of the thread group's CPU clocks
 * into the @samples array; the array length is not visible here - confirm.
 */
void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples);
/*
* The following are functions that support scheduler-internal time accounting.
* These functions are generally called at the timer tick. None of this depends
* on CONFIG_SCHEDSTATS.
*/
/**
* get_running_cputimer - return &tsk->signal->cputimer if cputimers are active
*
* @tsk: Pointer to target task.
*/
#ifdef CONFIG_POSIX_TIMERS
static inline
struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
{
	struct thread_group_cputimer *timer = &tsk->signal->cputimer;

	/*
	 * Lockless test: when no posix CPU timer is active, thread group
	 * accounting is not active either, so there is nothing to hand out.
	 */
	if (!READ_ONCE(tsk->signal->posix_cputimers.timers_active))
		return NULL;

	/*
	 * __exit_signal() flushes the task's sum_exec_runtime into
	 * sig->sum_sched_runtime; from then on no further cputime of this
	 * task is accounted to the signal struct, although the task may
	 * still be ticking.
	 *
	 * Thread group cputimer accounting follows the same rule as thread
	 * group cputime: time elapsing after __exit_signal() is ignored by
	 * any running thread group timer.
	 *
	 * That keeps POSIX CPU clocks and timers in sync, so a POSIX CPU
	 * timer cannot expire while the matching POSIX CPU clock delta is
	 * still behind the timer's expiry value.
	 */
	if (unlikely(!tsk->sighand))
		return NULL;

	return timer;
}
#else
/*
 * Stub for kernels built without CONFIG_POSIX_TIMERS: a running thread
 * group cputimer is never reported, so callers always get NULL.
 */
static inline
struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
{
	return NULL;
}
#endif
/**
 * account_group_user_time - Maintain utime for a thread group.
 *
 * @tsk:	Pointer to task structure.
 * @cputime:	Time value by which to increment the utime field of the
 *		thread group cputimer.
 *
 * If get_running_cputimer() reports a running cputimer for @tsk's thread
 * group, atomically add @cputime to its utime counter. Otherwise nothing
 * is accounted.
 */
static inline void account_group_user_time(struct task_struct *tsk,
					   u64 cputime)
{
	struct thread_group_cputimer *timer = get_running_cputimer(tsk);

	if (timer)
		atomic64_add(cputime, &timer->cputime_atomic.utime);
}
/**
 * account_group_system_time - Maintain stime for a thread group.
 *
 * @tsk:	Pointer to task structure.
 * @cputime:	Time value by which to increment the stime field of the
 *		thread group cputimer.
 *
 * If get_running_cputimer() reports a running cputimer for @tsk's thread
 * group, atomically add @cputime to its stime counter. Otherwise nothing
 * is accounted.
 */
static inline void account_group_system_time(struct task_struct *tsk,
					     u64 cputime)
{
	struct thread_group_cputimer *timer = get_running_cputimer(tsk);

	if (timer)
		atomic64_add(cputime, &timer->cputime_atomic.stime);
}
/**
 * account_group_exec_runtime - Maintain exec runtime for a thread group.
 *
 * @tsk:	Pointer to task structure.
 * @ns:		Time value by which to increment the sum_exec_runtime field
 *		of the thread group cputimer.
 *
 * If get_running_cputimer() reports a running cputimer for @tsk's thread
 * group, atomically add @ns to its sum_exec_runtime counter. Otherwise
 * nothing is accounted.
 */
static inline void account_group_exec_runtime(struct task_struct *tsk,
					      unsigned long long ns)
{
	struct thread_group_cputimer *timer = get_running_cputimer(tsk);

	if (timer)
		atomic64_add(ns, &timer->cputime_atomic.sum_exec_runtime);
}
/*
 * prev_cputime_init - put a prev_cputime into its initial state.
 *
 * @prev: structure to initialise.
 *
 * Zeroes the stored utime/stime and initialises the raw spinlock in @prev.
 * With CONFIG_VIRT_CPU_ACCOUNTING_NATIVE the body is compiled out and the
 * function does nothing.
 */
static inline void prev_cputime_init(struct prev_cputime *prev)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	prev->stime = 0;
	prev->utime = 0;
	raw_spin_lock_init(&prev->lock);
#endif
}
/*
 * Declared here, implemented elsewhere.
 * NOTE(review): presumably returns @task's accumulated scheduler runtime;
 * the unit is not visible in this header - confirm against the definition.
 */
unsigned long long task_sched_runtime(struct task_struct *task);
#endif /* _LINUX_SCHED_CPUTIME_H */