mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-09 14:43:16 +00:00
34f26a1561
PSI accounts stalls for each cgroup separately and aggregates them at each level of the hierarchy. This may cause non-negligible overhead for some workloads that run deep in the hierarchy. Commit 3958e2d0c34e ("cgroup: make per-cgroup pressure stall tracking configurable") lets PSI skip per-cgroup stall accounting and account only system-wide, avoiding this per-level overhead. But for our use case, we also want leaf-cgroup PSI stats accounted, so that userspace can make adjustments on that cgroup rather than only system-wide. So this patch introduces a per-cgroup interface for disabling and re-enabling PSI accounting, "cgroup.pressure": a read-write single-value file whose allowed values are "0" and "1". The default is "1", so per-cgroup PSI stats are enabled by default. Implementation details: It should be relatively straightforward to disable and re-enable state aggregation, time tracking, and averaging on a per-cgroup level, if we can live with losing history from while it was disabled. I.e. the avgs will restart from 0, and total= will have gaps. But it is hard or complex to stop/restart groupc->tasks[] updates, so that is not implemented in this patch. So we always update groupc->tasks[] and the PSI_ONCPU bit in psi_group_change(), even when the cgroup's PSI stats are disabled. Suggested-by: Johannes Weiner <hannes@cmpxchg.org> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Link: https://lkml.kernel.org/r/20220907090332.2078-1-zhouchengming@bytedance.com
70 lines
1.7 KiB
C
70 lines
1.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_PSI_H
|
|
#define _LINUX_PSI_H
|
|
|
|
#include <linux/jump_label.h>
|
|
#include <linux/psi_types.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/cgroup-defs.h>
|
|
#include <linux/cgroup.h>
|
|
|
|
struct seq_file;
|
|
struct css_set;
|
|
|
|
#ifdef CONFIG_PSI
|
|
|
|
extern struct static_key_false psi_disabled;
|
|
extern struct psi_group psi_system;
|
|
|
|
void psi_init(void);
|
|
|
|
void psi_memstall_enter(unsigned long *flags);
|
|
void psi_memstall_leave(unsigned long *flags);
|
|
|
|
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
|
|
struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
|
char *buf, enum psi_res res);
|
|
void psi_trigger_destroy(struct psi_trigger *t);
|
|
|
|
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
|
|
poll_table *wait);
|
|
|
|
#ifdef CONFIG_CGROUPS
|
|
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
|
|
{
|
|
return cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
|
|
}
|
|
|
|
int psi_cgroup_alloc(struct cgroup *cgrp);
|
|
void psi_cgroup_free(struct cgroup *cgrp);
|
|
void cgroup_move_task(struct task_struct *p, struct css_set *to);
|
|
void psi_cgroup_restart(struct psi_group *group);
|
|
#endif
|
|
|
|
#else /* CONFIG_PSI */
|
|
|
|
/* !CONFIG_PSI stub: PSI is compiled out, so there is nothing to initialize. */
static inline void psi_init(void)
{
}
|
|
|
|
/* !CONFIG_PSI stub: does nothing and leaves *@flags untouched. */
static inline void psi_memstall_enter(unsigned long *flags)
{
}
|
|
/* !CONFIG_PSI stub: does nothing and leaves *@flags untouched. */
static inline void psi_memstall_leave(unsigned long *flags)
{
}
|
|
|
|
#ifdef CONFIG_CGROUPS
|
|
/*
 * !CONFIG_PSI stub: no per-cgroup PSI state exists in this configuration,
 * so nothing is allocated for @cgrp and the function always returns 0.
 */
static inline int psi_cgroup_alloc(struct cgroup *cgrp)
{
	return 0;
}
|
|
/* !CONFIG_PSI stub: counterpart of psi_cgroup_alloc(); nothing to release. */
static inline void psi_cgroup_free(struct cgroup *cgrp)
{
}
|
|
static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
|
|
{
|
|
rcu_assign_pointer(p->cgroups, to);
|
|
}
|
|
/* !CONFIG_PSI stub: there is no PSI accounting to restart for @group. */
static inline void psi_cgroup_restart(struct psi_group *group)
{
}
|
|
#endif
|
|
|
|
#endif /* CONFIG_PSI */
|
|
|
|
#endif /* _LINUX_PSI_H */
|