mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 02:36:21 +00:00
a3b2aeac9d
Delay accounting does not track the delay of IRQ/SOFTIRQ, even though IRQ/SOFTIRQ can have an obvious impact on the productivity of some workloads, for example when workloads are running on a system which is busy handling network IRQ/SOFTIRQ.

Getting the delay of IRQ/SOFTIRQ could help users to reduce such delay, for example by setting interrupt affinity or task affinity, using kernel threads for NAPI, etc. This is inspired by "sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure"[1].

Also fix some code indent problems of older code.

And update tools/accounting/getdelays.c:

    / # ./getdelays -p 156 -di
    print delayacct stats ON
    printing IO accounting
    PID     156

    CPU             count     real total  virtual total    delay total  delay average
                       15       15836008       16218149      275700790         18.380ms
    IO              count    delay total  delay average
                        0              0          0.000ms
    SWAP            count    delay total  delay average
                        0              0          0.000ms
    RECLAIM         count    delay total  delay average
                        0              0          0.000ms
    THRASHING       count    delay total  delay average
                        0              0          0.000ms
    COMPACT         count    delay total  delay average
                        0              0          0.000ms
    WPCOPY          count    delay total  delay average
                       36        7586118          0.211ms
    IRQ             count    delay total  delay average
                       42         929161          0.022ms

[1] commit 52b1364ba0b1("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure")

Link: https://lkml.kernel.org/r/202304081728353557233@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: junhua huang <huang.junhua@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
253 lines
8.1 KiB
C
253 lines
8.1 KiB
C
/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */
/* taskstats.h - exporting per-task statistics
 *
 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
 *           (C) Balbir Singh,   IBM Corp. 2006
 *           (C) Jay Lan,        SGI, 2006
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */

#ifndef _LINUX_TASKSTATS_H
|
|
#define _LINUX_TASKSTATS_H
|
|
|
|
#include <linux/types.h>
|
|
|
|
/* Format for per-task data returned to userland when
 *	- a task exits
 *	- listener requests stats for a task
 *
 * The struct is versioned. Newer versions should only add fields to
 * the bottom of the struct to maintain backward compatibility.
 *
 * To add new fields:
 *	a) bump up TASKSTATS_VERSION
 *	b) add comment indicating new version number at end of struct
 *	c) add new fields after version comment; maintain 64-bit alignment
 */

/* Current layout version of struct taskstats; stored in its version field. */
#define TASKSTATS_VERSION	14

/* Size of taskstats.ac_comm; should be >= TASK_COMM_LEN in linux/sched.h */
#define TS_COMM_LEN		32

struct taskstats {
|
|
|
|
/* The version number of this struct. This field is always set to
|
|
* TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
|
|
* Each time the struct is changed, the value should be incremented.
|
|
*/
|
|
__u16 version;
|
|
__u32 ac_exitcode; /* Exit status */
|
|
|
|
/* The accounting flags of a task as defined in <linux/acct.h>
|
|
* Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP.
|
|
* (AGROUP since version 12).
|
|
*/
|
|
__u8 ac_flag; /* Record flags */
|
|
__u8 ac_nice; /* task_nice */
|
|
|
|
/* Delay accounting fields start
|
|
*
|
|
* All values, until comment "Delay accounting fields end" are
|
|
* available only if delay accounting is enabled, even though the last
|
|
* few fields are not delays
|
|
*
|
|
* xxx_count is the number of delay values recorded
|
|
* xxx_delay_total is the corresponding cumulative delay in nanoseconds
|
|
*
|
|
* xxx_delay_total wraps around to zero on overflow
|
|
* xxx_count incremented regardless of overflow
|
|
*/
|
|
|
|
/* Delay waiting for cpu, while runnable
|
|
* count, delay_total NOT updated atomically
|
|
*/
|
|
__u64 cpu_count __attribute__((aligned(8)));
|
|
__u64 cpu_delay_total;
|
|
|
|
/* Following four fields atomically updated using task->delays->lock */
|
|
|
|
/* Delay waiting for synchronous block I/O to complete
|
|
* does not account for delays in I/O submission
|
|
*/
|
|
__u64 blkio_count;
|
|
__u64 blkio_delay_total;
|
|
|
|
/* Delay waiting for page fault I/O (swap in only) */
|
|
__u64 swapin_count;
|
|
__u64 swapin_delay_total;
|
|
|
|
/* cpu "wall-clock" running time
|
|
* On some architectures, value will adjust for cpu time stolen
|
|
* from the kernel in involuntary waits due to virtualization.
|
|
* Value is cumulative, in nanoseconds, without a corresponding count
|
|
* and wraps around to zero silently on overflow
|
|
*/
|
|
__u64 cpu_run_real_total;
|
|
|
|
/* cpu "virtual" running time
|
|
* Uses time intervals seen by the kernel i.e. no adjustment
|
|
* for kernel's involuntary waits due to virtualization.
|
|
* Value is cumulative, in nanoseconds, without a corresponding count
|
|
* and wraps around to zero silently on overflow
|
|
*/
|
|
__u64 cpu_run_virtual_total;
|
|
/* Delay accounting fields end */
|
|
/* version 1 ends here */
|
|
|
|
/* Basic Accounting Fields start */
|
|
char ac_comm[TS_COMM_LEN]; /* Command name */
|
|
__u8 ac_sched __attribute__((aligned(8)));
|
|
/* Scheduling discipline */
|
|
__u8 ac_pad[3];
|
|
__u32 ac_uid __attribute__((aligned(8)));
|
|
/* User ID */
|
|
__u32 ac_gid; /* Group ID */
|
|
__u32 ac_pid; /* Process ID */
|
|
__u32 ac_ppid; /* Parent process ID */
|
|
/* __u32 range means times from 1970 to 2106 */
|
|
__u32 ac_btime; /* Begin time [sec since 1970] */
|
|
__u64 ac_etime __attribute__((aligned(8)));
|
|
/* Elapsed time [usec] */
|
|
__u64 ac_utime; /* User CPU time [usec] */
|
|
__u64 ac_stime; /* SYstem CPU time [usec] */
|
|
__u64 ac_minflt; /* Minor Page Fault Count */
|
|
__u64 ac_majflt; /* Major Page Fault Count */
|
|
/* Basic Accounting Fields end */
|
|
|
|
/* Extended accounting fields start */
|
|
/* Accumulated RSS usage in duration of a task, in MBytes-usecs.
|
|
* The current rss usage is added to this counter every time
|
|
* a tick is charged to a task's system time. So, at the end we
|
|
* will have memory usage multiplied by system time. Thus an
|
|
* average usage per system time unit can be calculated.
|
|
*/
|
|
__u64 coremem; /* accumulated RSS usage in MB-usec */
|
|
/* Accumulated virtual memory usage in duration of a task.
|
|
* Same as acct_rss_mem1 above except that we keep track of VM usage.
|
|
*/
|
|
__u64 virtmem; /* accumulated VM usage in MB-usec */
|
|
|
|
/* High watermark of RSS and virtual memory usage in duration of
|
|
* a task, in KBytes.
|
|
*/
|
|
__u64 hiwater_rss; /* High-watermark of RSS usage, in KB */
|
|
__u64 hiwater_vm; /* High-water VM usage, in KB */
|
|
|
|
/* The following four fields are I/O statistics of a task. */
|
|
__u64 read_char; /* bytes read */
|
|
__u64 write_char; /* bytes written */
|
|
__u64 read_syscalls; /* read syscalls */
|
|
__u64 write_syscalls; /* write syscalls */
|
|
/* Extended accounting fields end */
|
|
|
|
#define TASKSTATS_HAS_IO_ACCOUNTING
|
|
/* Per-task storage I/O accounting starts */
|
|
__u64 read_bytes; /* bytes of read I/O */
|
|
__u64 write_bytes; /* bytes of write I/O */
|
|
__u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
|
|
|
|
__u64 nvcsw; /* voluntary_ctxt_switches */
|
|
__u64 nivcsw; /* nonvoluntary_ctxt_switches */
|
|
|
|
/* time accounting for SMT machines */
|
|
__u64 ac_utimescaled; /* utime scaled on frequency etc */
|
|
__u64 ac_stimescaled; /* stime scaled on frequency etc */
|
|
__u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
|
|
|
|
/* Delay waiting for memory reclaim */
|
|
__u64 freepages_count;
|
|
__u64 freepages_delay_total;
|
|
|
|
/* Delay waiting for thrashing page */
|
|
__u64 thrashing_count;
|
|
__u64 thrashing_delay_total;
|
|
|
|
/* v10: 64-bit btime to avoid overflow */
|
|
__u64 ac_btime64; /* 64-bit begin time */
|
|
|
|
/* v11: Delay waiting for memory compact */
|
|
__u64 compact_count;
|
|
__u64 compact_delay_total;
|
|
|
|
/* v12 begin */
|
|
__u32 ac_tgid; /* thread group ID */
|
|
/* Thread group walltime up to now. This is total process walltime if
|
|
* AGROUP flag is set.
|
|
*/
|
|
__u64 ac_tgetime __attribute__((aligned(8)));
|
|
/* Lightweight information to identify process binary files.
|
|
* This leaves userspace to match this to a file system path, using
|
|
* MAJOR() and MINOR() macros to identify a device and mount point,
|
|
* the inode to identify the executable file. This is /proc/self/exe
|
|
* at the end, so matching the most recent exec(). Values are zero
|
|
* for kernel threads.
|
|
*/
|
|
__u64 ac_exe_dev; /* program binary device ID */
|
|
__u64 ac_exe_inode; /* program binary inode number */
|
|
/* v12 end */
|
|
|
|
/* v13: Delay waiting for write-protect copy */
|
|
__u64 wpcopy_count;
|
|
__u64 wpcopy_delay_total;
|
|
|
|
/* v14: Delay waiting for IRQ/SOFTIRQ */
|
|
__u64 irq_count;
|
|
__u64 irq_delay_total;
|
|
};
|
|
|
|
|
|
/*
 * Commands exchanged between userspace and the kernel
 * Not versioned. New commands should only be inserted at the enum's end
 * prior to __TASKSTATS_CMD_MAX
 */

enum {
	TASKSTATS_CMD_UNSPEC = 0,	/* Reserved */
	TASKSTATS_CMD_GET,		/* user->kernel request/get-response */
	TASKSTATS_CMD_NEW,		/* kernel->user event */
	__TASKSTATS_CMD_MAX,
};

/* Highest valid command value (the sentinel itself is one past it) */
#define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1)

/*
 * Identifiers for the pieces of data carried in a taskstats message.
 * New values must be added immediately before __TASKSTATS_TYPE_MAX so the
 * existing numbering is preserved.
 */
enum {
	TASKSTATS_TYPE_UNSPEC = 0,	/* Reserved */
	TASKSTATS_TYPE_PID,		/* Process id */
	TASKSTATS_TYPE_TGID,		/* Thread group id */
	TASKSTATS_TYPE_STATS,		/* taskstats structure */
	TASKSTATS_TYPE_AGGR_PID,	/* contains pid + stats */
	TASKSTATS_TYPE_AGGR_TGID,	/* contains tgid + stats */
	TASKSTATS_TYPE_NULL,		/* contains nothing */
	__TASKSTATS_TYPE_MAX,
};

/* Highest valid type value (the sentinel itself is one past it) */
#define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1)

/*
 * Command attribute identifiers.
 * New values must be added immediately before __TASKSTATS_CMD_ATTR_MAX so
 * the existing numbering is preserved.
 */
enum {
	TASKSTATS_CMD_ATTR_UNSPEC = 0,
	TASKSTATS_CMD_ATTR_PID,
	TASKSTATS_CMD_ATTR_TGID,
	TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
	TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
	__TASKSTATS_CMD_ATTR_MAX,
};

/* Highest valid command attribute value (the sentinel is one past it) */
#define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1)

/* NETLINK_GENERIC related info */

#define TASKSTATS_GENL_NAME	"TASKSTATS"
#define TASKSTATS_GENL_VERSION	0x1

#endif /* _LINUX_TASKSTATS_H */
|