mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
workqueue: Add pwq->stats[] and a monitoring script
Currently, the only way to peer into workqueue operations is through tracing. While possible, it isn't easy or convenient to monitor per-workqueue behaviors over time this way. Let's add pwq->stats[] that track relevant events and a drgn monitoring script - tools/workqueue/wq_monitor.py. It's arguable whether this needs to be configurable. However, it currently only has several counters and the runtime overhead shouldn't be noticeable given that they're on pwq's which are per-cpu on per-cpu workqueues and per-numa-node on unbound ones. Let's keep it simple for the time being. v2: Patch reordered to earlier with fewer fields. Field will be added back gradually. Help message improved. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Lai Jiangshan <jiangshanlai@gmail.com>
This commit is contained in:
parent
854f5cc5b7
commit
725e8ec59c
@ -348,6 +348,37 @@ Guidelines
|
|||||||
level of locality in wq operations and work item execution.
|
level of locality in wq operations and work item execution.
|
||||||
|
|
||||||
|
|
||||||
|
Monitoring
|
||||||
|
==========
|
||||||
|
|
||||||
|
Use tools/workqueue/wq_monitor.py to monitor workqueue operations: ::
|
||||||
|
|
||||||
|
$ tools/workqueue/wq_monitor.py events
|
||||||
|
total infl CMwake mayday rescued
|
||||||
|
events 18545 0 5 - -
|
||||||
|
events_highpri 8 0 0 - -
|
||||||
|
events_long 3 0 0 - -
|
||||||
|
events_unbound 38306 0 - - -
|
||||||
|
events_freezable 0 0 0 - -
|
||||||
|
events_power_efficient 29598 0 0 - -
|
||||||
|
events_freezable_power_ 10 0 0 - -
|
||||||
|
sock_diag_events 0 0 0 - -
|
||||||
|
|
||||||
|
total infl CMwake mayday rescued
|
||||||
|
events 18548 0 5 - -
|
||||||
|
events_highpri 8 0 0 - -
|
||||||
|
events_long 3 0 0 - -
|
||||||
|
events_unbound 38322 0 - - -
|
||||||
|
events_freezable 0 0 0 - -
|
||||||
|
events_power_efficient 29603 0 0 - -
|
||||||
|
events_freezable_power_ 10 0 0 - -
|
||||||
|
sock_diag_events 0 0 0 - -
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
See the command's help message for more info.
|
||||||
|
|
||||||
|
|
||||||
Debugging
|
Debugging
|
||||||
=========
|
=========
|
||||||
|
|
||||||
@ -387,6 +418,7 @@ the stack trace of the offending worker thread. ::
|
|||||||
The work item's function should be trivially visible in the stack
|
The work item's function should be trivially visible in the stack
|
||||||
trace.
|
trace.
|
||||||
|
|
||||||
|
|
||||||
Non-reentrance Conditions
|
Non-reentrance Conditions
|
||||||
=========================
|
=========================
|
||||||
|
|
||||||
|
@ -199,6 +199,20 @@ struct worker_pool {
|
|||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per-pool_workqueue statistics. These can be monitored using
|
||||||
|
* tools/workqueue/wq_monitor.py.
|
||||||
|
*/
|
||||||
|
enum pool_workqueue_stats {
|
||||||
|
PWQ_STAT_STARTED, /* work items started execution */
|
||||||
|
PWQ_STAT_COMPLETED, /* work items completed execution */
|
||||||
|
PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
|
||||||
|
PWQ_STAT_MAYDAY, /* maydays to rescuer */
|
||||||
|
PWQ_STAT_RESCUED, /* linked work items executed by rescuer */
|
||||||
|
|
||||||
|
PWQ_NR_STATS,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
|
* The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
|
||||||
* of work_struct->data are used for flags and the remaining high bits
|
* of work_struct->data are used for flags and the remaining high bits
|
||||||
@ -236,6 +250,8 @@ struct pool_workqueue {
|
|||||||
struct list_head pwqs_node; /* WR: node on wq->pwqs */
|
struct list_head pwqs_node; /* WR: node on wq->pwqs */
|
||||||
struct list_head mayday_node; /* MD: node on wq->maydays */
|
struct list_head mayday_node; /* MD: node on wq->maydays */
|
||||||
|
|
||||||
|
u64 stats[PWQ_NR_STATS];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Release of unbound pwq is punted to system_wq. See put_pwq()
|
* Release of unbound pwq is punted to system_wq. See put_pwq()
|
||||||
* and pwq_unbound_release_workfn() for details. pool_workqueue
|
* and pwq_unbound_release_workfn() for details. pool_workqueue
|
||||||
@ -929,8 +945,10 @@ void wq_worker_sleeping(struct task_struct *task)
|
|||||||
}
|
}
|
||||||
|
|
||||||
pool->nr_running--;
|
pool->nr_running--;
|
||||||
if (need_more_worker(pool))
|
if (need_more_worker(pool)) {
|
||||||
|
worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++;
|
||||||
wake_up_worker(pool);
|
wake_up_worker(pool);
|
||||||
|
}
|
||||||
raw_spin_unlock_irq(&pool->lock);
|
raw_spin_unlock_irq(&pool->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2165,6 +2183,7 @@ static void send_mayday(struct work_struct *work)
|
|||||||
get_pwq(pwq);
|
get_pwq(pwq);
|
||||||
list_add_tail(&pwq->mayday_node, &wq->maydays);
|
list_add_tail(&pwq->mayday_node, &wq->maydays);
|
||||||
wake_up_process(wq->rescuer->task);
|
wake_up_process(wq->rescuer->task);
|
||||||
|
pwq->stats[PWQ_STAT_MAYDAY]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2403,6 +2422,7 @@ __acquires(&pool->lock)
|
|||||||
* workqueues), so hiding them isn't a problem.
|
* workqueues), so hiding them isn't a problem.
|
||||||
*/
|
*/
|
||||||
lockdep_invariant_state(true);
|
lockdep_invariant_state(true);
|
||||||
|
pwq->stats[PWQ_STAT_STARTED]++;
|
||||||
trace_workqueue_execute_start(work);
|
trace_workqueue_execute_start(work);
|
||||||
worker->current_func(work);
|
worker->current_func(work);
|
||||||
/*
|
/*
|
||||||
@ -2410,6 +2430,7 @@ __acquires(&pool->lock)
|
|||||||
* point will only record its address.
|
* point will only record its address.
|
||||||
*/
|
*/
|
||||||
trace_workqueue_execute_end(work, worker->current_func);
|
trace_workqueue_execute_end(work, worker->current_func);
|
||||||
|
pwq->stats[PWQ_STAT_COMPLETED]++;
|
||||||
lock_map_release(&lockdep_map);
|
lock_map_release(&lockdep_map);
|
||||||
lock_map_release(&pwq->wq->lockdep_map);
|
lock_map_release(&pwq->wq->lockdep_map);
|
||||||
|
|
||||||
@ -2653,6 +2674,7 @@ static int rescuer_thread(void *__rescuer)
|
|||||||
if (first)
|
if (first)
|
||||||
pool->watchdog_ts = jiffies;
|
pool->watchdog_ts = jiffies;
|
||||||
move_linked_works(work, scheduled, &n);
|
move_linked_works(work, scheduled, &n);
|
||||||
|
pwq->stats[PWQ_STAT_RESCUED]++;
|
||||||
}
|
}
|
||||||
first = false;
|
first = false;
|
||||||
}
|
}
|
||||||
|
150
tools/workqueue/wq_monitor.py
Normal file
150
tools/workqueue/wq_monitor.py
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
#!/usr/bin/env drgn
|
||||||
|
#
|
||||||
|
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
|
||||||
|
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
|
||||||
|
|
||||||
|
desc = """
|
||||||
|
This is a drgn script to monitor workqueues. For more info on drgn, visit
|
||||||
|
https://github.com/osandov/drgn.
|
||||||
|
|
||||||
|
total Total number of work items executed by the workqueue.
|
||||||
|
|
||||||
|
infl The number of currently in-flight work items.
|
||||||
|
|
||||||
|
CMwake The number of concurrency-management wake-ups while executing a
|
||||||
|
work item of the workqueue.
|
||||||
|
|
||||||
|
mayday The number of times the rescuer was requested while waiting for
|
||||||
|
new worker creation.
|
||||||
|
|
||||||
|
rescued The number of work items executed by the rescuer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import signal
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
|
import drgn
|
||||||
|
from drgn.helpers.linux.list import list_for_each_entry,list_empty
|
||||||
|
from drgn.helpers.linux.cpumask import for_each_possible_cpu
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
parser = argparse.ArgumentParser(description=desc,
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter)
|
||||||
|
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
|
||||||
|
help='Target workqueue name patterns (all if empty)')
|
||||||
|
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
|
||||||
|
help='Monitoring interval (0 to print once and exit)')
|
||||||
|
parser.add_argument('-j', '--json', action='store_true',
|
||||||
|
help='Output in json')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
def err(s):
|
||||||
|
print(s, file=sys.stderr, flush=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
workqueues = prog['workqueues']
|
||||||
|
|
||||||
|
WQ_UNBOUND = prog['WQ_UNBOUND']
|
||||||
|
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
|
||||||
|
|
||||||
|
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
|
||||||
|
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
|
||||||
|
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
|
||||||
|
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
|
||||||
|
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
|
||||||
|
PWQ_NR_STATS = prog['PWQ_NR_STATS']
|
||||||
|
|
||||||
|
class WqStats:
|
||||||
|
def __init__(self, wq):
|
||||||
|
self.name = wq.name.string_().decode()
|
||||||
|
self.unbound = wq.flags & WQ_UNBOUND != 0
|
||||||
|
self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
|
||||||
|
self.stats = [0] * PWQ_NR_STATS
|
||||||
|
for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
|
||||||
|
for i in range(PWQ_NR_STATS):
|
||||||
|
self.stats[i] += int(pwq.stats[i])
|
||||||
|
|
||||||
|
def dict(self, now):
|
||||||
|
return { 'timestamp' : now,
|
||||||
|
'name' : self.name,
|
||||||
|
'unbound' : self.unbound,
|
||||||
|
'mem_reclaim' : self.mem_reclaim,
|
||||||
|
'started' : self.stats[PWQ_STAT_STARTED],
|
||||||
|
'completed' : self.stats[PWQ_STAT_COMPLETED],
|
||||||
|
'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
|
||||||
|
'mayday' : self.stats[PWQ_STAT_MAYDAY],
|
||||||
|
'rescued' : self.stats[PWQ_STAT_RESCUED], }
|
||||||
|
|
||||||
|
def table_header_str():
|
||||||
|
return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
|
||||||
|
|
||||||
|
def table_row_str(self):
|
||||||
|
cm_wakeup = '-'
|
||||||
|
mayday = '-'
|
||||||
|
rescued = '-'
|
||||||
|
|
||||||
|
if not self.unbound:
|
||||||
|
cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])
|
||||||
|
|
||||||
|
if self.mem_reclaim:
|
||||||
|
mayday = str(self.stats[PWQ_STAT_MAYDAY])
|
||||||
|
rescued = str(self.stats[PWQ_STAT_RESCUED])
|
||||||
|
|
||||||
|
out = f'{self.name[-24:]:24} ' \
|
||||||
|
f'{self.stats[PWQ_STAT_STARTED]:8} ' \
|
||||||
|
f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
|
||||||
|
f'{cm_wakeup:>7} ' \
|
||||||
|
f'{mayday:>7} ' \
|
||||||
|
f'{rescued:>7} '
|
||||||
|
return out.rstrip(':')
|
||||||
|
|
||||||
|
exit_req = False
|
||||||
|
|
||||||
|
def sigint_handler(signr, frame):
|
||||||
|
global exit_req
|
||||||
|
exit_req = True
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# handle args
|
||||||
|
table_fmt = not args.json
|
||||||
|
interval = args.interval
|
||||||
|
|
||||||
|
re_str = None
|
||||||
|
if args.workqueue:
|
||||||
|
for r in args.workqueue:
|
||||||
|
if re_str is None:
|
||||||
|
re_str = r
|
||||||
|
else:
|
||||||
|
re_str += '|' + r
|
||||||
|
|
||||||
|
filter_re = re.compile(re_str) if re_str else None
|
||||||
|
|
||||||
|
# monitoring loop
|
||||||
|
signal.signal(signal.SIGINT, sigint_handler)
|
||||||
|
|
||||||
|
while not exit_req:
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
|
if table_fmt:
|
||||||
|
print()
|
||||||
|
print(WqStats.table_header_str())
|
||||||
|
|
||||||
|
for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
|
||||||
|
stats = WqStats(wq)
|
||||||
|
if filter_re and not filter_re.search(stats.name):
|
||||||
|
continue
|
||||||
|
if table_fmt:
|
||||||
|
print(stats.table_row_str())
|
||||||
|
else:
|
||||||
|
print(stats.dict(now))
|
||||||
|
|
||||||
|
if interval == 0:
|
||||||
|
break
|
||||||
|
time.sleep(interval)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Loading…
Reference in New Issue
Block a user