linux-next/block/blk-stat.h
Ming Lei 544fbd16a4 block: deactivate blk_stat timer in wbt_disable_default()
rwb_enabled() can't be changed when there is any inflight IO.

wbt_disable_default() may set rwb->wb_normal as zero, however the
blk_stat timer may still be pending, and the timer function will update
wrb->wb_normal again.

This patch introduces blk_stat_deactivate() and applies it in
wbt_disable_default(), then the following IO hang triggered when running
parted & switching io scheduler can be fixed:

[  369.937806] INFO: task parted:3645 blocked for more than 120 seconds.
[  369.938941]       Not tainted 4.20.0-rc6-00284-g906c801e5248 #498
[  369.939797] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  369.940768] parted          D    0  3645   3239 0x00000000
[  369.941500] Call Trace:
[  369.941874]  ? __schedule+0x6d9/0x74c
[  369.942392]  ? wbt_done+0x5e/0x5e
[  369.942864]  ? wbt_cleanup_cb+0x16/0x16
[  369.943404]  ? wbt_done+0x5e/0x5e
[  369.943874]  schedule+0x67/0x78
[  369.944298]  io_schedule+0x12/0x33
[  369.944771]  rq_qos_wait+0xb5/0x119
[  369.945193]  ? karma_partition+0x1c2/0x1c2
[  369.945691]  ? wbt_cleanup_cb+0x16/0x16
[  369.946151]  wbt_wait+0x85/0xb6
[  369.946540]  __rq_qos_throttle+0x23/0x2f
[  369.947014]  blk_mq_make_request+0xe6/0x40a
[  369.947518]  generic_make_request+0x192/0x2fe
[  369.948042]  ? submit_bio+0x103/0x11f
[  369.948486]  ? __radix_tree_lookup+0x35/0xb5
[  369.949011]  submit_bio+0x103/0x11f
[  369.949436]  ? blkg_lookup_slowpath+0x25/0x44
[  369.949962]  submit_bio_wait+0x53/0x7f
[  369.950469]  blkdev_issue_flush+0x8a/0xae
[  369.951032]  blkdev_fsync+0x2f/0x3a
[  369.951502]  do_fsync+0x2e/0x47
[  369.951887]  __x64_sys_fsync+0x10/0x13
[  369.952374]  do_syscall_64+0x89/0x149
[  369.952819]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  369.953492] RIP: 0033:0x7f95a1e729d4
[  369.953996] Code: Bad RIP value.
[  369.954456] RSP: 002b:00007ffdb570dd48 EFLAGS: 00000246 ORIG_RAX: 000000000000004a
[  369.955506] RAX: ffffffffffffffda RBX: 000055c2139c6be0 RCX: 00007f95a1e729d4
[  369.956389] RDX: 0000000000000001 RSI: 0000000000001261 RDI: 0000000000000004
[  369.957325] RBP: 0000000000000002 R08: 0000000000000000 R09: 000055c2139c6ce0
[  369.958199] R10: 0000000000000000 R11: 0000000000000246 R12: 000055c2139c0380
[  369.959143] R13: 0000000000000004 R14: 0000000000000100 R15: 0000000000000008

Cc: stable@vger.kernel.org
Cc: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2018-12-12 06:47:51 -07:00

172 lines
4.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_STAT_H
#define BLK_STAT_H
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/ktime.h>
#include <linux/rcupdate.h>
#include <linux/timer.h>
/**
* struct blk_stat_callback - Block statistics callback.
*
* A &struct blk_stat_callback is associated with a &struct request_queue. While
* @timer is active, that queue's request completion latencies are sorted into
* buckets by @bucket_fn and added to a per-cpu buffer, @cpu_stat. When the
* timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
*/
struct blk_stat_callback {
/*
* @list: RCU list of callbacks for a &struct request_queue.
*/
struct list_head list;
/**
* @timer: Timer for the next callback invocation.
*/
struct timer_list timer;
/**
* @cpu_stat: Per-cpu statistics buckets.
*/
struct blk_rq_stat __percpu *cpu_stat;
/**
* @bucket_fn: Given a request, returns which statistics bucket it
* should be accounted under. Return -1 for no bucket for this
* request.
*/
int (*bucket_fn)(const struct request *);
/**
* @buckets: Number of statistics buckets.
*/
unsigned int buckets;
/**
* @stat: Array of statistics buckets.
*/
struct blk_rq_stat *stat;
/**
* @fn: Callback function.
*/
void (*timer_fn)(struct blk_stat_callback *);
/**
* @data: Private pointer for the user.
*/
void *data;
struct rcu_head rcu;
};
struct blk_queue_stats *blk_alloc_queue_stats(void);
void blk_free_queue_stats(struct blk_queue_stats *);
void blk_stat_add(struct request *rq, u64 now);
/* record time/size info in request but not add a callback */
void blk_stat_enable_accounting(struct request_queue *q);
/**
* blk_stat_alloc_callback() - Allocate a block statistics callback.
* @timer_fn: Timer callback function.
* @bucket_fn: Bucket callback function.
* @buckets: Number of statistics buckets.
* @data: Value for the @data field of the &struct blk_stat_callback.
*
* See &struct blk_stat_callback for details on the callback functions.
*
* Return: &struct blk_stat_callback on success or NULL on ENOMEM.
*/
struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
int (*bucket_fn)(const struct request *),
unsigned int buckets, void *data);
/**
* blk_stat_add_callback() - Add a block statistics callback to be run on a
* request queue.
* @q: The request queue.
* @cb: The callback.
*
* Note that a single &struct blk_stat_callback can only be added to a single
* &struct request_queue.
*/
void blk_stat_add_callback(struct request_queue *q,
struct blk_stat_callback *cb);
/**
* blk_stat_remove_callback() - Remove a block statistics callback from a
* request queue.
* @q: The request queue.
* @cb: The callback.
*
* When this returns, the callback is not running on any CPUs and will not be
* called again unless readded.
*/
void blk_stat_remove_callback(struct request_queue *q,
struct blk_stat_callback *cb);
/**
* blk_stat_free_callback() - Free a block statistics callback.
* @cb: The callback.
*
* @cb may be NULL, in which case this does nothing. If it is not NULL, @cb must
* not be associated with a request queue. I.e., if it was previously added with
* blk_stat_add_callback(), it must also have been removed since then with
* blk_stat_remove_callback().
*/
void blk_stat_free_callback(struct blk_stat_callback *cb);
/**
* blk_stat_is_active() - Check if a block statistics callback is currently
* gathering statistics.
* @cb: The callback.
*/
static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
{
return timer_pending(&cb->timer);
}
/**
* blk_stat_activate_nsecs() - Gather block statistics during a time window in
* nanoseconds.
* @cb: The callback.
* @nsecs: Number of nanoseconds to gather statistics for.
*
* The timer callback will be called when the window expires.
*/
static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb,
u64 nsecs)
{
mod_timer(&cb->timer, jiffies + nsecs_to_jiffies(nsecs));
}
static inline void blk_stat_deactivate(struct blk_stat_callback *cb)
{
del_timer_sync(&cb->timer);
}
/**
* blk_stat_activate_msecs() - Gather block statistics during a time window in
* milliseconds.
* @cb: The callback.
* @msecs: Number of milliseconds to gather statistics for.
*
* The timer callback will be called when the window expires.
*/
static inline void blk_stat_activate_msecs(struct blk_stat_callback *cb,
unsigned int msecs)
{
mod_timer(&cb->timer, jiffies + msecs_to_jiffies(msecs));
}
void blk_rq_stat_add(struct blk_rq_stat *, u64);
void blk_rq_stat_sum(struct blk_rq_stat *, struct blk_rq_stat *);
void blk_rq_stat_init(struct blk_rq_stat *);
#endif