mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
blk-mq: move srcu from blk_mq_hw_ctx to request_queue
In the BLK_MQ_F_BLOCKING case, a per-hctx srcu is used to protect the dispatch critical area. However, this srcu instance sits at the end of hctx, where it often occupies a standalone cacheline that is usually cold. Inside srcu_read_lock() and srcu_read_unlock(), the WRITE is always done on the indirect percpu variable, which is allocated from the heap instead of being embedded, while srcu->srcu_idx is only read in srcu_read_lock(). So it doesn't matter whether the srcu structure stays in the hctx or in the request queue. Switch to a per-request-queue srcu for protecting dispatch; this simplifies quiesce a lot, not to mention that quiesce is always done request-queue wide. Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20211203131534.3668411-3-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
2a904d0085
commit
704b914f15
@ -66,6 +66,7 @@ DEFINE_IDA(blk_queue_ida);
|
|||||||
* For queue allocation
|
* For queue allocation
|
||||||
*/
|
*/
|
||||||
struct kmem_cache *blk_requestq_cachep;
|
struct kmem_cache *blk_requestq_cachep;
|
||||||
|
struct kmem_cache *blk_requestq_srcu_cachep;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Controlling structure to kblockd
|
* Controlling structure to kblockd
|
||||||
@ -437,21 +438,27 @@ static void blk_timeout_work(struct work_struct *work)
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
struct request_queue *blk_alloc_queue(int node_id)
|
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
|
||||||
{
|
{
|
||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
q = kmem_cache_alloc_node(blk_requestq_cachep,
|
q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
|
||||||
GFP_KERNEL | __GFP_ZERO, node_id);
|
GFP_KERNEL | __GFP_ZERO, node_id);
|
||||||
if (!q)
|
if (!q)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
if (alloc_srcu) {
|
||||||
|
blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
|
||||||
|
if (init_srcu_struct(q->srcu) != 0)
|
||||||
|
goto fail_q;
|
||||||
|
}
|
||||||
|
|
||||||
q->last_merge = NULL;
|
q->last_merge = NULL;
|
||||||
|
|
||||||
q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
|
q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
|
||||||
if (q->id < 0)
|
if (q->id < 0)
|
||||||
goto fail_q;
|
goto fail_srcu;
|
||||||
|
|
||||||
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
|
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -508,8 +515,11 @@ struct request_queue *blk_alloc_queue(int node_id)
|
|||||||
bioset_exit(&q->bio_split);
|
bioset_exit(&q->bio_split);
|
||||||
fail_id:
|
fail_id:
|
||||||
ida_simple_remove(&blk_queue_ida, q->id);
|
ida_simple_remove(&blk_queue_ida, q->id);
|
||||||
|
fail_srcu:
|
||||||
|
if (alloc_srcu)
|
||||||
|
cleanup_srcu_struct(q->srcu);
|
||||||
fail_q:
|
fail_q:
|
||||||
kmem_cache_free(blk_requestq_cachep, q);
|
kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1301,6 +1311,9 @@ int __init blk_dev_init(void)
|
|||||||
sizeof_field(struct request, cmd_flags));
|
sizeof_field(struct request, cmd_flags));
|
||||||
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
|
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
|
||||||
sizeof_field(struct bio, bi_opf));
|
sizeof_field(struct bio, bi_opf));
|
||||||
|
BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu),
|
||||||
|
__alignof__(struct request_queue)) !=
|
||||||
|
sizeof(struct request_queue));
|
||||||
|
|
||||||
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
|
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
|
||||||
kblockd_workqueue = alloc_workqueue("kblockd",
|
kblockd_workqueue = alloc_workqueue("kblockd",
|
||||||
@ -1311,6 +1324,10 @@ int __init blk_dev_init(void)
|
|||||||
blk_requestq_cachep = kmem_cache_create("request_queue",
|
blk_requestq_cachep = kmem_cache_create("request_queue",
|
||||||
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
|
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
|
||||||
|
|
||||||
|
blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu",
|
||||||
|
sizeof(struct request_queue) +
|
||||||
|
sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL);
|
||||||
|
|
||||||
blk_debugfs_root = debugfs_create_dir("block", NULL);
|
blk_debugfs_root = debugfs_create_dir("block", NULL);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -36,8 +36,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
|
|||||||
struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
|
struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
|
||||||
kobj);
|
kobj);
|
||||||
|
|
||||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
|
||||||
cleanup_srcu_struct(hctx->srcu);
|
|
||||||
blk_free_flush_queue(hctx->fq);
|
blk_free_flush_queue(hctx->fq);
|
||||||
sbitmap_free(&hctx->ctx_map);
|
sbitmap_free(&hctx->ctx_map);
|
||||||
free_cpumask_var(hctx->cpumask);
|
free_cpumask_var(hctx->cpumask);
|
||||||
|
@ -260,17 +260,9 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
|
|||||||
*/
|
*/
|
||||||
void blk_mq_wait_quiesce_done(struct request_queue *q)
|
void blk_mq_wait_quiesce_done(struct request_queue *q)
|
||||||
{
|
{
|
||||||
struct blk_mq_hw_ctx *hctx;
|
if (blk_queue_has_srcu(q))
|
||||||
unsigned int i;
|
synchronize_srcu(q->srcu);
|
||||||
bool rcu = false;
|
else
|
||||||
|
|
||||||
queue_for_each_hw_ctx(q, hctx, i) {
|
|
||||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
|
||||||
synchronize_srcu(hctx->srcu);
|
|
||||||
else
|
|
||||||
rcu = true;
|
|
||||||
}
|
|
||||||
if (rcu)
|
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
|
EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
|
||||||
@ -3400,20 +3392,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
|
|
||||||
{
|
|
||||||
int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
|
|
||||||
|
|
||||||
BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
|
|
||||||
__alignof__(struct blk_mq_hw_ctx)) !=
|
|
||||||
sizeof(struct blk_mq_hw_ctx));
|
|
||||||
|
|
||||||
if (tag_set->flags & BLK_MQ_F_BLOCKING)
|
|
||||||
hw_ctx_size += sizeof(struct srcu_struct);
|
|
||||||
|
|
||||||
return hw_ctx_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int blk_mq_init_hctx(struct request_queue *q,
|
static int blk_mq_init_hctx(struct request_queue *q,
|
||||||
struct blk_mq_tag_set *set,
|
struct blk_mq_tag_set *set,
|
||||||
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
|
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
|
||||||
@ -3451,7 +3429,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
|
|||||||
struct blk_mq_hw_ctx *hctx;
|
struct blk_mq_hw_ctx *hctx;
|
||||||
gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
|
gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
|
||||||
|
|
||||||
hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
|
hctx = kzalloc_node(sizeof(struct blk_mq_hw_ctx), gfp, node);
|
||||||
if (!hctx)
|
if (!hctx)
|
||||||
goto fail_alloc_hctx;
|
goto fail_alloc_hctx;
|
||||||
|
|
||||||
@ -3493,8 +3471,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
|
|||||||
if (!hctx->fq)
|
if (!hctx->fq)
|
||||||
goto free_bitmap;
|
goto free_bitmap;
|
||||||
|
|
||||||
if (hctx->flags & BLK_MQ_F_BLOCKING)
|
|
||||||
init_srcu_struct(hctx->srcu);
|
|
||||||
blk_mq_hctx_kobj_init(hctx);
|
blk_mq_hctx_kobj_init(hctx);
|
||||||
|
|
||||||
return hctx;
|
return hctx;
|
||||||
@ -3830,7 +3806,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
|||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
q = blk_alloc_queue(set->numa_node);
|
q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING);
|
||||||
if (!q)
|
if (!q)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
q->queuedata = queuedata;
|
q->queuedata = queuedata;
|
||||||
@ -3979,6 +3955,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
|||||||
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||||
struct request_queue *q)
|
struct request_queue *q)
|
||||||
{
|
{
|
||||||
|
WARN_ON_ONCE(blk_queue_has_srcu(q) !=
|
||||||
|
!!(set->flags & BLK_MQ_F_BLOCKING));
|
||||||
|
|
||||||
/* mark the queue as mq asap */
|
/* mark the queue as mq asap */
|
||||||
q->mq_ops = set->ops;
|
q->mq_ops = set->ops;
|
||||||
|
|
||||||
|
@ -385,9 +385,9 @@ do { \
|
|||||||
int srcu_idx; \
|
int srcu_idx; \
|
||||||
\
|
\
|
||||||
might_sleep(); \
|
might_sleep(); \
|
||||||
srcu_idx = srcu_read_lock((hctx)->srcu); \
|
srcu_idx = srcu_read_lock((hctx)->queue->srcu); \
|
||||||
(dispatch_ops); \
|
(dispatch_ops); \
|
||||||
srcu_read_unlock((hctx)->srcu, srcu_idx); \
|
srcu_read_unlock((hctx)->queue->srcu, srcu_idx); \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
@ -735,7 +735,8 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
|
|||||||
{
|
{
|
||||||
struct request_queue *q = container_of(rcu_head, struct request_queue,
|
struct request_queue *q = container_of(rcu_head, struct request_queue,
|
||||||
rcu_head);
|
rcu_head);
|
||||||
kmem_cache_free(blk_requestq_cachep, q);
|
|
||||||
|
kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
|
/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
|
||||||
|
10
block/blk.h
10
block/blk.h
@ -27,6 +27,7 @@ struct blk_flush_queue {
|
|||||||
};
|
};
|
||||||
|
|
||||||
extern struct kmem_cache *blk_requestq_cachep;
|
extern struct kmem_cache *blk_requestq_cachep;
|
||||||
|
extern struct kmem_cache *blk_requestq_srcu_cachep;
|
||||||
extern struct kobj_type blk_queue_ktype;
|
extern struct kobj_type blk_queue_ktype;
|
||||||
extern struct ida blk_queue_ida;
|
extern struct ida blk_queue_ida;
|
||||||
|
|
||||||
@ -424,7 +425,14 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
|||||||
struct page *page, unsigned int len, unsigned int offset,
|
struct page *page, unsigned int len, unsigned int offset,
|
||||||
unsigned int max_sectors, bool *same_page);
|
unsigned int max_sectors, bool *same_page);
|
||||||
|
|
||||||
struct request_queue *blk_alloc_queue(int node_id);
|
static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
|
||||||
|
{
|
||||||
|
if (srcu)
|
||||||
|
return blk_requestq_srcu_cachep;
|
||||||
|
return blk_requestq_cachep;
|
||||||
|
}
|
||||||
|
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
|
||||||
|
|
||||||
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
|
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
|
||||||
|
|
||||||
int disk_alloc_events(struct gendisk *disk);
|
int disk_alloc_events(struct gendisk *disk);
|
||||||
|
@ -1338,7 +1338,7 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
|
|||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
struct gendisk *disk;
|
struct gendisk *disk;
|
||||||
|
|
||||||
q = blk_alloc_queue(node);
|
q = blk_alloc_queue(node, false);
|
||||||
if (!q)
|
if (!q)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
|
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
#include <linux/sbitmap.h>
|
#include <linux/sbitmap.h>
|
||||||
#include <linux/srcu.h>
|
|
||||||
#include <linux/lockdep.h>
|
#include <linux/lockdep.h>
|
||||||
#include <linux/scatterlist.h>
|
#include <linux/scatterlist.h>
|
||||||
#include <linux/prefetch.h>
|
#include <linux/prefetch.h>
|
||||||
@ -375,13 +374,6 @@ struct blk_mq_hw_ctx {
|
|||||||
* q->unused_hctx_list.
|
* q->unused_hctx_list.
|
||||||
*/
|
*/
|
||||||
struct list_head hctx_list;
|
struct list_head hctx_list;
|
||||||
|
|
||||||
/**
|
|
||||||
* @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
|
|
||||||
* blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
|
|
||||||
* blk_mq_hw_ctx_size().
|
|
||||||
*/
|
|
||||||
struct srcu_struct srcu[];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <linux/percpu-refcount.h>
|
#include <linux/percpu-refcount.h>
|
||||||
#include <linux/blkzoned.h>
|
#include <linux/blkzoned.h>
|
||||||
#include <linux/sbitmap.h>
|
#include <linux/sbitmap.h>
|
||||||
|
#include <linux/srcu.h>
|
||||||
|
|
||||||
struct module;
|
struct module;
|
||||||
struct request_queue;
|
struct request_queue;
|
||||||
@ -373,11 +374,18 @@ struct request_queue {
|
|||||||
* devices that do not have multiple independent access ranges.
|
* devices that do not have multiple independent access ranges.
|
||||||
*/
|
*/
|
||||||
struct blk_independent_access_ranges *ia_ranges;
|
struct blk_independent_access_ranges *ia_ranges;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @srcu: Sleepable RCU. Use as lock when type of the request queue
|
||||||
|
* is blocking (BLK_MQ_F_BLOCKING). Must be the last member
|
||||||
|
*/
|
||||||
|
struct srcu_struct srcu[];
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Keep blk_queue_flag_name[] in sync with the definitions below */
|
/* Keep blk_queue_flag_name[] in sync with the definitions below */
|
||||||
#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
|
#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
|
||||||
#define QUEUE_FLAG_DYING 1 /* queue being torn down */
|
#define QUEUE_FLAG_DYING 1 /* queue being torn down */
|
||||||
|
#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */
|
||||||
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
|
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
|
||||||
#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
|
#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
|
||||||
#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
|
#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
|
||||||
@ -415,6 +423,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
|
|||||||
|
|
||||||
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
|
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
|
||||||
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
|
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
|
||||||
|
#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
|
||||||
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
|
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
|
||||||
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
|
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
|
||||||
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
|
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
|
||||||
|
Loading…
Reference in New Issue
Block a user