mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
blk-mq: init hctx sched after update ctx and hctx mapping
Currently, when nr_hw_queues is updated, the IO scheduler's init_hctx is invoked before blk_mq_map_swqueue has correctly adapted the mapping between ctx and hctx. The IO scheduler's init_hctx (kyber) may depend on this mapping, get a wrong result, and finally panic. A simple way to fix this is to switch the IO scheduler to 'none' before updating nr_hw_queues, and then switch it back after nr_hw_queues has been updated. blk_mq_sched_init_hctx and blk_mq_sched_exit_hctx are removed because nobody uses them any more. Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
fcedba42d9
commit
d48ece209f
@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
|
|||||||
blk_mq_sched_free_tags(set, hctx, i);
|
blk_mq_sched_free_tags(set, hctx, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
|
||||||
unsigned int hctx_idx)
|
|
||||||
{
|
|
||||||
struct elevator_queue *e = q->elevator;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!e)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (e->type->ops.mq.init_hctx) {
|
|
||||||
ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
|
|
||||||
if (ret) {
|
|
||||||
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
blk_mq_debugfs_register_sched_hctx(q, hctx);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
|
||||||
unsigned int hctx_idx)
|
|
||||||
{
|
|
||||||
struct elevator_queue *e = q->elevator;
|
|
||||||
|
|
||||||
if (!e)
|
|
||||||
return;
|
|
||||||
|
|
||||||
blk_mq_debugfs_unregister_sched_hctx(hctx);
|
|
||||||
|
|
||||||
if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
|
|
||||||
e->type->ops.mq.exit_hctx(hctx, hctx_idx);
|
|
||||||
hctx->sched_data = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||||
{
|
{
|
||||||
struct blk_mq_hw_ctx *hctx;
|
struct blk_mq_hw_ctx *hctx;
|
||||||
|
@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
|
|||||||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
|
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
|
||||||
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
|
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
|
||||||
|
|
||||||
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
|
||||||
unsigned int hctx_idx);
|
|
||||||
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
|
||||||
unsigned int hctx_idx);
|
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
||||||
{
|
{
|
||||||
|
@ -2147,8 +2147,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
|
|||||||
if (set->ops->exit_request)
|
if (set->ops->exit_request)
|
||||||
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
|
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
|
||||||
|
|
||||||
blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
|
|
||||||
|
|
||||||
if (set->ops->exit_hctx)
|
if (set->ops->exit_hctx)
|
||||||
set->ops->exit_hctx(hctx, hctx_idx);
|
set->ops->exit_hctx(hctx, hctx_idx);
|
||||||
|
|
||||||
@ -2216,12 +2214,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
|||||||
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
|
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
|
||||||
goto free_bitmap;
|
goto free_bitmap;
|
||||||
|
|
||||||
if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
|
|
||||||
goto exit_hctx;
|
|
||||||
|
|
||||||
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
|
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
|
||||||
if (!hctx->fq)
|
if (!hctx->fq)
|
||||||
goto sched_exit_hctx;
|
goto exit_hctx;
|
||||||
|
|
||||||
if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
|
if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
|
||||||
goto free_fq;
|
goto free_fq;
|
||||||
@ -2235,8 +2230,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
|||||||
|
|
||||||
free_fq:
|
free_fq:
|
||||||
kfree(hctx->fq);
|
kfree(hctx->fq);
|
||||||
sched_exit_hctx:
|
|
||||||
blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
|
|
||||||
exit_hctx:
|
exit_hctx:
|
||||||
if (set->ops->exit_hctx)
|
if (set->ops->exit_hctx)
|
||||||
set->ops->exit_hctx(hctx, hctx_idx);
|
set->ops->exit_hctx(hctx, hctx_idx);
|
||||||
@ -2898,10 +2891,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* request_queue and elevator_type pair.
|
||||||
|
* It is just used by __blk_mq_update_nr_hw_queues to cache
|
||||||
|
* the elevator_type associated with a request_queue.
|
||||||
|
*/
|
||||||
|
struct blk_mq_qe_pair {
|
||||||
|
struct list_head node;
|
||||||
|
struct request_queue *q;
|
||||||
|
struct elevator_type *type;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cache the elevator_type in qe pair list and switch the
|
||||||
|
* io scheduler to 'none'
|
||||||
|
*/
|
||||||
|
static bool blk_mq_elv_switch_none(struct list_head *head,
|
||||||
|
struct request_queue *q)
|
||||||
|
{
|
||||||
|
struct blk_mq_qe_pair *qe;
|
||||||
|
|
||||||
|
if (!q->elevator)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
|
||||||
|
if (!qe)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&qe->node);
|
||||||
|
qe->q = q;
|
||||||
|
qe->type = q->elevator->type;
|
||||||
|
list_add(&qe->node, head);
|
||||||
|
|
||||||
|
mutex_lock(&q->sysfs_lock);
|
||||||
|
/*
|
||||||
|
* After elevator_switch_mq, the previous elevator_queue will be
|
||||||
|
* released by elevator_release. The reference of the io scheduler
|
||||||
|
* module get by elevator_get will also be put. So we need to get
|
||||||
|
* a reference of the io scheduler module here to prevent it to be
|
||||||
|
* removed.
|
||||||
|
*/
|
||||||
|
__module_get(qe->type->elevator_owner);
|
||||||
|
elevator_switch_mq(q, NULL);
|
||||||
|
mutex_unlock(&q->sysfs_lock);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void blk_mq_elv_switch_back(struct list_head *head,
|
||||||
|
struct request_queue *q)
|
||||||
|
{
|
||||||
|
struct blk_mq_qe_pair *qe;
|
||||||
|
struct elevator_type *t = NULL;
|
||||||
|
|
||||||
|
list_for_each_entry(qe, head, node)
|
||||||
|
if (qe->q == q) {
|
||||||
|
t = qe->type;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!t)
|
||||||
|
return;
|
||||||
|
|
||||||
|
list_del(&qe->node);
|
||||||
|
kfree(qe);
|
||||||
|
|
||||||
|
mutex_lock(&q->sysfs_lock);
|
||||||
|
elevator_switch_mq(q, t);
|
||||||
|
mutex_unlock(&q->sysfs_lock);
|
||||||
|
}
|
||||||
|
|
||||||
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||||
int nr_hw_queues)
|
int nr_hw_queues)
|
||||||
{
|
{
|
||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
|
LIST_HEAD(head);
|
||||||
|
|
||||||
lockdep_assert_held(&set->tag_list_lock);
|
lockdep_assert_held(&set->tag_list_lock);
|
||||||
|
|
||||||
@ -2912,6 +2976,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
|||||||
|
|
||||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||||
blk_mq_freeze_queue(q);
|
blk_mq_freeze_queue(q);
|
||||||
|
/*
|
||||||
|
* Switch IO scheduler to 'none', cleaning up the data associated
|
||||||
|
* with the previous scheduler. We will switch back once we are done
|
||||||
|
* updating the new sw to hw queue mappings.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||||
|
if (!blk_mq_elv_switch_none(&head, q))
|
||||||
|
goto switch_back;
|
||||||
|
|
||||||
set->nr_hw_queues = nr_hw_queues;
|
set->nr_hw_queues = nr_hw_queues;
|
||||||
blk_mq_update_queue_map(set);
|
blk_mq_update_queue_map(set);
|
||||||
@ -2920,6 +2992,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
|||||||
blk_mq_queue_reinit(q);
|
blk_mq_queue_reinit(q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch_back:
|
||||||
|
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||||
|
blk_mq_elv_switch_back(&head, q);
|
||||||
|
|
||||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||||
blk_mq_unfreeze_queue(q);
|
blk_mq_unfreeze_queue(q);
|
||||||
}
|
}
|
||||||
|
@ -234,6 +234,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
|
|||||||
|
|
||||||
int elevator_init(struct request_queue *);
|
int elevator_init(struct request_queue *);
|
||||||
int elevator_init_mq(struct request_queue *q);
|
int elevator_init_mq(struct request_queue *q);
|
||||||
|
int elevator_switch_mq(struct request_queue *q,
|
||||||
|
struct elevator_type *new_e);
|
||||||
void elevator_exit(struct request_queue *, struct elevator_queue *);
|
void elevator_exit(struct request_queue *, struct elevator_queue *);
|
||||||
int elv_register_queue(struct request_queue *q);
|
int elv_register_queue(struct request_queue *q);
|
||||||
void elv_unregister_queue(struct request_queue *q);
|
void elv_unregister_queue(struct request_queue *q);
|
||||||
|
@ -933,16 +933,13 @@ void elv_unregister(struct elevator_type *e)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(elv_unregister);
|
EXPORT_SYMBOL_GPL(elv_unregister);
|
||||||
|
|
||||||
static int elevator_switch_mq(struct request_queue *q,
|
int elevator_switch_mq(struct request_queue *q,
|
||||||
struct elevator_type *new_e)
|
struct elevator_type *new_e)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
lockdep_assert_held(&q->sysfs_lock);
|
lockdep_assert_held(&q->sysfs_lock);
|
||||||
|
|
||||||
blk_mq_freeze_queue(q);
|
|
||||||
blk_mq_quiesce_queue(q);
|
|
||||||
|
|
||||||
if (q->elevator) {
|
if (q->elevator) {
|
||||||
if (q->elevator->registered)
|
if (q->elevator->registered)
|
||||||
elv_unregister_queue(q);
|
elv_unregister_queue(q);
|
||||||
@ -968,8 +965,6 @@ static int elevator_switch_mq(struct request_queue *q,
|
|||||||
blk_add_trace_msg(q, "elv switch: none");
|
blk_add_trace_msg(q, "elv switch: none");
|
||||||
|
|
||||||
out:
|
out:
|
||||||
blk_mq_unquiesce_queue(q);
|
|
||||||
blk_mq_unfreeze_queue(q);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1021,8 +1016,17 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
|
|||||||
|
|
||||||
lockdep_assert_held(&q->sysfs_lock);
|
lockdep_assert_held(&q->sysfs_lock);
|
||||||
|
|
||||||
if (q->mq_ops)
|
if (q->mq_ops) {
|
||||||
return elevator_switch_mq(q, new_e);
|
blk_mq_freeze_queue(q);
|
||||||
|
blk_mq_quiesce_queue(q);
|
||||||
|
|
||||||
|
err = elevator_switch_mq(q, new_e);
|
||||||
|
|
||||||
|
blk_mq_unquiesce_queue(q);
|
||||||
|
blk_mq_unfreeze_queue(q);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Turn on BYPASS and drain all requests w/ elevator private data.
|
* Turn on BYPASS and drain all requests w/ elevator private data.
|
||||||
|
Loading…
Reference in New Issue
Block a user