mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-29 09:13:38 +00:00
blk-mq-sched: add framework for MQ capable IO schedulers
This adds a set of hooks that intercepts the blk-mq path of allocating/inserting/issuing/completing requests, allowing us to develop a scheduler within that framework. We reuse the existing elevator scheduler API on the registration side, but augment that with the scheduler flagging support for the blk-mq interface, and with a separate set of ops hooks for MQ devices. We split driver and scheduler tags, so we can run the scheduling independently of device queue depth. Signed-off-by: Jens Axboe <axboe@fb.com> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Omar Sandoval <osandov@fb.com>
This commit is contained in:
parent
2af8cbe305
commit
bd166ef183
@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
|
||||
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
|
||||
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
|
||||
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
|
||||
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
|
||||
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
|
||||
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
|
||||
badblocks.o partitions/
|
||||
|
||||
|
@ -1223,7 +1223,11 @@ int blkcg_activate_policy(struct request_queue *q,
|
||||
if (blkcg_policy_enabled(q, pol))
|
||||
return 0;
|
||||
|
||||
blk_queue_bypass_start(q);
|
||||
if (q->mq_ops) {
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
} else
|
||||
blk_queue_bypass_start(q);
|
||||
pd_prealloc:
|
||||
if (!pd_prealloc) {
|
||||
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
|
||||
@ -1261,7 +1265,10 @@ int blkcg_activate_policy(struct request_queue *q,
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
out_bypass_end:
|
||||
blk_queue_bypass_end(q);
|
||||
if (q->mq_ops)
|
||||
blk_mq_unfreeze_queue(q);
|
||||
else
|
||||
blk_queue_bypass_end(q);
|
||||
if (pd_prealloc)
|
||||
pol->pd_free_fn(pd_prealloc);
|
||||
return ret;
|
||||
@ -1284,7 +1291,12 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
||||
if (!blkcg_policy_enabled(q, pol))
|
||||
return;
|
||||
|
||||
blk_queue_bypass_start(q);
|
||||
if (q->mq_ops) {
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
} else
|
||||
blk_queue_bypass_start(q);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
|
||||
__clear_bit(pol->plid, q->blkcg_pols);
|
||||
@ -1304,7 +1316,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
||||
}
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_bypass_end(q);
|
||||
|
||||
if (q->mq_ops)
|
||||
blk_mq_unfreeze_queue(q);
|
||||
else
|
||||
blk_queue_bypass_end(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
|
||||
|
||||
|
@ -39,6 +39,7 @@
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
|
||||
@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
|
||||
rq->cmd = rq->__cmd;
|
||||
rq->cmd_len = BLK_MAX_CDB;
|
||||
rq->tag = -1;
|
||||
rq->internal_tag = -1;
|
||||
rq->start_time = jiffies;
|
||||
set_start_time_ns(rq);
|
||||
rq->part = NULL;
|
||||
@ -2127,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
|
||||
if (q->mq_ops) {
|
||||
if (blk_queue_io_stat(q))
|
||||
blk_account_io_start(rq, true);
|
||||
blk_mq_insert_request(rq, false, true, false);
|
||||
blk_mq_sched_insert_request(rq, false, true, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/sched/sysctl.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/*
|
||||
* for max sense size
|
||||
@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
|
||||
* be reused after dying flag is set
|
||||
*/
|
||||
if (q->mq_ops) {
|
||||
blk_mq_insert_request(rq, at_head, true, false);
|
||||
blk_mq_sched_insert_request(rq, at_head, true, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -74,6 +74,7 @@
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/* FLUSH/FUA sequences */
|
||||
enum {
|
||||
@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
|
||||
* the comment in flush_end_io().
|
||||
*/
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
|
||||
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
|
||||
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
|
||||
*/
|
||||
if ((policy & REQ_FSEQ_DATA) &&
|
||||
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
|
||||
if (q->mq_ops) {
|
||||
blk_mq_insert_request(rq, false, true, false);
|
||||
} else
|
||||
if (q->mq_ops)
|
||||
blk_mq_sched_insert_request(rq, false, true, false);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &q->queue_head);
|
||||
return;
|
||||
}
|
||||
|
@ -43,7 +43,9 @@ static void ioc_exit_icq(struct io_cq *icq)
|
||||
if (icq->flags & ICQ_EXITED)
|
||||
return;
|
||||
|
||||
if (et->ops.sq.elevator_exit_icq_fn)
|
||||
if (et->uses_mq && et->ops.mq.exit_icq)
|
||||
et->ops.mq.exit_icq(icq);
|
||||
else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
|
||||
et->ops.sq.elevator_exit_icq_fn(icq);
|
||||
|
||||
icq->flags |= ICQ_EXITED;
|
||||
@ -383,7 +385,9 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
|
||||
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
|
||||
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
|
||||
list_add(&icq->q_node, &q->icq_list);
|
||||
if (et->ops.sq.elevator_init_icq_fn)
|
||||
if (et->uses_mq && et->ops.mq.init_icq)
|
||||
et->ops.mq.init_icq(icq);
|
||||
else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
|
||||
et->ops.sq.elevator_init_icq_fn(icq);
|
||||
} else {
|
||||
kmem_cache_free(et->icq_cache, icq);
|
||||
|
@ -763,7 +763,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e->type->ops.sq.elevator_allow_rq_merge_fn)
|
||||
if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
|
||||
if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
|
||||
return 0;
|
||||
|
||||
|
368
block/blk-mq-sched.c
Normal file
368
block/blk-mq-sched.c
Normal file
@ -0,0 +1,368 @@
|
||||
/*
|
||||
* blk-mq scheduling framework
|
||||
*
|
||||
* Copyright (C) 2016 Jens Axboe
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
void blk_mq_sched_free_hctx_data(struct request_queue *q,
|
||||
void (*exit)(struct blk_mq_hw_ctx *))
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (exit && hctx->sched_data)
|
||||
exit(hctx);
|
||||
kfree(hctx->sched_data);
|
||||
hctx->sched_data = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
|
||||
|
||||
/*
 * Allocate @size bytes of scheduler data for every hardware context of @q.
 *
 * @q:    queue whose hardware contexts get sched_data
 * @size: number of bytes to allocate per hctx
 * @init: optional per-hctx initializer, run after the allocation
 * @exit: per-hctx teardown callback handed to blk_mq_sched_free_hctx_data()
 *        when unwinding after a failure
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, or the non-zero
 * value returned by @init. On failure everything set up so far is released
 * through blk_mq_sched_free_hctx_data().
 */
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
				int (*init)(struct blk_mq_hw_ctx *),
				void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int err;
	int idx;

	queue_for_each_hw_ctx(q, hctx, idx) {
		hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
		if (!hctx->sched_data) {
			err = -ENOMEM;
			goto unwind;
		}

		if (!init)
			continue;

		err = init(hctx);
		if (!err)
			continue;

		/*
		 * exit() must never see partially initialized sched_data, so
		 * drop this allocation here. init() is responsible for
		 * cleaning up after itself when it fails.
		 */
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
		goto unwind;
	}

	return 0;

unwind:
	blk_mq_sched_free_hctx_data(q, exit);
	return err;
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
|
||||
|
||||
static void __blk_mq_sched_assign_ioc(struct request_queue *q,
|
||||
struct request *rq, struct io_context *ioc)
|
||||
{
|
||||
struct io_cq *icq;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
icq = ioc_lookup_icq(ioc, q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
if (!icq) {
|
||||
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
|
||||
if (!icq)
|
||||
return;
|
||||
}
|
||||
|
||||
rq->elv.icq = icq;
|
||||
if (!blk_mq_sched_get_rq_priv(q, rq)) {
|
||||
rq->rq_flags |= RQF_ELVPRIV;
|
||||
get_io_context(icq->ioc);
|
||||
return;
|
||||
}
|
||||
|
||||
rq->elv.icq = NULL;
|
||||
}
|
||||
|
||||
/*
 * Resolve the io context for @bio via rq_ioc() and, if there is one, attach
 * the matching icq to @rq. Does nothing when rq_ioc() returns NULL.
 */
static void blk_mq_sched_assign_ioc(struct request_queue *q,
				    struct request *rq, struct bio *bio)
{
	struct io_context *ioc = rq_ioc(bio);

	if (!ioc)
		return;

	__blk_mq_sched_assign_ioc(q, rq, ioc);
}
|
||||
|
||||
struct request *blk_mq_sched_get_request(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
unsigned int op,
|
||||
struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct blk_mq_ctx *ctx;
|
||||
struct request *rq;
|
||||
const bool is_flush = op & (REQ_PREFLUSH | REQ_FUA);
|
||||
|
||||
blk_queue_enter_live(q);
|
||||
ctx = blk_mq_get_ctx(q);
|
||||
hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
|
||||
|
||||
if (e) {
|
||||
data->flags |= BLK_MQ_REQ_INTERNAL;
|
||||
|
||||
/*
|
||||
* Flush requests are special and go directly to the
|
||||
* dispatch list.
|
||||
*/
|
||||
if (!is_flush && e->type->ops.mq.get_request) {
|
||||
rq = e->type->ops.mq.get_request(q, op, data);
|
||||
if (rq)
|
||||
rq->rq_flags |= RQF_QUEUED;
|
||||
} else
|
||||
rq = __blk_mq_alloc_request(data, op);
|
||||
} else {
|
||||
rq = __blk_mq_alloc_request(data, op);
|
||||
data->hctx->tags->rqs[rq->tag] = rq;
|
||||
}
|
||||
|
||||
if (rq) {
|
||||
if (!is_flush) {
|
||||
rq->elv.icq = NULL;
|
||||
if (e && e->type->icq_cache)
|
||||
blk_mq_sched_assign_ioc(q, rq, bio);
|
||||
}
|
||||
data->hctx->queued++;
|
||||
return rq;
|
||||
}
|
||||
|
||||
blk_queue_exit(q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void blk_mq_sched_put_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (rq->rq_flags & RQF_ELVPRIV) {
|
||||
blk_mq_sched_put_rq_priv(rq->q, rq);
|
||||
if (rq->elv.icq) {
|
||||
put_io_context(rq->elv.icq->ioc);
|
||||
rq->elv.icq = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
|
||||
e->type->ops.mq.put_request(rq);
|
||||
else
|
||||
blk_mq_finish_request(rq);
|
||||
}
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct elevator_queue *e = hctx->queue->elevator;
|
||||
LIST_HEAD(rq_list);
|
||||
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx)))
|
||||
return;
|
||||
|
||||
hctx->run++;
|
||||
|
||||
/*
|
||||
* If we have previous entries on our dispatch list, grab them first for
|
||||
* more fair dispatch.
|
||||
*/
|
||||
if (!list_empty_careful(&hctx->dispatch)) {
|
||||
spin_lock(&hctx->lock);
|
||||
if (!list_empty(&hctx->dispatch))
|
||||
list_splice_init(&hctx->dispatch, &rq_list);
|
||||
spin_unlock(&hctx->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Only ask the scheduler for requests, if we didn't have residual
|
||||
* requests from the dispatch list. This is to avoid the case where
|
||||
* we only ever dispatch a fraction of the requests available because
|
||||
* of low device queue depth. Once we pull requests out of the IO
|
||||
* scheduler, we can no longer merge or sort them. So it's best to
|
||||
* leave them there for as long as we can. Mark the hw queue as
|
||||
* needing a restart in that case.
|
||||
*/
|
||||
if (list_empty(&rq_list)) {
|
||||
if (e && e->type->ops.mq.dispatch_requests)
|
||||
e->type->ops.mq.dispatch_requests(hctx, &rq_list);
|
||||
else
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
} else
|
||||
blk_mq_sched_mark_restart(hctx);
|
||||
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list);
|
||||
}
|
||||
|
||||
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *rq_list,
|
||||
struct request *(*get_rq)(struct blk_mq_hw_ctx *))
|
||||
{
|
||||
do {
|
||||
struct request *rq;
|
||||
|
||||
rq = get_rq(hctx);
|
||||
if (!rq)
|
||||
break;
|
||||
|
||||
list_add_tail(&rq->queuelist, rq_list);
|
||||
} while (1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
|
||||
|
||||
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct request *rq;
|
||||
int ret;
|
||||
|
||||
ret = elv_merge(q, &rq, bio);
|
||||
if (ret == ELEVATOR_BACK_MERGE) {
|
||||
if (!blk_mq_sched_allow_merge(q, rq, bio))
|
||||
return false;
|
||||
if (bio_attempt_back_merge(q, rq, bio)) {
|
||||
if (!attempt_back_merge(q, rq))
|
||||
elv_merged_request(q, rq, ret);
|
||||
return true;
|
||||
}
|
||||
} else if (ret == ELEVATOR_FRONT_MERGE) {
|
||||
if (!blk_mq_sched_allow_merge(q, rq, bio))
|
||||
return false;
|
||||
if (bio_attempt_front_merge(q, rq, bio)) {
|
||||
if (!attempt_front_merge(q, rq))
|
||||
elv_merged_request(q, rq, ret);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
|
||||
|
||||
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e->type->ops.mq.bio_merge) {
|
||||
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
blk_mq_put_ctx(ctx);
|
||||
return e->type->ops.mq.bio_merge(hctx, bio);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
|
||||
|
||||
void blk_mq_sched_request_inserted(struct request *rq)
|
||||
{
|
||||
trace_block_rq_insert(rq->q, rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
|
||||
|
||||
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
|
||||
{
|
||||
if (rq->tag == -1) {
|
||||
rq->rq_flags |= RQF_SORTED;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we already have a real request tag, send directly to
|
||||
* the dispatch list.
|
||||
*/
|
||||
spin_lock(&hctx->lock);
|
||||
list_add(&rq->queuelist, &hctx->dispatch);
|
||||
spin_unlock(&hctx->lock);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);
|
||||
|
||||
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
if (hctx->sched_tags) {
|
||||
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
|
||||
blk_mq_free_rq_map(hctx->sched_tags);
|
||||
hctx->sched_tags = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int blk_mq_sched_setup(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int ret, i;
|
||||
|
||||
/*
|
||||
* Default to 256, since we don't split into sync/async like the
|
||||
* old code did. Additionally, this is a per-hw queue depth.
|
||||
*/
|
||||
q->nr_requests = 2 * BLKDEV_MAX_RQ;
|
||||
|
||||
/*
|
||||
* We're switching to using an IO scheduler, so setup the hctx
|
||||
* scheduler tags and switch the request map from the regular
|
||||
* tags to scheduler tags. First allocate what we need, so we
|
||||
* can safely fail and fallback, if needed.
|
||||
*/
|
||||
ret = 0;
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
|
||||
if (!hctx->sched_tags) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we failed, free what we did allocate
|
||||
*/
|
||||
if (ret) {
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (!hctx->sched_tags)
|
||||
continue;
|
||||
blk_mq_sched_free_tags(set, hctx, i);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blk_mq_sched_teardown(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i)
|
||||
blk_mq_sched_free_tags(set, hctx, i);
|
||||
}
|
170
block/blk-mq-sched.h
Normal file
170
block/blk-mq-sched.h
Normal file
@ -0,0 +1,170 @@
|
||||
#ifndef BLK_MQ_SCHED_H
|
||||
#define BLK_MQ_SCHED_H
|
||||
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
|
||||
int (*init)(struct blk_mq_hw_ctx *),
|
||||
void (*exit)(struct blk_mq_hw_ctx *));
|
||||
|
||||
void blk_mq_sched_free_hctx_data(struct request_queue *q,
|
||||
void (*exit)(struct blk_mq_hw_ctx *));
|
||||
|
||||
struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
|
||||
void blk_mq_sched_put_request(struct request *rq);
|
||||
|
||||
void blk_mq_sched_request_inserted(struct request *rq);
|
||||
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
|
||||
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
|
||||
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
|
||||
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
|
||||
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *rq_list,
|
||||
struct request *(*get_rq)(struct blk_mq_hw_ctx *));
|
||||
|
||||
int blk_mq_sched_setup(struct request_queue *q);
|
||||
void blk_mq_sched_teardown(struct request_queue *q);
|
||||
|
||||
static inline bool
|
||||
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
|
||||
return false;
|
||||
|
||||
return __blk_mq_sched_bio_merge(q, bio);
|
||||
}
|
||||
|
||||
static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.get_rq_priv)
|
||||
return e->type->ops.mq.get_rq_priv(q, rq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.put_rq_priv)
|
||||
e->type->ops.mq.put_rq_priv(q, rq);
|
||||
}
|
||||
|
||||
static inline void
|
||||
blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue,
|
||||
bool async)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
if (e && e->type->ops.mq.insert_requests) {
|
||||
LIST_HEAD(list);
|
||||
|
||||
list_add(&rq->queuelist, &list);
|
||||
e->type->ops.mq.insert_requests(hctx, &list, at_head);
|
||||
} else {
|
||||
spin_lock(&ctx->lock);
|
||||
__blk_mq_insert_request(hctx, rq, at_head);
|
||||
spin_unlock(&ctx->lock);
|
||||
}
|
||||
|
||||
if (run_queue)
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
}
|
||||
|
||||
static inline void
|
||||
blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx,
|
||||
struct list_head *list, bool run_queue_async)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
struct elevator_queue *e = hctx->queue->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.insert_requests)
|
||||
e->type->ops.mq.insert_requests(hctx, list, false);
|
||||
else
|
||||
blk_mq_insert_requests(hctx, ctx, list);
|
||||
|
||||
blk_mq_run_hw_queue(hctx, run_queue_async);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.allow_merge)
|
||||
return e->type->ops.mq.allow_merge(q, rq, bio);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void
|
||||
blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = hctx->queue->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.completed_request)
|
||||
e->type->ops.mq.completed_request(hctx, rq);
|
||||
|
||||
BUG_ON(rq->internal_tag == -1);
|
||||
|
||||
blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
|
||||
|
||||
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
|
||||
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void blk_mq_sched_started_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.started_request)
|
||||
e->type->ops.mq.started_request(rq);
|
||||
}
|
||||
|
||||
static inline void blk_mq_sched_requeue_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.requeue_request)
|
||||
e->type->ops.mq.requeue_request(rq);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct elevator_queue *e = hctx->queue->elevator;
|
||||
|
||||
if (e && e->type->ops.mq.has_work)
|
||||
return e->type->ops.mq.has_work(hctx);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
|
||||
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
|
||||
}
|
||||
|
||||
#endif
|
@ -231,6 +231,14 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t blk_mq_hw_sysfs_sched_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
|
||||
{
|
||||
if (hctx->sched_tags)
|
||||
return blk_mq_tag_sysfs_show(hctx->sched_tags, page);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
|
||||
{
|
||||
return blk_mq_tag_sysfs_show(hctx->tags, page);
|
||||
@ -345,6 +353,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
|
||||
.attr = {.name = "pending", .mode = S_IRUGO },
|
||||
.show = blk_mq_hw_sysfs_rq_list_show,
|
||||
};
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_sched_tags = {
|
||||
.attr = {.name = "sched_tags", .mode = S_IRUGO },
|
||||
.show = blk_mq_hw_sysfs_sched_tags_show,
|
||||
};
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
|
||||
.attr = {.name = "tags", .mode = S_IRUGO },
|
||||
.show = blk_mq_hw_sysfs_tags_show,
|
||||
@ -370,6 +382,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
|
||||
&blk_mq_hw_sysfs_dispatched.attr,
|
||||
&blk_mq_hw_sysfs_pending.attr,
|
||||
&blk_mq_hw_sysfs_tags.attr,
|
||||
&blk_mq_hw_sysfs_sched_tags.attr,
|
||||
&blk_mq_hw_sysfs_cpus.attr,
|
||||
&blk_mq_hw_sysfs_active.attr,
|
||||
&blk_mq_hw_sysfs_poll.attr,
|
||||
|
318
block/blk-mq.c
318
block/blk-mq.c
@ -32,6 +32,7 @@
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-stat.h"
|
||||
#include "blk-wbt.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
static DEFINE_MUTEX(all_q_mutex);
|
||||
static LIST_HEAD(all_q_list);
|
||||
@ -41,7 +42,9 @@ static LIST_HEAD(all_q_list);
|
||||
*/
|
||||
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
return sbitmap_any_bit_set(&hctx->ctx_map);
|
||||
return sbitmap_any_bit_set(&hctx->ctx_map) ||
|
||||
!list_empty_careful(&hctx->dispatch) ||
|
||||
blk_mq_sched_has_work(hctx);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -223,15 +226,23 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
|
||||
|
||||
tag = blk_mq_get_tag(data);
|
||||
if (tag != BLK_MQ_TAG_FAIL) {
|
||||
rq = data->hctx->tags->static_rqs[tag];
|
||||
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
|
||||
|
||||
rq = tags->static_rqs[tag];
|
||||
|
||||
if (blk_mq_tag_busy(data->hctx)) {
|
||||
rq->rq_flags = RQF_MQ_INFLIGHT;
|
||||
atomic_inc(&data->hctx->nr_active);
|
||||
}
|
||||
|
||||
rq->tag = tag;
|
||||
data->hctx->tags->rqs[tag] = rq;
|
||||
if (data->flags & BLK_MQ_REQ_INTERNAL) {
|
||||
rq->tag = -1;
|
||||
rq->internal_tag = tag;
|
||||
} else {
|
||||
rq->tag = tag;
|
||||
rq->internal_tag = -1;
|
||||
}
|
||||
|
||||
blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
|
||||
return rq;
|
||||
}
|
||||
@ -243,26 +254,21 @@ EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
|
||||
struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct blk_mq_ctx *ctx;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct request *rq;
|
||||
struct blk_mq_alloc_data alloc_data;
|
||||
struct request *rq;
|
||||
int ret;
|
||||
|
||||
ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ctx = blk_mq_get_ctx(q);
|
||||
hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
|
||||
rq = __blk_mq_alloc_request(&alloc_data, rw);
|
||||
blk_mq_put_ctx(ctx);
|
||||
rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
|
||||
|
||||
if (!rq) {
|
||||
blk_queue_exit(q);
|
||||
blk_mq_put_ctx(alloc_data.ctx);
|
||||
blk_queue_exit(q);
|
||||
|
||||
if (!rq)
|
||||
return ERR_PTR(-EWOULDBLOCK);
|
||||
}
|
||||
|
||||
rq->__data_len = 0;
|
||||
rq->__sector = (sector_t) -1;
|
||||
@ -322,10 +328,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
|
||||
|
||||
void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct request *rq)
|
||||
void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct request *rq)
|
||||
{
|
||||
const int tag = rq->tag;
|
||||
const int sched_tag = rq->internal_tag;
|
||||
struct request_queue *q = rq->q;
|
||||
|
||||
if (rq->rq_flags & RQF_MQ_INFLIGHT)
|
||||
@ -336,22 +342,30 @@ void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
|
||||
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
|
||||
clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
|
||||
blk_mq_put_tag(hctx, hctx->tags, ctx, tag);
|
||||
if (rq->tag != -1)
|
||||
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
|
||||
if (sched_tag != -1)
|
||||
blk_mq_sched_completed_request(hctx, rq);
|
||||
blk_queue_exit(q);
|
||||
}
|
||||
|
||||
static void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx,
|
||||
static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
|
||||
ctx->rq_completed[rq_is_sync(rq)]++;
|
||||
__blk_mq_free_request(hctx, ctx, rq);
|
||||
__blk_mq_finish_request(hctx, ctx, rq);
|
||||
}
|
||||
|
||||
void blk_mq_finish_request(struct request *rq)
|
||||
{
|
||||
blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
|
||||
}
|
||||
|
||||
void blk_mq_free_request(struct request *rq)
|
||||
{
|
||||
blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
|
||||
blk_mq_sched_put_request(rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_free_request);
|
||||
|
||||
@ -469,6 +483,8 @@ void blk_mq_start_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
|
||||
blk_mq_sched_started_request(rq);
|
||||
|
||||
trace_block_rq_issue(q, rq);
|
||||
|
||||
rq->resid_len = blk_rq_bytes(rq);
|
||||
@ -517,6 +533,7 @@ static void __blk_mq_requeue_request(struct request *rq)
|
||||
|
||||
trace_block_rq_requeue(q, rq);
|
||||
wbt_requeue(q->rq_wb, &rq->issue_stat);
|
||||
blk_mq_sched_requeue_request(rq);
|
||||
|
||||
if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
|
||||
if (q->dma_drain_size && blk_rq_bytes(rq))
|
||||
@ -551,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
|
||||
|
||||
rq->rq_flags &= ~RQF_SOFTBARRIER;
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_insert_request(rq, true, false, false);
|
||||
blk_mq_sched_insert_request(rq, true, false, false);
|
||||
}
|
||||
|
||||
while (!list_empty(&rq_list)) {
|
||||
rq = list_entry(rq_list.next, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_insert_request(rq, false, false, false);
|
||||
blk_mq_sched_insert_request(rq, false, false, false);
|
||||
}
|
||||
|
||||
blk_mq_run_hw_queues(q, false);
|
||||
@ -765,6 +782,12 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
|
||||
continue;
|
||||
|
||||
el_ret = blk_try_merge(rq, bio);
|
||||
if (el_ret == ELEVATOR_NO_MERGE)
|
||||
continue;
|
||||
|
||||
if (!blk_mq_sched_allow_merge(q, rq, bio))
|
||||
break;
|
||||
|
||||
if (el_ret == ELEVATOR_BACK_MERGE) {
|
||||
if (bio_attempt_back_merge(q, rq, bio)) {
|
||||
ctx->rq_merged++;
|
||||
@ -824,6 +847,59 @@ static inline unsigned int queued_to_index(unsigned int queued)
|
||||
return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
|
||||
}
|
||||
|
||||
/*
 * Make sure @rq holds a driver tag.
 *
 * @rq:   request that needs a driver tag
 * @hctx: optional out parameter; on success receives the hardware context
 *        from the allocation data
 * @wait: when false the tag allocation uses BLK_MQ_REQ_NOWAIT
 *
 * Returns false if the hardware queue is stopped or no tag could be
 * obtained. Returns true if @rq already had a tag or one was allocated; a
 * newly allocated tag is also recorded in the hctx tag map.
 */
static bool blk_mq_get_driver_tag(struct request *rq,
				  struct blk_mq_hw_ctx **hctx, bool wait)
{
	struct blk_mq_alloc_data data = {
		.q = rq->q,
		.ctx = rq->mq_ctx,
		.hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
	};

	if (blk_mq_hctx_stopped(data.hctx))
		return false;

	if (rq->tag == -1) {
		rq->tag = blk_mq_get_tag(&data);
		if (rq->tag < 0)
			return false;

		data.hctx->tags->rqs[rq->tag] = rq;
	}

	/* Report data.hctx as it stands after any tag allocation above. */
	if (hctx)
		*hctx = data.hctx;

	return true;
}
|
||||
|
||||
/*
|
||||
* If we fail getting a driver tag because all the driver tags are already
|
||||
* assigned and on the dispatch list, BUT the first entry does not have a
|
||||
* tag, then we could deadlock. For that case, move entries with assigned
|
||||
* driver tags to the front, leaving the set of tagged requests in the
|
||||
* same order, and the untagged set in the same order.
|
||||
*
* The list is walked from tail to head and each request with rq->tag != -1
* is moved to the head of @list. 'first' remembers the first request that
* was moved; meeting it again ends the walk, since the entries between it
* and the head are the ones already moved.
*
* Returns true if at least one tagged request was found (and moved),
* false if no request on @list carries a driver tag.
*/
|
||||
static bool reorder_tags_to_front(struct list_head *list)
|
||||
{
|
||||
struct request *rq, *tmp, *first = NULL;
|
||||
|
||||
/* _safe variant: the current entry may be relinked by list_move(). */
list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
|
||||
if (rq == first)
|
||||
break;
|
||||
if (rq->tag != -1) {
|
||||
list_move(&rq->queuelist, list);
|
||||
if (!first)
|
||||
first = rq;
|
||||
}
|
||||
}
|
||||
|
||||
return first != NULL;
|
||||
}
|
||||
|
||||
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
@ -846,6 +922,12 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
|
||||
struct blk_mq_queue_data bd;
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
|
||||
if (!queued && reorder_tags_to_front(list))
|
||||
continue;
|
||||
blk_mq_sched_mark_restart(hctx);
|
||||
break;
|
||||
}
|
||||
list_del_init(&rq->queuelist);
|
||||
|
||||
bd.rq = rq;
|
||||
@ -899,48 +981,17 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
|
||||
* the requests in rq_list might get lost.
|
||||
*
|
||||
* blk_mq_run_hw_queue() already checks the STOPPED bit
|
||||
**/
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
*
|
||||
* If RESTART is set, then let completion restart the queue
|
||||
* instead of potentially looping here.
|
||||
*/
|
||||
if (!blk_mq_sched_needs_restart(hctx))
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
}
|
||||
|
||||
return ret != BLK_MQ_RQ_QUEUE_BUSY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Run this hardware queue, pulling any software queues mapped to it in.
|
||||
* Note that this function currently has various problems around ordering
|
||||
* of IO. In particular, we'd like FIFO behaviour on handling existing
|
||||
* items on the hctx->dispatch list. Ignore that for now.
|
||||
*
* Returns early, without touching any list, if the hardware queue is
* stopped. Otherwise it bumps hctx->run, gathers requests into a local
* list and hands that list to blk_mq_dispatch_rq_list().
*/
|
||||
static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
LIST_HEAD(rq_list);
|
||||
/* NOTE(review): driver_list is declared but never referenced below. */
LIST_HEAD(driver_list);
|
||||
|
||||
if (unlikely(blk_mq_hctx_stopped(hctx)))
|
||||
return;
|
||||
|
||||
hctx->run++;
|
||||
|
||||
/*
|
||||
* Touch any software queue that has pending entries.
|
||||
*/
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
|
||||
/*
|
||||
* If we have previous entries on our dispatch list, grab them
|
||||
* and stuff them at the front for more fair dispatch.
|
||||
*/
|
||||
/* Lockless peek first; re-checked under hctx->lock before splicing. */
if (!list_empty_careful(&hctx->dispatch)) {
|
||||
spin_lock(&hctx->lock);
|
||||
if (!list_empty(&hctx->dispatch))
|
||||
list_splice_init(&hctx->dispatch, &rq_list);
|
||||
spin_unlock(&hctx->lock);
|
||||
}
|
||||
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list);
|
||||
}
|
||||
|
||||
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
int srcu_idx;
|
||||
@ -950,11 +1001,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
|
||||
|
||||
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
|
||||
rcu_read_lock();
|
||||
blk_mq_process_rq_list(hctx);
|
||||
blk_mq_sched_dispatch_requests(hctx);
|
||||
rcu_read_unlock();
|
||||
} else {
|
||||
srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
|
||||
blk_mq_process_rq_list(hctx);
|
||||
blk_mq_sched_dispatch_requests(hctx);
|
||||
srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
|
||||
}
|
||||
}
|
||||
@ -1010,8 +1061,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if ((!blk_mq_hctx_has_pending(hctx) &&
|
||||
list_empty_careful(&hctx->dispatch)) ||
|
||||
if (!blk_mq_hctx_has_pending(hctx) ||
|
||||
blk_mq_hctx_stopped(hctx))
|
||||
continue;
|
||||
|
||||
@ -1148,32 +1198,10 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
blk_mq_hctx_mark_pending(hctx, ctx);
|
||||
}
|
||||
|
||||
/*
 * Insert @rq into the software queue it is already assigned to
 * (rq->mq_ctx), optionally kicking the hardware queue mapped to it.
 *
 * @rq:        request to insert; rq->mq_ctx and rq->q must be set.
 * @at_head:   passed through to __blk_mq_insert_request().
 * @run_queue: when true, blk_mq_run_hw_queue() is called after the insert.
 * @async:     passed through to blk_mq_run_hw_queue().
 *
 * The insert itself is done with ctx->lock held; the queue run happens
 * after the lock has been dropped.
 */
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
|
||||
bool async)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
spin_lock(&ctx->lock);
|
||||
__blk_mq_insert_request(hctx, rq, at_head);
|
||||
spin_unlock(&ctx->lock);
|
||||
|
||||
if (run_queue)
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
}
|
||||
|
||||
static void blk_mq_insert_requests(struct request_queue *q,
|
||||
struct blk_mq_ctx *ctx,
|
||||
struct list_head *list,
|
||||
int depth,
|
||||
bool from_schedule)
|
||||
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct list_head *list)
|
||||
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
trace_block_unplug(q, depth, !from_schedule);
|
||||
|
||||
/*
|
||||
* preemption doesn't flush plug list, so it's possible ctx->cpu is
|
||||
* offline now
|
||||
@ -1189,8 +1217,6 @@ static void blk_mq_insert_requests(struct request_queue *q,
|
||||
}
|
||||
blk_mq_hctx_mark_pending(hctx, ctx);
|
||||
spin_unlock(&ctx->lock);
|
||||
|
||||
blk_mq_run_hw_queue(hctx, from_schedule);
|
||||
}
|
||||
|
||||
static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
|
||||
@ -1226,9 +1252,10 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
BUG_ON(!rq->q);
|
||||
if (rq->mq_ctx != this_ctx) {
|
||||
if (this_ctx) {
|
||||
blk_mq_insert_requests(this_q, this_ctx,
|
||||
&ctx_list, depth,
|
||||
from_schedule);
|
||||
trace_block_unplug(this_q, depth, from_schedule);
|
||||
blk_mq_sched_insert_requests(this_q, this_ctx,
|
||||
&ctx_list,
|
||||
from_schedule);
|
||||
}
|
||||
|
||||
this_ctx = rq->mq_ctx;
|
||||
@ -1245,8 +1272,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
* on 'ctx_list'. Do those.
|
||||
*/
|
||||
if (this_ctx) {
|
||||
blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
|
||||
from_schedule);
|
||||
trace_block_unplug(this_q, depth, from_schedule);
|
||||
blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
|
||||
from_schedule);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1284,51 +1312,39 @@ static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
|
||||
}
|
||||
|
||||
spin_unlock(&ctx->lock);
|
||||
__blk_mq_free_request(hctx, ctx, rq);
|
||||
__blk_mq_finish_request(hctx, ctx, rq);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Allocate a request for @bio on the software/hardware context pair
 * obtained from blk_mq_get_ctx() and blk_mq_map_queue().
 *
 * @q:    queue the bio is being submitted to.
 * @bio:  bio whose bi_opf is used for tracing and for the allocation.
 * @data: allocation data, filled in here via blk_mq_set_alloc_data() and
 *        left populated for the caller.
 *
 * Returns whatever __blk_mq_alloc_request() returns.
 *
 * NOTE(review): data->hctx->queued is incremented without checking the
 * allocation result - confirm that counting failed allocations is intended.
 */
static struct request *blk_mq_map_request(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct blk_mq_ctx *ctx;
|
||||
struct request *rq;
|
||||
|
||||
blk_queue_enter_live(q);
|
||||
ctx = blk_mq_get_ctx(q);
|
||||
hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
trace_block_getrq(q, bio, bio->bi_opf);
|
||||
blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
|
||||
rq = __blk_mq_alloc_request(data, bio->bi_opf);
|
||||
|
||||
/* Uses data->hctx rather than the local hctx - presumably the allocator may retarget it; confirm. */
data->hctx->queued++;
|
||||
return rq;
|
||||
}
|
||||
|
||||
static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
|
||||
{
|
||||
return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false);
|
||||
if (rq->tag != -1)
|
||||
return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false);
|
||||
|
||||
return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
|
||||
}
|
||||
|
||||
static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
|
||||
{
|
||||
int ret;
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
|
||||
struct blk_mq_queue_data bd = {
|
||||
.rq = rq,
|
||||
.list = NULL,
|
||||
.last = 1
|
||||
};
|
||||
blk_qc_t new_cookie = request_to_qc_t(hctx, rq);
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
blk_qc_t new_cookie;
|
||||
int ret;
|
||||
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
if (q->elevator)
|
||||
goto insert;
|
||||
|
||||
if (!blk_mq_get_driver_tag(rq, &hctx, false))
|
||||
goto insert;
|
||||
|
||||
new_cookie = request_to_qc_t(hctx, rq);
|
||||
|
||||
/*
|
||||
* For OK queue, we are done. For error, kill it. Any other
|
||||
* error (busy), just add it to our list as we previously
|
||||
@ -1350,7 +1366,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
|
||||
}
|
||||
|
||||
insert:
|
||||
blk_mq_insert_request(rq, false, true, true);
|
||||
blk_mq_sched_insert_request(rq, false, true, true);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1383,9 +1399,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
if (blk_mq_sched_bio_merge(q, bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
wb_acct = wbt_wait(q->rq_wb, bio, NULL);
|
||||
|
||||
rq = blk_mq_map_request(q, bio, &data);
|
||||
trace_block_getrq(q, bio, bio->bi_opf);
|
||||
|
||||
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
|
||||
if (unlikely(!rq)) {
|
||||
__wbt_done(q->rq_wb, wb_acct);
|
||||
return BLK_QC_T_NONE;
|
||||
@ -1397,6 +1418,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
|
||||
if (unlikely(is_flush_fua)) {
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_get_driver_tag(rq, NULL, true);
|
||||
blk_insert_flush(rq);
|
||||
goto run_queue;
|
||||
}
|
||||
@ -1447,6 +1469,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (q->elevator) {
|
||||
blk_mq_put_ctx(data.ctx);
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_sched_insert_request(rq, false, true, true);
|
||||
goto done;
|
||||
}
|
||||
if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
|
||||
/*
|
||||
* For a SYNC request, send it to the hardware immediately. For
|
||||
@ -1492,9 +1520,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
||||
} else
|
||||
request_count = blk_plug_queued_count(q);
|
||||
|
||||
if (blk_mq_sched_bio_merge(q, bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
wb_acct = wbt_wait(q->rq_wb, bio, NULL);
|
||||
|
||||
rq = blk_mq_map_request(q, bio, &data);
|
||||
trace_block_getrq(q, bio, bio->bi_opf);
|
||||
|
||||
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
|
||||
if (unlikely(!rq)) {
|
||||
__wbt_done(q->rq_wb, wb_acct);
|
||||
return BLK_QC_T_NONE;
|
||||
@ -1506,6 +1539,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
||||
|
||||
if (unlikely(is_flush_fua)) {
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_get_driver_tag(rq, NULL, true);
|
||||
blk_insert_flush(rq);
|
||||
goto run_queue;
|
||||
}
|
||||
@ -1544,6 +1578,12 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
||||
return cookie;
|
||||
}
|
||||
|
||||
if (q->elevator) {
|
||||
blk_mq_put_ctx(data.ctx);
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_sched_insert_request(rq, false, true, true);
|
||||
goto done;
|
||||
}
|
||||
if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
|
||||
/*
|
||||
* For a SYNC request, send it to the hardware immediately. For
|
||||
@ -1556,6 +1596,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
|
||||
blk_mq_put_ctx(data.ctx);
|
||||
done:
|
||||
return cookie;
|
||||
}
|
||||
|
||||
@ -1925,9 +1966,11 @@ static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
|
||||
static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
|
||||
blk_mq_free_rq_map(set->tags[hctx_idx]);
|
||||
set->tags[hctx_idx] = NULL;
|
||||
if (set->tags[hctx_idx]) {
|
||||
blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
|
||||
blk_mq_free_rq_map(set->tags[hctx_idx]);
|
||||
set->tags[hctx_idx] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_mq_map_swqueue(struct request_queue *q,
|
||||
@ -2084,6 +2127,8 @@ void blk_mq_release(struct request_queue *q)
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned int i;
|
||||
|
||||
blk_mq_sched_teardown(q);
|
||||
|
||||
/* hctx kobj stays in hctx */
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (!hctx)
|
||||
@ -2504,14 +2549,22 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i, ret;
|
||||
|
||||
if (!set || nr > set->queue_depth)
|
||||
if (!set)
|
||||
return -EINVAL;
|
||||
|
||||
ret = 0;
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (!hctx->tags)
|
||||
continue;
|
||||
ret = blk_mq_tag_update_depth(hctx->tags, nr);
|
||||
/*
|
||||
* If we're using an MQ scheduler, just update the scheduler
|
||||
* queue depth. This is similar to what the old code would do.
|
||||
*/
|
||||
if (!hctx->sched_tags)
|
||||
ret = blk_mq_tag_update_depth(hctx->tags,
|
||||
min(nr, set->queue_depth));
|
||||
else
|
||||
ret = blk_mq_tag_update_depth(hctx->sched_tags, nr);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@ -2704,7 +2757,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
|
||||
blk_flush_plug_list(plug, false);
|
||||
|
||||
hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
|
||||
rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
|
||||
if (!blk_qc_t_is_internal(cookie))
|
||||
rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
|
||||
else
|
||||
rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
|
||||
|
||||
return __blk_mq_poll(hctx, rq);
|
||||
}
|
||||
|
@ -52,6 +52,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
||||
*/
|
||||
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head);
|
||||
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct list_head *list);
|
||||
/*
|
||||
* CPU hotplug helpers
|
||||
*/
|
||||
@ -124,6 +126,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
|
||||
|
||||
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
|
||||
{
|
||||
if (data->flags & BLK_MQ_REQ_INTERNAL)
|
||||
return data->hctx->sched_tags;
|
||||
|
||||
return data->hctx->tags;
|
||||
}
|
||||
|
||||
@ -132,8 +137,9 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
|
||||
*/
|
||||
void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
|
||||
struct request *rq, unsigned int op);
|
||||
void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
struct request *rq);
|
||||
void blk_mq_finish_request(struct request *rq);
|
||||
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
|
||||
unsigned int op);
|
||||
|
||||
|
@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
|
||||
list_del_init(&rq->queuelist);
|
||||
rq->rq_flags &= ~RQF_QUEUED;
|
||||
rq->tag = -1;
|
||||
rq->internal_tag = -1;
|
||||
|
||||
if (unlikely(bqt->tag_index[tag] == NULL))
|
||||
printk(KERN_ERR "%s: tag %d is missing\n",
|
||||
|
202
block/elevator.c
202
block/elevator.c
@ -40,6 +40,7 @@
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
static DEFINE_SPINLOCK(elv_list_lock);
|
||||
static LIST_HEAD(elv_list);
|
||||
@ -58,7 +59,9 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e->type->ops.sq.elevator_allow_bio_merge_fn)
|
||||
if (e->uses_mq && e->type->ops.mq.allow_merge)
|
||||
return e->type->ops.mq.allow_merge(q, rq, bio);
|
||||
else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
|
||||
return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
|
||||
|
||||
return 1;
|
||||
@ -163,6 +166,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
|
||||
kobject_init(&eq->kobj, &elv_ktype);
|
||||
mutex_init(&eq->sysfs_lock);
|
||||
hash_init(eq->hash);
|
||||
eq->uses_mq = e->uses_mq;
|
||||
|
||||
return eq;
|
||||
}
|
||||
@ -219,14 +223,26 @@ int elevator_init(struct request_queue *q, char *name)
|
||||
if (!e) {
|
||||
printk(KERN_ERR
|
||||
"Default I/O scheduler not found. " \
|
||||
"Using noop.\n");
|
||||
"Using noop/none.\n");
|
||||
if (q->mq_ops) {
|
||||
elevator_put(e);
|
||||
return 0;
|
||||
}
|
||||
e = elevator_get("noop", false);
|
||||
}
|
||||
}
|
||||
|
||||
err = e->ops.sq.elevator_init_fn(q, e);
|
||||
if (err)
|
||||
if (e->uses_mq) {
|
||||
err = blk_mq_sched_setup(q);
|
||||
if (!err)
|
||||
err = e->ops.mq.init_sched(q, e);
|
||||
} else
|
||||
err = e->ops.sq.elevator_init_fn(q, e);
|
||||
if (err) {
|
||||
if (e->uses_mq)
|
||||
blk_mq_sched_teardown(q);
|
||||
elevator_put(e);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL(elevator_init);
|
||||
@ -234,7 +250,9 @@ EXPORT_SYMBOL(elevator_init);
|
||||
void elevator_exit(struct elevator_queue *e)
|
||||
{
|
||||
mutex_lock(&e->sysfs_lock);
|
||||
if (e->type->ops.sq.elevator_exit_fn)
|
||||
if (e->uses_mq && e->type->ops.mq.exit_sched)
|
||||
e->type->ops.mq.exit_sched(e);
|
||||
else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
|
||||
e->type->ops.sq.elevator_exit_fn(e);
|
||||
mutex_unlock(&e->sysfs_lock);
|
||||
|
||||
@ -253,6 +271,7 @@ void elv_rqhash_del(struct request_queue *q, struct request *rq)
|
||||
if (ELV_ON_HASH(rq))
|
||||
__elv_rqhash_del(rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(elv_rqhash_del);
|
||||
|
||||
void elv_rqhash_add(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
@ -262,6 +281,7 @@ void elv_rqhash_add(struct request_queue *q, struct request *rq)
|
||||
hash_add(e->hash, &rq->hash, rq_hash_key(rq));
|
||||
rq->rq_flags |= RQF_HASHED;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(elv_rqhash_add);
|
||||
|
||||
void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
@ -443,7 +463,9 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
|
||||
return ELEVATOR_BACK_MERGE;
|
||||
}
|
||||
|
||||
if (e->type->ops.sq.elevator_merge_fn)
|
||||
if (e->uses_mq && e->type->ops.mq.request_merge)
|
||||
return e->type->ops.mq.request_merge(q, req, bio);
|
||||
else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
|
||||
return e->type->ops.sq.elevator_merge_fn(q, req, bio);
|
||||
|
||||
return ELEVATOR_NO_MERGE;
|
||||
@ -456,8 +478,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
|
||||
*
|
||||
* Returns true if we merged, false otherwise
|
||||
*/
|
||||
static bool elv_attempt_insert_merge(struct request_queue *q,
|
||||
struct request *rq)
|
||||
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct request *__rq;
|
||||
bool ret;
|
||||
@ -495,7 +516,9 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e->type->ops.sq.elevator_merged_fn)
|
||||
if (e->uses_mq && e->type->ops.mq.request_merged)
|
||||
e->type->ops.mq.request_merged(q, rq, type);
|
||||
else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
|
||||
e->type->ops.sq.elevator_merged_fn(q, rq, type);
|
||||
|
||||
if (type == ELEVATOR_BACK_MERGE)
|
||||
@ -508,10 +531,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
|
||||
struct request *next)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
const int next_sorted = next->rq_flags & RQF_SORTED;
|
||||
bool next_sorted = false;
|
||||
|
||||
if (next_sorted && e->type->ops.sq.elevator_merge_req_fn)
|
||||
e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
|
||||
if (e->uses_mq && e->type->ops.mq.requests_merged)
|
||||
e->type->ops.mq.requests_merged(q, rq, next);
|
||||
else if (e->type->ops.sq.elevator_merge_req_fn) {
|
||||
next_sorted = next->rq_flags & RQF_SORTED;
|
||||
if (next_sorted)
|
||||
e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
|
||||
}
|
||||
|
||||
elv_rqhash_reposition(q, rq);
|
||||
|
||||
@ -528,6 +556,9 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (WARN_ON_ONCE(e->uses_mq))
|
||||
return;
|
||||
|
||||
if (e->type->ops.sq.elevator_bio_merged_fn)
|
||||
e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
|
||||
}
|
||||
@ -574,11 +605,15 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
|
||||
|
||||
void elv_drain_elevator(struct request_queue *q)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
static int printed;
|
||||
|
||||
if (WARN_ON_ONCE(e->uses_mq))
|
||||
return;
|
||||
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
|
||||
while (q->elevator->type->ops.sq.elevator_dispatch_fn(q, 1))
|
||||
while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
|
||||
;
|
||||
if (q->nr_sorted && printed++ < 10) {
|
||||
printk(KERN_ERR "%s: forced dispatching is broken "
|
||||
@ -682,8 +717,11 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e->type->ops.sq.elevator_latter_req_fn)
|
||||
if (e->uses_mq && e->type->ops.mq.next_request)
|
||||
return e->type->ops.mq.next_request(q, rq);
|
||||
else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
|
||||
return e->type->ops.sq.elevator_latter_req_fn(q, rq);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -691,7 +729,9 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e->type->ops.sq.elevator_former_req_fn)
|
||||
if (e->uses_mq && e->type->ops.mq.former_request)
|
||||
return e->type->ops.mq.former_request(q, rq);
|
||||
if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
|
||||
return e->type->ops.sq.elevator_former_req_fn(q, rq);
|
||||
return NULL;
|
||||
}
|
||||
@ -701,6 +741,9 @@ int elv_set_request(struct request_queue *q, struct request *rq,
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (WARN_ON_ONCE(e->uses_mq))
|
||||
return 0;
|
||||
|
||||
if (e->type->ops.sq.elevator_set_req_fn)
|
||||
return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
|
||||
return 0;
|
||||
@ -710,6 +753,9 @@ void elv_put_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (WARN_ON_ONCE(e->uses_mq))
|
||||
return;
|
||||
|
||||
if (e->type->ops.sq.elevator_put_req_fn)
|
||||
e->type->ops.sq.elevator_put_req_fn(rq);
|
||||
}
|
||||
@ -718,6 +764,9 @@ int elv_may_queue(struct request_queue *q, unsigned int op)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (WARN_ON_ONCE(e->uses_mq))
|
||||
return 0;
|
||||
|
||||
if (e->type->ops.sq.elevator_may_queue_fn)
|
||||
return e->type->ops.sq.elevator_may_queue_fn(q, op);
|
||||
|
||||
@ -728,6 +777,9 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (WARN_ON_ONCE(e->uses_mq))
|
||||
return;
|
||||
|
||||
/*
|
||||
* request is released from the driver, io must be done
|
||||
*/
|
||||
@ -803,7 +855,7 @@ int elv_register_queue(struct request_queue *q)
|
||||
}
|
||||
kobject_uevent(&e->kobj, KOBJ_ADD);
|
||||
e->registered = 1;
|
||||
if (e->type->ops.sq.elevator_registered_fn)
|
||||
if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
|
||||
e->type->ops.sq.elevator_registered_fn(q);
|
||||
}
|
||||
return error;
|
||||
@ -891,9 +943,14 @@ EXPORT_SYMBOL_GPL(elv_unregister);
|
||||
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
|
||||
{
|
||||
struct elevator_queue *old = q->elevator;
|
||||
bool registered = old->registered;
|
||||
bool old_registered = false;
|
||||
int err;
|
||||
|
||||
if (q->mq_ops) {
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn on BYPASS and drain all requests w/ elevator private data.
|
||||
* Block layer doesn't call into a quiesced elevator - all requests
|
||||
@ -901,42 +958,76 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
|
||||
* using INSERT_BACK. All requests have SOFTBARRIER set and no
|
||||
* merge happens either.
|
||||
*/
|
||||
blk_queue_bypass_start(q);
|
||||
if (old) {
|
||||
old_registered = old->registered;
|
||||
|
||||
/* unregister and clear all auxiliary data of the old elevator */
|
||||
if (registered)
|
||||
elv_unregister_queue(q);
|
||||
if (old->uses_mq)
|
||||
blk_mq_sched_teardown(q);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
ioc_clear_queue(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
if (!q->mq_ops)
|
||||
blk_queue_bypass_start(q);
|
||||
|
||||
/* unregister and clear all auxiliary data of the old elevator */
|
||||
if (old_registered)
|
||||
elv_unregister_queue(q);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
ioc_clear_queue(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
|
||||
/* allocate, init and register new elevator */
|
||||
err = new_e->ops.sq.elevator_init_fn(q, new_e);
|
||||
if (err)
|
||||
goto fail_init;
|
||||
if (new_e) {
|
||||
if (new_e->uses_mq) {
|
||||
err = blk_mq_sched_setup(q);
|
||||
if (!err)
|
||||
err = new_e->ops.mq.init_sched(q, new_e);
|
||||
} else
|
||||
err = new_e->ops.sq.elevator_init_fn(q, new_e);
|
||||
if (err)
|
||||
goto fail_init;
|
||||
|
||||
if (registered) {
|
||||
err = elv_register_queue(q);
|
||||
if (err)
|
||||
goto fail_register;
|
||||
}
|
||||
} else
|
||||
q->elevator = NULL;
|
||||
|
||||
/* done, kill the old one and finish */
|
||||
elevator_exit(old);
|
||||
blk_queue_bypass_end(q);
|
||||
if (old) {
|
||||
elevator_exit(old);
|
||||
if (!q->mq_ops)
|
||||
blk_queue_bypass_end(q);
|
||||
}
|
||||
|
||||
blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
|
||||
if (q->mq_ops) {
|
||||
blk_mq_unfreeze_queue(q);
|
||||
blk_mq_start_stopped_hw_queues(q, true);
|
||||
}
|
||||
|
||||
if (new_e)
|
||||
blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
|
||||
else
|
||||
blk_add_trace_msg(q, "elv switch: none");
|
||||
|
||||
return 0;
|
||||
|
||||
fail_register:
|
||||
if (q->mq_ops)
|
||||
blk_mq_sched_teardown(q);
|
||||
elevator_exit(q->elevator);
|
||||
fail_init:
|
||||
/* switch failed, restore and re-register old elevator */
|
||||
q->elevator = old;
|
||||
elv_register_queue(q);
|
||||
blk_queue_bypass_end(q);
|
||||
if (old) {
|
||||
q->elevator = old;
|
||||
elv_register_queue(q);
|
||||
if (!q->mq_ops)
|
||||
blk_queue_bypass_end(q);
|
||||
}
|
||||
if (q->mq_ops) {
|
||||
blk_mq_unfreeze_queue(q);
|
||||
blk_mq_start_stopped_hw_queues(q, true);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -949,8 +1040,11 @@ static int __elevator_change(struct request_queue *q, const char *name)
|
||||
char elevator_name[ELV_NAME_MAX];
|
||||
struct elevator_type *e;
|
||||
|
||||
if (!q->elevator)
|
||||
return -ENXIO;
|
||||
/*
|
||||
* Special case for mq, turn off scheduling
|
||||
*/
|
||||
if (q->mq_ops && !strncmp(name, "none", 4))
|
||||
return elevator_switch(q, NULL);
|
||||
|
||||
strlcpy(elevator_name, name, sizeof(elevator_name));
|
||||
e = elevator_get(strstrip(elevator_name), true);
|
||||
@ -959,11 +1053,21 @@ static int __elevator_change(struct request_queue *q, const char *name)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
|
||||
if (q->elevator &&
|
||||
!strcmp(elevator_name, q->elevator->type->elevator_name)) {
|
||||
elevator_put(e);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!e->uses_mq && q->mq_ops) {
|
||||
elevator_put(e);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (e->uses_mq && !q->mq_ops) {
|
||||
elevator_put(e);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return elevator_switch(q, e);
|
||||
}
|
||||
|
||||
@ -985,7 +1089,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!q->elevator)
|
||||
if (!(q->mq_ops || q->request_fn))
|
||||
return count;
|
||||
|
||||
ret = __elevator_change(q, name);
|
||||
@ -999,24 +1103,34 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
|
||||
ssize_t elv_iosched_show(struct request_queue *q, char *name)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct elevator_type *elv;
|
||||
struct elevator_type *elv = NULL;
|
||||
struct elevator_type *__e;
|
||||
int len = 0;
|
||||
|
||||
if (!q->elevator || !blk_queue_stackable(q))
|
||||
if (!blk_queue_stackable(q))
|
||||
return sprintf(name, "none\n");
|
||||
|
||||
elv = e->type;
|
||||
if (!q->elevator)
|
||||
len += sprintf(name+len, "[none] ");
|
||||
else
|
||||
elv = e->type;
|
||||
|
||||
spin_lock(&elv_list_lock);
|
||||
list_for_each_entry(__e, &elv_list, list) {
|
||||
if (!strcmp(elv->elevator_name, __e->elevator_name))
|
||||
if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
|
||||
len += sprintf(name+len, "[%s] ", elv->elevator_name);
|
||||
else
|
||||
continue;
|
||||
}
|
||||
if (__e->uses_mq && q->mq_ops)
|
||||
len += sprintf(name+len, "%s ", __e->elevator_name);
|
||||
else if (!__e->uses_mq && !q->mq_ops)
|
||||
len += sprintf(name+len, "%s ", __e->elevator_name);
|
||||
}
|
||||
spin_unlock(&elv_list_lock);
|
||||
|
||||
if (q->mq_ops && q->elevator)
|
||||
len += sprintf(name+len, "none");
|
||||
|
||||
len += sprintf(len+name, "\n");
|
||||
return len;
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
|
||||
|
||||
unsigned long flags; /* BLK_MQ_F_* flags */
|
||||
|
||||
void *sched_data;
|
||||
struct request_queue *queue;
|
||||
struct blk_flush_queue *fq;
|
||||
|
||||
@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
|
||||
atomic_t wait_index;
|
||||
|
||||
struct blk_mq_tags *tags;
|
||||
struct blk_mq_tags *sched_tags;
|
||||
|
||||
struct srcu_struct queue_rq_srcu;
|
||||
|
||||
@ -156,6 +158,7 @@ enum {
|
||||
|
||||
BLK_MQ_S_STOPPED = 0,
|
||||
BLK_MQ_S_TAG_ACTIVE = 1,
|
||||
BLK_MQ_S_SCHED_RESTART = 2,
|
||||
|
||||
BLK_MQ_MAX_DEPTH = 10240,
|
||||
|
||||
@ -179,13 +182,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
|
||||
|
||||
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
|
||||
|
||||
void blk_mq_insert_request(struct request *, bool, bool, bool);
|
||||
void blk_mq_free_request(struct request *rq);
|
||||
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
|
||||
|
||||
enum {
|
||||
BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
|
||||
BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
|
||||
BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
|
||||
};
|
||||
|
||||
struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
|
||||
|
@ -154,6 +154,7 @@ struct request {
|
||||
|
||||
/* the following two fields are internal, NEVER access directly */
|
||||
unsigned int __data_len; /* total data len */
|
||||
int tag;
|
||||
sector_t __sector; /* sector cursor */
|
||||
|
||||
struct bio *bio;
|
||||
@ -220,9 +221,10 @@ struct request {
|
||||
|
||||
unsigned short ioprio;
|
||||
|
||||
int internal_tag;
|
||||
|
||||
void *special; /* opaque pointer available for LLD use */
|
||||
|
||||
int tag;
|
||||
int errors;
|
||||
|
||||
/*
|
||||
|
@ -77,6 +77,34 @@ struct elevator_ops
|
||||
elevator_registered_fn *elevator_registered_fn;
|
||||
};
|
||||
|
||||
struct blk_mq_alloc_data;
|
||||
struct blk_mq_hw_ctx;
|
||||
|
||||
/*
 * Scheduler callbacks for blk-mq queues. This struct is the 'mq' member of
 * the ops union in struct elevator_type; the 'sq' member carries the
 * legacy struct elevator_ops hooks.
 *
 * As used from elevator.c: init_sched is called right after
 * blk_mq_sched_setup() succeeds when an elevator with uses_mq set is
 * initialised or switched to, and exit_sched from elevator_exit().
 * allow_merge, request_merge, request_merged, requests_merged,
 * former_request and next_request are the uses_mq counterparts of the
 * corresponding ops.sq hooks; the elevator core checks each of them for
 * NULL before calling. The remaining hooks are not referenced from
 * elevator.c - see their callers for the exact contract.
 */
struct elevator_mq_ops {
|
||||
int (*init_sched)(struct request_queue *, struct elevator_type *);
|
||||
void (*exit_sched)(struct elevator_queue *);
|
||||
|
||||
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
|
||||
bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
|
||||
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
|
||||
void (*request_merged)(struct request_queue *, struct request *, int);
|
||||
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
|
||||
struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
|
||||
void (*put_request)(struct request *);
|
||||
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
|
||||
void (*dispatch_requests)(struct blk_mq_hw_ctx *, struct list_head *);
|
||||
bool (*has_work)(struct blk_mq_hw_ctx *);
|
||||
void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
|
||||
void (*started_request)(struct request *);
|
||||
void (*requeue_request)(struct request *);
|
||||
struct request *(*former_request)(struct request_queue *, struct request *);
|
||||
struct request *(*next_request)(struct request_queue *, struct request *);
|
||||
int (*get_rq_priv)(struct request_queue *, struct request *);
|
||||
void (*put_rq_priv)(struct request_queue *, struct request *);
|
||||
void (*init_icq)(struct io_cq *);
|
||||
void (*exit_icq)(struct io_cq *);
|
||||
};
|
||||
|
||||
#define ELV_NAME_MAX (16)
|
||||
|
||||
struct elv_fs_entry {
|
||||
@ -96,12 +124,14 @@ struct elevator_type
|
||||
/* fields provided by elevator implementation */
|
||||
union {
|
||||
struct elevator_ops sq;
|
||||
struct elevator_mq_ops mq;
|
||||
} ops;
|
||||
size_t icq_size; /* see iocontext.h */
|
||||
size_t icq_align; /* ditto */
|
||||
struct elv_fs_entry *elevator_attrs;
|
||||
char elevator_name[ELV_NAME_MAX];
|
||||
struct module *elevator_owner;
|
||||
bool uses_mq;
|
||||
|
||||
/* managed by elevator core */
|
||||
char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
|
||||
@ -125,6 +155,7 @@ struct elevator_queue
|
||||
struct kobject kobj;
|
||||
struct mutex sysfs_lock;
|
||||
unsigned int registered:1;
|
||||
unsigned int uses_mq:1;
|
||||
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
|
||||
};
|
||||
|
||||
@ -141,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
|
||||
extern void elv_merged_request(struct request_queue *, struct request *, int);
|
||||
extern void elv_bio_merged(struct request_queue *q, struct request *,
|
||||
struct bio *);
|
||||
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
|
||||
extern void elv_requeue_request(struct request_queue *, struct request *);
|
||||
extern struct request *elv_former_request(struct request_queue *, struct request *);
|
||||
extern struct request *elv_latter_request(struct request_queue *, struct request *);
|
||||
|
Loading…
Reference in New Issue
Block a user