mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
block, bfq: make waker-queue detection more robust
In the presence of many parallel I/O flows, the detection of waker bfq_queues suffers from false positives. This commits addresses this issue by making the filtering of actual wakers more selective. In more detail, a candidate waker must be found to meet waker requirements three times before being promoted to actual waker. Tested-by: Jan Kara <jack@suse.cz> Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
5a5436b98d
commit
71217df39d
@ -158,7 +158,6 @@ BFQ_BFQQ_FNS(in_large_burst);
|
||||
BFQ_BFQQ_FNS(coop);
|
||||
BFQ_BFQQ_FNS(split_coop);
|
||||
BFQ_BFQQ_FNS(softrt_update);
|
||||
BFQ_BFQQ_FNS(has_waker);
|
||||
#undef BFQ_BFQQ_FNS \
|
||||
|
||||
/* Expiration time of sync (0) and async (1) requests, in ns. */
|
||||
@ -1905,6 +1904,107 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect whether bfqq's I/O seems synchronized with that of some
|
||||
* other queue, i.e., whether bfqq, after remaining empty, happens to
|
||||
* receive new I/O only right after some I/O request of the other
|
||||
* queue has been completed. We call waker queue the other queue, and
|
||||
* we assume, for simplicity, that bfqq may have at most one waker
|
||||
* queue.
|
||||
*
|
||||
* A remarkable throughput boost can be reached by unconditionally
|
||||
* injecting the I/O of the waker queue, every time a new
|
||||
* bfq_dispatch_request happens to be invoked while I/O is being
|
||||
* plugged for bfqq. In addition to boosting throughput, this
|
||||
* unblocks bfqq's I/O, thereby improving bandwidth and latency for
|
||||
* bfqq. Note that these same results may be achieved with the general
|
||||
* injection mechanism, but less effectively. For details on this
|
||||
* aspect, see the comments on the choice of the queue for injection
|
||||
* in bfq_select_queue().
|
||||
*
|
||||
* Turning back to the detection of a waker queue, a queue Q is deemed
|
||||
* as a waker queue for bfqq if, for three consecutive times, bfqq
|
||||
* happens to become non empty right after a request of Q has been
|
||||
* completed. In particular, on the first time, Q is tentatively set
|
||||
* as a candidate waker queue, while on the third consecutive time
|
||||
* that Q is detected, the field waker_bfqq is set to Q, to confirm
|
||||
* that Q is a waker queue for bfqq. These detection steps are
|
||||
* performed only if bfqq has a long think time, so as to make it more
|
||||
* likely that bfqq's I/O is actually being blocked by a
|
||||
* synchronization. This last filter, plus the above three-times
|
||||
* requirement, make false positives less likely.
|
||||
*
|
||||
* NOTE
|
||||
*
|
||||
* The sooner a waker queue is detected, the sooner throughput can be
|
||||
* boosted by injecting I/O from the waker queue. Fortunately,
|
||||
* detection is likely to be actually fast, for the following
|
||||
* reasons. While blocked by synchronization, bfqq has a long think
|
||||
* time. This implies that bfqq's inject limit is at least equal to 1
|
||||
* (see the comments in bfq_update_inject_limit()). So, thanks to
|
||||
* injection, the waker queue is likely to be served during the very
|
||||
* first I/O-plugging time interval for bfqq. This triggers the first
|
||||
* step of the detection mechanism. Thanks again to injection, the
|
||||
* candidate waker queue is then likely to be confirmed no later than
|
||||
* during the next I/O-plugging interval for bfqq.
|
||||
*
|
||||
* ISSUE
|
||||
*
|
||||
* On queue merging all waker information is lost.
|
||||
*/
|
||||
void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, u64 now_ns)
|
||||
{
|
||||
if (!bfqd->last_completed_rq_bfqq ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq ||
|
||||
bfq_bfqq_has_short_ttime(bfqq) ||
|
||||
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
|
||||
return;
|
||||
|
||||
if (bfqd->last_completed_rq_bfqq !=
|
||||
bfqq->tentative_waker_bfqq) {
|
||||
/*
|
||||
* First synchronization detected with a
|
||||
* candidate waker queue, or with a different
|
||||
* candidate waker queue from the current one.
|
||||
*/
|
||||
bfqq->tentative_waker_bfqq =
|
||||
bfqd->last_completed_rq_bfqq;
|
||||
bfqq->num_waker_detections = 1;
|
||||
} else /* Same tentative waker queue detected again */
|
||||
bfqq->num_waker_detections++;
|
||||
|
||||
if (bfqq->num_waker_detections == 3) {
|
||||
bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
|
||||
bfqq->tentative_waker_bfqq = NULL;
|
||||
|
||||
/*
|
||||
* If the waker queue disappears, then
|
||||
* bfqq->waker_bfqq must be reset. To
|
||||
* this goal, we maintain in each
|
||||
* waker queue a list, woken_list, of
|
||||
* all the queues that reference the
|
||||
* waker queue through their
|
||||
* waker_bfqq pointer. When the waker
|
||||
* queue exits, the waker_bfqq pointer
|
||||
* of all the queues in the woken_list
|
||||
* is reset.
|
||||
*
|
||||
* In addition, if bfqq is already in
|
||||
* the woken_list of a waker queue,
|
||||
* then, before being inserted into
|
||||
* the woken_list of a new waker
|
||||
* queue, bfqq must be removed from
|
||||
* the woken_list of the old waker
|
||||
* queue.
|
||||
*/
|
||||
if (!hlist_unhashed(&bfqq->woken_list_node))
|
||||
hlist_del_init(&bfqq->woken_list_node);
|
||||
hlist_add_head(&bfqq->woken_list_node,
|
||||
&bfqd->last_completed_rq_bfqq->woken_list);
|
||||
}
|
||||
}
|
||||
|
||||
static void bfq_add_request(struct request *rq)
|
||||
{
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(rq);
|
||||
@ -1919,111 +2019,7 @@ static void bfq_add_request(struct request *rq)
|
||||
bfqd->queued++;
|
||||
|
||||
if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
|
||||
/*
|
||||
* Detect whether bfqq's I/O seems synchronized with
|
||||
* that of some other queue, i.e., whether bfqq, after
|
||||
* remaining empty, happens to receive new I/O only
|
||||
* right after some I/O request of the other queue has
|
||||
* been completed. We call waker queue the other
|
||||
* queue, and we assume, for simplicity, that bfqq may
|
||||
* have at most one waker queue.
|
||||
*
|
||||
* A remarkable throughput boost can be reached by
|
||||
* unconditionally injecting the I/O of the waker
|
||||
* queue, every time a new bfq_dispatch_request
|
||||
* happens to be invoked while I/O is being plugged
|
||||
* for bfqq. In addition to boosting throughput, this
|
||||
* unblocks bfqq's I/O, thereby improving bandwidth
|
||||
* and latency for bfqq. Note that these same results
|
||||
* may be achieved with the general injection
|
||||
* mechanism, but less effectively. For details on
|
||||
* this aspect, see the comments on the choice of the
|
||||
* queue for injection in bfq_select_queue().
|
||||
*
|
||||
* Turning back to the detection of a waker queue, a
|
||||
* queue Q is deemed as a waker queue for bfqq if, for
|
||||
* two consecutive times, bfqq happens to become non
|
||||
* empty right after a request of Q has been
|
||||
* completed. In particular, on the first time, Q is
|
||||
* tentatively set as a candidate waker queue, while
|
||||
* on the second time, the flag
|
||||
* bfq_bfqq_has_waker(bfqq) is set to confirm that Q
|
||||
* is a waker queue for bfqq. These detection steps
|
||||
* are performed only if bfqq has a long think time,
|
||||
* so as to make it more likely that bfqq's I/O is
|
||||
* actually being blocked by a synchronization. This
|
||||
* last filter, plus the above two-times requirement,
|
||||
* make false positives less likely.
|
||||
*
|
||||
* NOTE
|
||||
*
|
||||
* The sooner a waker queue is detected, the sooner
|
||||
* throughput can be boosted by injecting I/O from the
|
||||
* waker queue. Fortunately, detection is likely to be
|
||||
* actually fast, for the following reasons. While
|
||||
* blocked by synchronization, bfqq has a long think
|
||||
* time. This implies that bfqq's inject limit is at
|
||||
* least equal to 1 (see the comments in
|
||||
* bfq_update_inject_limit()). So, thanks to
|
||||
* injection, the waker queue is likely to be served
|
||||
* during the very first I/O-plugging time interval
|
||||
* for bfqq. This triggers the first step of the
|
||||
* detection mechanism. Thanks again to injection, the
|
||||
* candidate waker queue is then likely to be
|
||||
* confirmed no later than during the next
|
||||
* I/O-plugging interval for bfqq.
|
||||
*/
|
||||
if (bfqd->last_completed_rq_bfqq &&
|
||||
!bfq_bfqq_has_short_ttime(bfqq) &&
|
||||
now_ns - bfqd->last_completion <
|
||||
4 * NSEC_PER_MSEC) {
|
||||
if (bfqd->last_completed_rq_bfqq != bfqq &&
|
||||
bfqd->last_completed_rq_bfqq !=
|
||||
bfqq->waker_bfqq) {
|
||||
/*
|
||||
* First synchronization detected with
|
||||
* a candidate waker queue, or with a
|
||||
* different candidate waker queue
|
||||
* from the current one.
|
||||
*/
|
||||
bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
|
||||
|
||||
/*
|
||||
* If the waker queue disappears, then
|
||||
* bfqq->waker_bfqq must be reset. To
|
||||
* this goal, we maintain in each
|
||||
* waker queue a list, woken_list, of
|
||||
* all the queues that reference the
|
||||
* waker queue through their
|
||||
* waker_bfqq pointer. When the waker
|
||||
* queue exits, the waker_bfqq pointer
|
||||
* of all the queues in the woken_list
|
||||
* is reset.
|
||||
*
|
||||
* In addition, if bfqq is already in
|
||||
* the woken_list of a waker queue,
|
||||
* then, before being inserted into
|
||||
* the woken_list of a new waker
|
||||
* queue, bfqq must be removed from
|
||||
* the woken_list of the old waker
|
||||
* queue.
|
||||
*/
|
||||
if (!hlist_unhashed(&bfqq->woken_list_node))
|
||||
hlist_del_init(&bfqq->woken_list_node);
|
||||
hlist_add_head(&bfqq->woken_list_node,
|
||||
&bfqd->last_completed_rq_bfqq->woken_list);
|
||||
|
||||
bfq_clear_bfqq_has_waker(bfqq);
|
||||
} else if (bfqd->last_completed_rq_bfqq ==
|
||||
bfqq->waker_bfqq &&
|
||||
!bfq_bfqq_has_waker(bfqq)) {
|
||||
/*
|
||||
* synchronization with waker_bfqq
|
||||
* seen for the second time
|
||||
*/
|
||||
bfq_mark_bfqq_has_waker(bfqq);
|
||||
}
|
||||
}
|
||||
bfq_check_waker(bfqd, bfqq, now_ns);
|
||||
|
||||
/*
|
||||
* Periodically reset inject limit, to make sure that
|
||||
@ -4569,7 +4565,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
|
||||
bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
|
||||
bfq_bfqq_budget_left(async_bfqq))
|
||||
bfqq = bfqq->bic->bfqq[0];
|
||||
else if (bfq_bfqq_has_waker(bfqq) &&
|
||||
else if (bfqq->waker_bfqq &&
|
||||
bfq_bfqq_busy(bfqq->waker_bfqq) &&
|
||||
bfqq->waker_bfqq->next_rq &&
|
||||
bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
|
||||
@ -4973,7 +4969,6 @@ void bfq_put_queue(struct bfq_queue *bfqq)
|
||||
hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
|
||||
woken_list_node) {
|
||||
item->waker_bfqq = NULL;
|
||||
bfq_clear_bfqq_has_waker(item);
|
||||
hlist_del_init(&item->woken_list_node);
|
||||
}
|
||||
|
||||
|
@ -376,6 +376,11 @@ struct bfq_queue {
|
||||
* bfq_select_queue().
|
||||
*/
|
||||
struct bfq_queue *waker_bfqq;
|
||||
/* pointer to the curr. tentative waker queue, see bfq_check_waker() */
|
||||
struct bfq_queue *tentative_waker_bfqq;
|
||||
/* number of times the same tentative waker has been detected */
|
||||
unsigned int num_waker_detections;
|
||||
|
||||
/* node for woken_list, see below */
|
||||
struct hlist_node woken_list_node;
|
||||
/*
|
||||
@ -776,7 +781,6 @@ enum bfqq_state_flags {
|
||||
*/
|
||||
BFQQF_coop, /* bfqq is shared */
|
||||
BFQQF_split_coop, /* shared bfqq will be split */
|
||||
BFQQF_has_waker /* bfqq has a waker queue */
|
||||
};
|
||||
|
||||
#define BFQ_BFQQ_FNS(name) \
|
||||
@ -796,7 +800,6 @@ BFQ_BFQQ_FNS(in_large_burst);
|
||||
BFQ_BFQQ_FNS(coop);
|
||||
BFQ_BFQQ_FNS(split_coop);
|
||||
BFQ_BFQQ_FNS(softrt_update);
|
||||
BFQ_BFQQ_FNS(has_waker);
|
||||
#undef BFQ_BFQQ_FNS
|
||||
|
||||
/* Expiration reasons. */
|
||||
|
Loading…
Reference in New Issue
Block a user