mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
for-5.12/block-2021-02-17
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmAtmIwQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgplzLEAC5O+3rBM8QuiJdo39Yppmuw4hDJ6hOKynP EJQLKQQi0VfXgU+MprGvcbpFYmNbgICvUICQkEzJuk++kPCu/BJtJz0yErQeLgS+ RdXiPV6enbF7iRML5TVRTr1q/z7sJMXcIIJ8Pz/rU/JNfGYExVd0WfnEY9mp1jOt Bl9V+qyTazdP+Ma4+uEPatSayqcdi1rxB5I+7v/sLiOvKZZWkaRZjUZ/mxAjUfvK dBOOPjMygEo3tCLkIyyA6lpLvr1r+SUZhLuebRLEKa3To3TW6RtoG0qwpKmI2iKw ylLeVLB60nM9RUxjflVOfBsHxz1bDg5Ve86y5nCjQd4Jo8x1c4DnecyGE5/Tu8Rg rgbsfD6nFWzhDCvcZT0XrfQ4ZAjIL2IfT+ypQiQ6UlRd3hvIKRmzWMkjuH2svr0u ey9Kq+lYerI4cM0F3W73gzUKdIQOuCzBCYxQuSQQomscBa7FCInyU192dAI9Aj6l Yd06mgKu6qCx6zLv6JfpBqaBHZMwyGE4dmZgPQFuuwO+b4N+Ck3Jm5fzEzw/xIxQ wdo/DlsAl60BXentB6FByGBJaCjVdSymRqN/xNCAbFKCjmr6TLBuXPfg1gYYO7xC VOcVjWe8iN3wWHZab3t2mxMKH9B9B/KKzIhu6TNHSmgtQ5paZPRCBx995pDyRw26 WC22RGC2MA== =os1E -----END PGP SIGNATURE----- Merge tag 'for-5.12/block-2021-02-17' of git://git.kernel.dk/linux-block Pull core block updates from Jens Axboe: "Another nice round of removing more code than what is added, mostly due to Christoph's relentless pursuit of tech debt removal/cleanups. 
This pull request contains: - Two series of BFQ improvements (Paolo, Jan, Jia) - Block iov_iter improvements (Pavel) - bsg error path fix (Pan) - blk-mq scheduler improvements (Jan) - -EBUSY discard fix (Jan) - bvec allocation improvements (Ming, Christoph) - bio allocation and init improvements (Christoph) - Store bdev pointer in bio instead of gendisk + partno (Christoph) - Block trace point cleanups (Christoph) - hard read-only vs read-only split (Christoph) - Block based swap cleanups (Christoph) - Zoned write granularity support (Damien) - Various fixes/tweaks (Chunguang, Guoqing, Lei, Lukas, Huhai)" * tag 'for-5.12/block-2021-02-17' of git://git.kernel.dk/linux-block: (104 commits) mm: simplify swapdev_block sd_zbc: clear zone resources for non-zoned case block: introduce blk_queue_clear_zone_settings() zonefs: use zone write granularity as block size block: introduce zone_write_granularity limit block: use blk_queue_set_zoned in add_partition() nullb: use blk_queue_set_zoned() to setup zoned devices nvme: cleanup zone information initialization block: document zone_append_max_bytes attribute block: use bi_max_vecs to find the bvec pool md/raid10: remove dead code in reshape_request block: mark the bio as cloned in bio_iov_bvec_set block: set BIO_NO_PAGE_REF in bio_iov_bvec_set block: remove a layer of indentation in bio_iov_iter_get_pages block: turn the nr_iovecs argument to bio_alloc* into an unsigned short block: remove the 1 and 4 vec bvec_slabs entries block: streamline bvec_alloc block: factor out a bvec_alloc_gfp helper block: move struct biovec_slab to bio.c block: reuse BIO_INLINE_VECS for integrity bvecs ...
This commit is contained in:
commit
582cd91f69
@ -40,6 +40,8 @@ normal code doesn't have to deal with bi_bvec_done.
|
||||
There is a lower level advance function - bvec_iter_advance() - which takes
|
||||
a pointer to a biovec, not a bio; this is used by the bio integrity code.
|
||||
|
||||
As of 5.12 bvec segments with zero bv_len are not supported.
|
||||
|
||||
What's all this get us?
|
||||
=======================
|
||||
|
||||
|
@ -261,6 +261,12 @@ For block drivers that support REQ_OP_WRITE_ZEROES, the maximum number of
|
||||
bytes that can be zeroed at once. The value 0 means that REQ_OP_WRITE_ZEROES
|
||||
is not supported.
|
||||
|
||||
zone_append_max_bytes (RO)
|
||||
--------------------------
|
||||
This is the maximum number of bytes that can be written to a sequential
|
||||
zone of a zoned block device using a zone append write operation
|
||||
(REQ_OP_ZONE_APPEND). This value is always 0 for regular block devices.
|
||||
|
||||
zoned (RO)
|
||||
----------
|
||||
This indicates if the device is a zoned block device and the zone model of the
|
||||
@ -273,4 +279,11 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
|
||||
do not support zone commands, they will be treated as regular block devices
|
||||
and zoned will report "none".
|
||||
|
||||
zone_write_granularity (RO)
|
||||
---------------------------
|
||||
This indicates the alignment constraint, in bytes, for write operations in
|
||||
sequential zones of zoned block devices (devices with a zoned attribute
|
||||
that reports "host-managed" or "host-aware"). This value is always 0 for
|
||||
regular block devices.
|
||||
|
||||
Jens Axboe <jens.axboe@oracle.com>, February 2009
|
||||
|
@ -179,7 +179,6 @@ fault_type=%d Support configuring fault injection type, should be
|
||||
FAULT_KVMALLOC 0x000000002
|
||||
FAULT_PAGE_ALLOC 0x000000004
|
||||
FAULT_PAGE_GET 0x000000008
|
||||
FAULT_ALLOC_BIO 0x000000010
|
||||
FAULT_ALLOC_NID 0x000000020
|
||||
FAULT_ORPHAN 0x000000040
|
||||
FAULT_BLOCK 0x000000080
|
||||
|
@ -865,3 +865,19 @@ no matter what. Everything is handled by the caller.
|
||||
|
||||
clone_private_mount() returns a longterm mount now, so the proper destructor of
|
||||
its result is kern_unmount() or kern_unmount_array().
|
||||
|
||||
---
|
||||
|
||||
**mandatory**
|
||||
|
||||
zero-length bvec segments are disallowed, they must be filtered out before
|
||||
being passed on to an iterator.
|
||||
|
||||
---
|
||||
|
||||
**mandatory**
|
||||
|
||||
For bvec based iterators bio_iov_iter_get_pages() now doesn't copy bvecs but
|
||||
uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
|
||||
page references stay until I/O has completed, i.e. until ->ki_complete() has
|
||||
been called or returned with non -EIOCBQUEUED code.
|
||||
|
@ -61,7 +61,7 @@ struct nfhd_device {
|
||||
|
||||
static blk_qc_t nfhd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct nfhd_device *dev = bio->bi_disk->private_data;
|
||||
struct nfhd_device *dev = bio->bi_bdev->bd_disk->private_data;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
int dir, len, shift;
|
||||
|
@ -103,7 +103,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
|
||||
|
||||
static blk_qc_t simdisk_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct simdisk *dev = bio->bi_disk->private_data;
|
||||
struct simdisk *dev = bio->bi_bdev->bd_disk->private_data;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
|
@ -158,7 +158,6 @@ BFQ_BFQQ_FNS(in_large_burst);
|
||||
BFQ_BFQQ_FNS(coop);
|
||||
BFQ_BFQQ_FNS(split_coop);
|
||||
BFQ_BFQQ_FNS(softrt_update);
|
||||
BFQ_BFQQ_FNS(has_waker);
|
||||
#undef BFQ_BFQQ_FNS \
|
||||
|
||||
/* Expiration time of sync (0) and async (1) requests, in ns. */
|
||||
@ -1024,9 +1023,16 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
|
||||
else
|
||||
bfq_clear_bfqq_IO_bound(bfqq);
|
||||
|
||||
bfqq->last_serv_time_ns = bic->saved_last_serv_time_ns;
|
||||
bfqq->inject_limit = bic->saved_inject_limit;
|
||||
bfqq->decrease_time_jif = bic->saved_decrease_time_jif;
|
||||
|
||||
bfqq->entity.new_weight = bic->saved_weight;
|
||||
bfqq->ttime = bic->saved_ttime;
|
||||
bfqq->io_start_time = bic->saved_io_start_time;
|
||||
bfqq->tot_idle_time = bic->saved_tot_idle_time;
|
||||
bfqq->wr_coeff = bic->saved_wr_coeff;
|
||||
bfqq->service_from_wr = bic->saved_service_from_wr;
|
||||
bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
|
||||
bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
|
||||
bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
|
||||
@ -1647,6 +1653,8 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq,
|
||||
return bfqq_weight > in_serv_weight;
|
||||
}
|
||||
|
||||
static bool bfq_better_to_idle(struct bfq_queue *bfqq);
|
||||
|
||||
static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq,
|
||||
int old_wr_coeff,
|
||||
@ -1671,15 +1679,19 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
* - it is sync,
|
||||
* - it does not belong to a large burst,
|
||||
* - it has been idle for enough time or is soft real-time,
|
||||
* - is linked to a bfq_io_cq (it is not shared in any sense).
|
||||
* - is linked to a bfq_io_cq (it is not shared in any sense),
|
||||
* - has a default weight (otherwise we assume the user wanted
|
||||
* to control its weight explicitly)
|
||||
*/
|
||||
in_burst = bfq_bfqq_in_large_burst(bfqq);
|
||||
soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
|
||||
!BFQQ_TOTALLY_SEEKY(bfqq) &&
|
||||
!in_burst &&
|
||||
time_is_before_jiffies(bfqq->soft_rt_next_start) &&
|
||||
bfqq->dispatched == 0;
|
||||
*interactive = !in_burst && idle_for_long_time;
|
||||
bfqq->dispatched == 0 &&
|
||||
bfqq->entity.new_weight == 40;
|
||||
*interactive = !in_burst && idle_for_long_time &&
|
||||
bfqq->entity.new_weight == 40;
|
||||
wr_or_deserves_wr = bfqd->low_latency &&
|
||||
(bfqq->wr_coeff > 1 ||
|
||||
(bfq_bfqq_sync(bfqq) &&
|
||||
@ -1717,17 +1729,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
|
||||
bfq_clear_bfqq_just_created(bfqq);
|
||||
|
||||
|
||||
if (!bfq_bfqq_IO_bound(bfqq)) {
|
||||
if (arrived_in_time) {
|
||||
bfqq->requests_within_timer++;
|
||||
if (bfqq->requests_within_timer >=
|
||||
bfqd->bfq_requests_within_timer)
|
||||
bfq_mark_bfqq_IO_bound(bfqq);
|
||||
} else
|
||||
bfqq->requests_within_timer = 0;
|
||||
}
|
||||
|
||||
if (bfqd->low_latency) {
|
||||
if (unlikely(time_is_after_jiffies(bfqq->split_time)))
|
||||
/* wraparound */
|
||||
@ -1755,10 +1756,10 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
bfq_add_bfqq_busy(bfqd, bfqq);
|
||||
|
||||
/*
|
||||
* Expire in-service queue only if preemption may be needed
|
||||
* for guarantees. In particular, we care only about two
|
||||
* cases. The first is that bfqq has to recover a service
|
||||
* hole, as explained in the comments on
|
||||
* Expire in-service queue if preemption may be needed for
|
||||
* guarantees or throughput. As for guarantees, we care
|
||||
* explicitly about two cases. The first is that bfqq has to
|
||||
* recover a service hole, as explained in the comments on
|
||||
* bfq_bfqq_update_budg_for_activation(), i.e., that
|
||||
* bfqq_wants_to_preempt is true. However, if bfqq does not
|
||||
* carry time-critical I/O, then bfqq's bandwidth is less
|
||||
@ -1785,11 +1786,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
* timestamps of the in-service queue would need to be
|
||||
* updated, and this operation is quite costly (see the
|
||||
* comments on bfq_bfqq_update_budg_for_activation()).
|
||||
*
|
||||
* As for throughput, we ask bfq_better_to_idle() whether we
|
||||
* still need to plug I/O dispatching. If bfq_better_to_idle()
|
||||
* says no, then plugging is not needed any longer, either to
|
||||
* boost throughput or to preserve service guarantees. Then
|
||||
* the best option is to stop plugging I/O, as not doing so
|
||||
* would certainly lower throughput. We may end up in this
|
||||
* case if: (1) upon a dispatch attempt, we detected that it
|
||||
* was better to plug I/O dispatch, and to wait for a new
|
||||
* request to arrive for the currently in-service queue, but
|
||||
* (2) this switch of bfqq to busy changes the scenario.
|
||||
*/
|
||||
if (bfqd->in_service_queue &&
|
||||
((bfqq_wants_to_preempt &&
|
||||
bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) ||
|
||||
bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue)) &&
|
||||
bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue) ||
|
||||
!bfq_better_to_idle(bfqd->in_service_queue)) &&
|
||||
next_queue_may_preempt(bfqd))
|
||||
bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
|
||||
false, BFQQE_PREEMPTED);
|
||||
@ -1861,6 +1874,138 @@ static void bfq_reset_inject_limit(struct bfq_data *bfqd,
|
||||
bfqq->decrease_time_jif = jiffies;
|
||||
}
|
||||
|
||||
static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
|
||||
{
|
||||
u64 tot_io_time = now_ns - bfqq->io_start_time;
|
||||
|
||||
if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfqq->dispatched == 0)
|
||||
bfqq->tot_idle_time +=
|
||||
now_ns - bfqq->ttime.last_end_request;
|
||||
|
||||
if (unlikely(bfq_bfqq_just_created(bfqq)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Must be busy for at least about 80% of the time to be
|
||||
* considered I/O bound.
|
||||
*/
|
||||
if (bfqq->tot_idle_time * 5 > tot_io_time)
|
||||
bfq_clear_bfqq_IO_bound(bfqq);
|
||||
else
|
||||
bfq_mark_bfqq_IO_bound(bfqq);
|
||||
|
||||
/*
|
||||
* Keep an observation window of at most 200 ms in the past
|
||||
* from now.
|
||||
*/
|
||||
if (tot_io_time > 200 * NSEC_PER_MSEC) {
|
||||
bfqq->io_start_time = now_ns - (tot_io_time>>1);
|
||||
bfqq->tot_idle_time >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect whether bfqq's I/O seems synchronized with that of some
|
||||
* other queue, i.e., whether bfqq, after remaining empty, happens to
|
||||
* receive new I/O only right after some I/O request of the other
|
||||
* queue has been completed. We call waker queue the other queue, and
|
||||
* we assume, for simplicity, that bfqq may have at most one waker
|
||||
* queue.
|
||||
*
|
||||
* A remarkable throughput boost can be reached by unconditionally
|
||||
* injecting the I/O of the waker queue, every time a new
|
||||
* bfq_dispatch_request happens to be invoked while I/O is being
|
||||
* plugged for bfqq. In addition to boosting throughput, this
|
||||
* unblocks bfqq's I/O, thereby improving bandwidth and latency for
|
||||
* bfqq. Note that these same results may be achieved with the general
|
||||
* injection mechanism, but less effectively. For details on this
|
||||
* aspect, see the comments on the choice of the queue for injection
|
||||
* in bfq_select_queue().
|
||||
*
|
||||
* Turning back to the detection of a waker queue, a queue Q is deemed
|
||||
* as a waker queue for bfqq if, for three consecutive times, bfqq
|
||||
* happens to become non empty right after a request of Q has been
|
||||
* completed. In particular, on the first time, Q is tentatively set
|
||||
* as a candidate waker queue, while on the third consecutive time
|
||||
* that Q is detected, the field waker_bfqq is set to Q, to confirm
|
||||
* that Q is a waker queue for bfqq. These detection steps are
|
||||
* performed only if bfqq has a long think time, so as to make it more
|
||||
* likely that bfqq's I/O is actually being blocked by a
|
||||
* synchronization. This last filter, plus the above three-times
|
||||
* requirement, make false positives less likely.
|
||||
*
|
||||
* NOTE
|
||||
*
|
||||
* The sooner a waker queue is detected, the sooner throughput can be
|
||||
* boosted by injecting I/O from the waker queue. Fortunately,
|
||||
* detection is likely to be actually fast, for the following
|
||||
* reasons. While blocked by synchronization, bfqq has a long think
|
||||
* time. This implies that bfqq's inject limit is at least equal to 1
|
||||
* (see the comments in bfq_update_inject_limit()). So, thanks to
|
||||
* injection, the waker queue is likely to be served during the very
|
||||
* first I/O-plugging time interval for bfqq. This triggers the first
|
||||
* step of the detection mechanism. Thanks again to injection, the
|
||||
* candidate waker queue is then likely to be confirmed no later than
|
||||
* during the next I/O-plugging interval for bfqq.
|
||||
*
|
||||
* ISSUE
|
||||
*
|
||||
* On queue merging all waker information is lost.
|
||||
*/
|
||||
static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
u64 now_ns)
|
||||
{
|
||||
if (!bfqd->last_completed_rq_bfqq ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq ||
|
||||
bfq_bfqq_has_short_ttime(bfqq) ||
|
||||
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
|
||||
return;
|
||||
|
||||
if (bfqd->last_completed_rq_bfqq !=
|
||||
bfqq->tentative_waker_bfqq) {
|
||||
/*
|
||||
* First synchronization detected with a
|
||||
* candidate waker queue, or with a different
|
||||
* candidate waker queue from the current one.
|
||||
*/
|
||||
bfqq->tentative_waker_bfqq =
|
||||
bfqd->last_completed_rq_bfqq;
|
||||
bfqq->num_waker_detections = 1;
|
||||
} else /* Same tentative waker queue detected again */
|
||||
bfqq->num_waker_detections++;
|
||||
|
||||
if (bfqq->num_waker_detections == 3) {
|
||||
bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
|
||||
bfqq->tentative_waker_bfqq = NULL;
|
||||
|
||||
/*
|
||||
* If the waker queue disappears, then
|
||||
* bfqq->waker_bfqq must be reset. To
|
||||
* this goal, we maintain in each
|
||||
* waker queue a list, woken_list, of
|
||||
* all the queues that reference the
|
||||
* waker queue through their
|
||||
* waker_bfqq pointer. When the waker
|
||||
* queue exits, the waker_bfqq pointer
|
||||
* of all the queues in the woken_list
|
||||
* is reset.
|
||||
*
|
||||
* In addition, if bfqq is already in
|
||||
* the woken_list of a waker queue,
|
||||
* then, before being inserted into
|
||||
* the woken_list of a new waker
|
||||
* queue, bfqq must be removed from
|
||||
* the woken_list of the old waker
|
||||
* queue.
|
||||
*/
|
||||
if (!hlist_unhashed(&bfqq->woken_list_node))
|
||||
hlist_del_init(&bfqq->woken_list_node);
|
||||
hlist_add_head(&bfqq->woken_list_node,
|
||||
&bfqd->last_completed_rq_bfqq->woken_list);
|
||||
}
|
||||
}
|
||||
|
||||
static void bfq_add_request(struct request *rq)
|
||||
{
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(rq);
|
||||
@ -1868,117 +2013,14 @@ static void bfq_add_request(struct request *rq)
|
||||
struct request *next_rq, *prev;
|
||||
unsigned int old_wr_coeff = bfqq->wr_coeff;
|
||||
bool interactive = false;
|
||||
u64 now_ns = ktime_get_ns();
|
||||
|
||||
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
|
||||
bfqq->queued[rq_is_sync(rq)]++;
|
||||
bfqd->queued++;
|
||||
|
||||
if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
|
||||
/*
|
||||
* Detect whether bfqq's I/O seems synchronized with
|
||||
* that of some other queue, i.e., whether bfqq, after
|
||||
* remaining empty, happens to receive new I/O only
|
||||
* right after some I/O request of the other queue has
|
||||
* been completed. We call waker queue the other
|
||||
* queue, and we assume, for simplicity, that bfqq may
|
||||
* have at most one waker queue.
|
||||
*
|
||||
* A remarkable throughput boost can be reached by
|
||||
* unconditionally injecting the I/O of the waker
|
||||
* queue, every time a new bfq_dispatch_request
|
||||
* happens to be invoked while I/O is being plugged
|
||||
* for bfqq. In addition to boosting throughput, this
|
||||
* unblocks bfqq's I/O, thereby improving bandwidth
|
||||
* and latency for bfqq. Note that these same results
|
||||
* may be achieved with the general injection
|
||||
* mechanism, but less effectively. For details on
|
||||
* this aspect, see the comments on the choice of the
|
||||
* queue for injection in bfq_select_queue().
|
||||
*
|
||||
* Turning back to the detection of a waker queue, a
|
||||
* queue Q is deemed as a waker queue for bfqq if, for
|
||||
* two consecutive times, bfqq happens to become non
|
||||
* empty right after a request of Q has been
|
||||
* completed. In particular, on the first time, Q is
|
||||
* tentatively set as a candidate waker queue, while
|
||||
* on the second time, the flag
|
||||
* bfq_bfqq_has_waker(bfqq) is set to confirm that Q
|
||||
* is a waker queue for bfqq. These detection steps
|
||||
* are performed only if bfqq has a long think time,
|
||||
* so as to make it more likely that bfqq's I/O is
|
||||
* actually being blocked by a synchronization. This
|
||||
* last filter, plus the above two-times requirement,
|
||||
* make false positives less likely.
|
||||
*
|
||||
* NOTE
|
||||
*
|
||||
* The sooner a waker queue is detected, the sooner
|
||||
* throughput can be boosted by injecting I/O from the
|
||||
* waker queue. Fortunately, detection is likely to be
|
||||
* actually fast, for the following reasons. While
|
||||
* blocked by synchronization, bfqq has a long think
|
||||
* time. This implies that bfqq's inject limit is at
|
||||
* least equal to 1 (see the comments in
|
||||
* bfq_update_inject_limit()). So, thanks to
|
||||
* injection, the waker queue is likely to be served
|
||||
* during the very first I/O-plugging time interval
|
||||
* for bfqq. This triggers the first step of the
|
||||
* detection mechanism. Thanks again to injection, the
|
||||
* candidate waker queue is then likely to be
|
||||
* confirmed no later than during the next
|
||||
* I/O-plugging interval for bfqq.
|
||||
*/
|
||||
if (bfqd->last_completed_rq_bfqq &&
|
||||
!bfq_bfqq_has_short_ttime(bfqq) &&
|
||||
ktime_get_ns() - bfqd->last_completion <
|
||||
200 * NSEC_PER_USEC) {
|
||||
if (bfqd->last_completed_rq_bfqq != bfqq &&
|
||||
bfqd->last_completed_rq_bfqq !=
|
||||
bfqq->waker_bfqq) {
|
||||
/*
|
||||
* First synchronization detected with
|
||||
* a candidate waker queue, or with a
|
||||
* different candidate waker queue
|
||||
* from the current one.
|
||||
*/
|
||||
bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
|
||||
|
||||
/*
|
||||
* If the waker queue disappears, then
|
||||
* bfqq->waker_bfqq must be reset. To
|
||||
* this goal, we maintain in each
|
||||
* waker queue a list, woken_list, of
|
||||
* all the queues that reference the
|
||||
* waker queue through their
|
||||
* waker_bfqq pointer. When the waker
|
||||
* queue exits, the waker_bfqq pointer
|
||||
* of all the queues in the woken_list
|
||||
* is reset.
|
||||
*
|
||||
* In addition, if bfqq is already in
|
||||
* the woken_list of a waker queue,
|
||||
* then, before being inserted into
|
||||
* the woken_list of a new waker
|
||||
* queue, bfqq must be removed from
|
||||
* the woken_list of the old waker
|
||||
* queue.
|
||||
*/
|
||||
if (!hlist_unhashed(&bfqq->woken_list_node))
|
||||
hlist_del_init(&bfqq->woken_list_node);
|
||||
hlist_add_head(&bfqq->woken_list_node,
|
||||
&bfqd->last_completed_rq_bfqq->woken_list);
|
||||
|
||||
bfq_clear_bfqq_has_waker(bfqq);
|
||||
} else if (bfqd->last_completed_rq_bfqq ==
|
||||
bfqq->waker_bfqq &&
|
||||
!bfq_bfqq_has_waker(bfqq)) {
|
||||
/*
|
||||
* synchronization with waker_bfqq
|
||||
* seen for the second time
|
||||
*/
|
||||
bfq_mark_bfqq_has_waker(bfqq);
|
||||
}
|
||||
}
|
||||
bfq_check_waker(bfqd, bfqq, now_ns);
|
||||
|
||||
/*
|
||||
* Periodically reset inject limit, to make sure that
|
||||
@ -2047,6 +2089,9 @@ static void bfq_add_request(struct request *rq)
|
||||
}
|
||||
}
|
||||
|
||||
if (bfq_bfqq_sync(bfqq))
|
||||
bfq_update_io_intensity(bfqq, now_ns);
|
||||
|
||||
elv_rb_add(&bfqq->sort_list, rq);
|
||||
|
||||
/*
|
||||
@ -2352,6 +2397,24 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
|
||||
/* Must be called with bfqq != NULL */
|
||||
static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
|
||||
{
|
||||
/*
|
||||
* If bfqq has been enjoying interactive weight-raising, then
|
||||
* reset soft_rt_next_start. We do it for the following
|
||||
* reason. bfqq may have been conveying the I/O needed to load
|
||||
* a soft real-time application. Such an application actually
|
||||
* exhibits a soft real-time I/O pattern after it finishes
|
||||
* loading, and finally starts doing its job. But, if bfqq has
|
||||
* been receiving a lot of bandwidth so far (likely to happen
|
||||
* on a fast device), then soft_rt_next_start now contains a
|
||||
* high value. So, without this reset, bfqq would be
|
||||
* prevented from being possibly considered as soft_rt for a
|
||||
* very long time.
|
||||
*/
|
||||
|
||||
if (bfqq->wr_cur_max_time !=
|
||||
bfqq->bfqd->bfq_wr_rt_max_time)
|
||||
bfqq->soft_rt_next_start = jiffies;
|
||||
|
||||
if (bfq_bfqq_busy(bfqq))
|
||||
bfqq->bfqd->wr_busy_queues--;
|
||||
bfqq->wr_coeff = 1;
|
||||
@ -2686,10 +2749,16 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
|
||||
if (!bic)
|
||||
return;
|
||||
|
||||
bic->saved_last_serv_time_ns = bfqq->last_serv_time_ns;
|
||||
bic->saved_inject_limit = bfqq->inject_limit;
|
||||
bic->saved_decrease_time_jif = bfqq->decrease_time_jif;
|
||||
|
||||
bic->saved_weight = bfqq->entity.orig_weight;
|
||||
bic->saved_ttime = bfqq->ttime;
|
||||
bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
|
||||
bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
|
||||
bic->saved_io_start_time = bfqq->io_start_time;
|
||||
bic->saved_tot_idle_time = bfqq->tot_idle_time;
|
||||
bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
|
||||
bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
|
||||
if (unlikely(bfq_bfqq_just_created(bfqq) &&
|
||||
@ -2712,6 +2781,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
|
||||
bic->saved_wr_coeff = bfqq->wr_coeff;
|
||||
bic->saved_wr_start_at_switch_to_srt =
|
||||
bfqq->wr_start_at_switch_to_srt;
|
||||
bic->saved_service_from_wr = bfqq->service_from_wr;
|
||||
bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
|
||||
bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
|
||||
}
|
||||
@ -2937,6 +3007,7 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
|
||||
}
|
||||
|
||||
bfqd->in_service_queue = bfqq;
|
||||
bfqd->in_serv_last_pos = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3442,20 +3513,38 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
|
||||
* order until all the requests already queued in the device have been
|
||||
* served. The last sub-condition commented above somewhat mitigates
|
||||
* this problem for weight-raised queues.
|
||||
*
|
||||
* However, as an additional mitigation for this problem, we preserve
|
||||
* plugging for a special symmetric case that may suddenly turn into
|
||||
* asymmetric: the case where only bfqq is busy. In this case, not
|
||||
* expiring bfqq does not cause any harm to any other queues in terms
|
||||
* of service guarantees. In contrast, it avoids the following unlucky
|
||||
* sequence of events: (1) bfqq is expired, (2) a new queue with a
|
||||
* lower weight than bfqq becomes busy (or more queues), (3) the new
|
||||
* queue is served until a new request arrives for bfqq, (4) when bfqq
|
||||
* is finally served, there are so many requests of the new queue in
|
||||
* the drive that the pending requests for bfqq take a lot of time to
|
||||
* be served. In particular, event (2) may cause even already
|
||||
* dispatched requests of bfqq to be delayed, inside the drive. So, to
|
||||
* avoid this series of events, the scenario is preventively declared
|
||||
* as asymmetric also if bfqq is the only busy queue.
|
||||
*/
|
||||
static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq)
|
||||
{
|
||||
int tot_busy_queues = bfq_tot_busy_queues(bfqd);
|
||||
|
||||
/* No point in idling for bfqq if it won't get requests any longer */
|
||||
if (unlikely(!bfqq_process_refs(bfqq)))
|
||||
return false;
|
||||
|
||||
return (bfqq->wr_coeff > 1 &&
|
||||
(bfqd->wr_busy_queues <
|
||||
bfq_tot_busy_queues(bfqd) ||
|
||||
tot_busy_queues ||
|
||||
bfqd->rq_in_driver >=
|
||||
bfqq->dispatched + 4)) ||
|
||||
bfq_asymmetric_scenario(bfqd, bfqq);
|
||||
bfq_asymmetric_scenario(bfqd, bfqq) ||
|
||||
tot_busy_queues == 1;
|
||||
}
|
||||
|
||||
static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
@ -3939,10 +4028,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
|
||||
bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)))
|
||||
bfq_bfqq_charge_time(bfqd, bfqq, delta);
|
||||
|
||||
if (reason == BFQQE_TOO_IDLE &&
|
||||
entity->service <= 2 * entity->budget / 10)
|
||||
bfq_clear_bfqq_IO_bound(bfqq);
|
||||
|
||||
if (bfqd->low_latency && bfqq->wr_coeff == 1)
|
||||
bfqq->last_wr_start_finish = jiffies;
|
||||
|
||||
@ -3952,30 +4037,15 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
|
||||
* If we get here, and there are no outstanding
|
||||
* requests, then the request pattern is isochronous
|
||||
* (see the comments on the function
|
||||
* bfq_bfqq_softrt_next_start()). Thus we can compute
|
||||
* soft_rt_next_start. And we do it, unless bfqq is in
|
||||
* interactive weight raising. We do not do it in the
|
||||
* latter subcase, for the following reason. bfqq may
|
||||
* be conveying the I/O needed to load a soft
|
||||
* real-time application. Such an application will
|
||||
* actually exhibit a soft real-time I/O pattern after
|
||||
* it finally starts doing its job. But, if
|
||||
* soft_rt_next_start is computed here for an
|
||||
* interactive bfqq, and bfqq had received a lot of
|
||||
* service before remaining with no outstanding
|
||||
* request (likely to happen on a fast device), then
|
||||
* soft_rt_next_start would be assigned such a high
|
||||
* value that, for a very long time, bfqq would be
|
||||
* prevented from being possibly considered as soft
|
||||
* real time.
|
||||
* bfq_bfqq_softrt_next_start()). Therefore we can
|
||||
* compute soft_rt_next_start.
|
||||
*
|
||||
* If, instead, the queue still has outstanding
|
||||
* requests, then we have to wait for the completion
|
||||
* of all the outstanding requests to discover whether
|
||||
* the request pattern is actually isochronous.
|
||||
*/
|
||||
if (bfqq->dispatched == 0 &&
|
||||
bfqq->wr_coeff != bfqd->bfq_wr_coeff)
|
||||
if (bfqq->dispatched == 0)
|
||||
bfqq->soft_rt_next_start =
|
||||
bfq_bfqq_softrt_next_start(bfqd, bfqq);
|
||||
else if (bfqq->dispatched > 0) {
|
||||
@ -4497,9 +4567,9 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
|
||||
bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
|
||||
bfq_bfqq_budget_left(async_bfqq))
|
||||
bfqq = bfqq->bic->bfqq[0];
|
||||
else if (bfq_bfqq_has_waker(bfqq) &&
|
||||
else if (bfqq->waker_bfqq &&
|
||||
bfq_bfqq_busy(bfqq->waker_bfqq) &&
|
||||
bfqq->next_rq &&
|
||||
bfqq->waker_bfqq->next_rq &&
|
||||
bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
|
||||
bfqq->waker_bfqq) <=
|
||||
bfq_bfqq_budget_left(bfqq->waker_bfqq)
|
||||
@ -4559,9 +4629,21 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
bfqq->wr_cur_max_time)) {
|
||||
if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
|
||||
time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
|
||||
bfq_wr_duration(bfqd)))
|
||||
bfq_wr_duration(bfqd))) {
|
||||
/*
|
||||
* Either in interactive weight
|
||||
* raising, or in soft_rt weight
|
||||
* raising with the
|
||||
* interactive-weight-raising period
|
||||
* elapsed (so no switch back to
|
||||
* interactive weight raising).
|
||||
*/
|
||||
bfq_bfqq_end_wr(bfqq);
|
||||
else {
|
||||
} else { /*
|
||||
* soft_rt finishing while still in
|
||||
* interactive period, switch back to
|
||||
* interactive weight raising
|
||||
*/
|
||||
switch_back_to_interactive_wr(bfqq, bfqd);
|
||||
bfqq->entity.prio_changed = 1;
|
||||
}
|
||||
@ -4640,9 +4722,6 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
|
||||
|
||||
if (!atomic_read(&hctx->elevator_queued))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Avoiding lock: a race on bfqd->busy_queues should cause at
|
||||
* most a call to dispatch for nothing
|
||||
@ -4892,7 +4971,6 @@ void bfq_put_queue(struct bfq_queue *bfqq)
|
||||
hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
|
||||
woken_list_node) {
|
||||
item->waker_bfqq = NULL;
|
||||
bfq_clear_bfqq_has_waker(item);
|
||||
hlist_del_init(&item->woken_list_node);
|
||||
}
|
||||
|
||||
@ -5012,6 +5090,8 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
|
||||
}
|
||||
|
||||
bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
|
||||
bfq_log_bfqq(bfqd, bfqq, "new_ioprio %d new_weight %d",
|
||||
bfqq->new_ioprio, bfqq->entity.new_weight);
|
||||
bfqq->entity.prio_changed = 1;
|
||||
}
|
||||
|
||||
@ -5049,6 +5129,8 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
|
||||
static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct bfq_io_cq *bic, pid_t pid, int is_sync)
|
||||
{
|
||||
u64 now_ns = ktime_get_ns();
|
||||
|
||||
RB_CLEAR_NODE(&bfqq->entity.rb_node);
|
||||
INIT_LIST_HEAD(&bfqq->fifo);
|
||||
INIT_HLIST_NODE(&bfqq->burst_list_node);
|
||||
@ -5076,7 +5158,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
bfq_clear_bfqq_sync(bfqq);
|
||||
|
||||
/* set end request to minus infinity from now */
|
||||
bfqq->ttime.last_end_request = ktime_get_ns() + 1;
|
||||
bfqq->ttime.last_end_request = now_ns + 1;
|
||||
|
||||
bfqq->io_start_time = now_ns;
|
||||
|
||||
bfq_mark_bfqq_IO_bound(bfqq);
|
||||
|
||||
@ -5194,11 +5278,19 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq)
|
||||
{
|
||||
struct bfq_ttime *ttime = &bfqq->ttime;
|
||||
u64 elapsed = ktime_get_ns() - bfqq->ttime.last_end_request;
|
||||
u64 elapsed;
|
||||
|
||||
/*
|
||||
* We are really interested in how long it takes for the queue to
|
||||
* become busy when there is no outstanding IO for this queue. So
|
||||
* ignore cases when the bfq queue has already IO queued.
|
||||
*/
|
||||
if (bfqq->dispatched || bfq_bfqq_busy(bfqq))
|
||||
return;
|
||||
elapsed = ktime_get_ns() - bfqq->ttime.last_end_request;
|
||||
elapsed = min_t(u64, elapsed, 2ULL * bfqd->bfq_slice_idle);
|
||||
|
||||
ttime->ttime_samples = (7*bfqq->ttime.ttime_samples + 256) / 8;
|
||||
ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
|
||||
ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
|
||||
ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
|
||||
ttime->ttime_samples);
|
||||
@ -5213,8 +5305,26 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
|
||||
if (bfqq->wr_coeff > 1 &&
|
||||
bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
|
||||
BFQQ_TOTALLY_SEEKY(bfqq))
|
||||
bfq_bfqq_end_wr(bfqq);
|
||||
BFQQ_TOTALLY_SEEKY(bfqq)) {
|
||||
if (time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
|
||||
bfq_wr_duration(bfqd))) {
|
||||
/*
|
||||
* In soft_rt weight raising with the
|
||||
* interactive-weight-raising period
|
||||
* elapsed (so no switch back to
|
||||
* interactive weight raising).
|
||||
*/
|
||||
bfq_bfqq_end_wr(bfqq);
|
||||
} else { /*
|
||||
* stopping soft_rt weight raising
|
||||
* while still in interactive period,
|
||||
* switch back to interactive weight
|
||||
* raising
|
||||
*/
|
||||
switch_back_to_interactive_wr(bfqq, bfqd);
|
||||
bfqq->entity.prio_changed = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
|
||||
@ -5238,12 +5348,13 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
|
||||
return;
|
||||
|
||||
/* Think time is infinite if no process is linked to
|
||||
* bfqq. Otherwise check average think time to
|
||||
* decide whether to mark as has_short_ttime
|
||||
* bfqq. Otherwise check average think time to decide whether
|
||||
* to mark as has_short_ttime. To this goal, compare average
|
||||
* think time with half the I/O-plugging timeout.
|
||||
*/
|
||||
if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
|
||||
(bfq_sample_valid(bfqq->ttime.ttime_samples) &&
|
||||
bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle))
|
||||
bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle>>1))
|
||||
has_short_ttime = false;
|
||||
|
||||
state_changed = has_short_ttime != bfq_bfqq_has_short_ttime(bfqq);
|
||||
@ -5557,7 +5668,6 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
bfq_insert_request(hctx, rq, at_head);
|
||||
atomic_inc(&hctx->elevator_queued);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5925,7 +6035,6 @@ static void bfq_finish_requeue_request(struct request *rq)
|
||||
|
||||
bfq_completed_request(bfqq, bfqd);
|
||||
bfq_finish_requeue_request_body(bfqq);
|
||||
atomic_dec(&rq->mq_hctx->elevator_queued);
|
||||
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
} else {
|
||||
@ -6489,8 +6598,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
|
||||
bfqd->bfq_slice_idle = bfq_slice_idle;
|
||||
bfqd->bfq_timeout = bfq_timeout;
|
||||
|
||||
bfqd->bfq_requests_within_timer = 120;
|
||||
|
||||
bfqd->bfq_large_burst_thresh = 8;
|
||||
bfqd->bfq_burst_interval = msecs_to_jiffies(180);
|
||||
|
||||
|
@ -291,6 +291,11 @@ struct bfq_queue {
|
||||
/* associated @bfq_ttime struct */
|
||||
struct bfq_ttime ttime;
|
||||
|
||||
/* when bfqq started to do I/O within the last observation window */
|
||||
u64 io_start_time;
|
||||
/* how long bfqq has remained empty during the last observ. window */
|
||||
u64 tot_idle_time;
|
||||
|
||||
/* bit vector: a 1 for each seeky requests in history */
|
||||
u32 seek_history;
|
||||
|
||||
@ -371,6 +376,11 @@ struct bfq_queue {
|
||||
* bfq_select_queue().
|
||||
*/
|
||||
struct bfq_queue *waker_bfqq;
|
||||
/* pointer to the curr. tentative waker queue, see bfq_check_waker() */
|
||||
struct bfq_queue *tentative_waker_bfqq;
|
||||
/* number of times the same tentative waker has been detected */
|
||||
unsigned int num_waker_detections;
|
||||
|
||||
/* node for woken_list, see below */
|
||||
struct hlist_node woken_list_node;
|
||||
/*
|
||||
@ -407,6 +417,9 @@ struct bfq_io_cq {
|
||||
*/
|
||||
bool saved_IO_bound;
|
||||
|
||||
u64 saved_io_start_time;
|
||||
u64 saved_tot_idle_time;
|
||||
|
||||
/*
|
||||
* Same purpose as the previous fields for the value of the
|
||||
* field keeping the queue's belonging to a large burst
|
||||
@ -432,9 +445,15 @@ struct bfq_io_cq {
|
||||
*/
|
||||
unsigned long saved_wr_coeff;
|
||||
unsigned long saved_last_wr_start_finish;
|
||||
unsigned long saved_service_from_wr;
|
||||
unsigned long saved_wr_start_at_switch_to_srt;
|
||||
unsigned int saved_wr_cur_max_time;
|
||||
struct bfq_ttime saved_ttime;
|
||||
|
||||
/* Save also injection state */
|
||||
u64 saved_last_serv_time_ns;
|
||||
unsigned int saved_inject_limit;
|
||||
unsigned long saved_decrease_time_jif;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -641,14 +660,6 @@ struct bfq_data {
|
||||
*/
|
||||
unsigned int bfq_timeout;
|
||||
|
||||
/*
|
||||
* Number of consecutive requests that must be issued within
|
||||
* the idle time slice to set again idling to a queue which
|
||||
* was marked as non-I/O-bound (see the definition of the
|
||||
* IO_bound flag for further details).
|
||||
*/
|
||||
unsigned int bfq_requests_within_timer;
|
||||
|
||||
/*
|
||||
* Force device idling whenever needed to provide accurate
|
||||
* service guarantees, without caring about throughput
|
||||
@ -770,7 +781,6 @@ enum bfqq_state_flags {
|
||||
*/
|
||||
BFQQF_coop, /* bfqq is shared */
|
||||
BFQQF_split_coop, /* shared bfqq will be split */
|
||||
BFQQF_has_waker /* bfqq has a waker queue */
|
||||
};
|
||||
|
||||
#define BFQ_BFQQ_FNS(name) \
|
||||
@ -790,7 +800,6 @@ BFQ_BFQQ_FNS(in_large_burst);
|
||||
BFQ_BFQQ_FNS(coop);
|
||||
BFQ_BFQQ_FNS(split_coop);
|
||||
BFQ_BFQQ_FNS(softrt_update);
|
||||
BFQ_BFQQ_FNS(has_waker);
|
||||
#undef BFQ_BFQQ_FNS
|
||||
|
||||
/* Expiration reasons. */
|
||||
|
@ -137,9 +137,6 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
|
||||
|
||||
sd->next_in_service = next_in_service;
|
||||
|
||||
if (!next_in_service)
|
||||
return parent_sched_may_change;
|
||||
|
||||
return parent_sched_may_change;
|
||||
}
|
||||
|
||||
|
@ -14,8 +14,6 @@
|
||||
#include <linux/slab.h>
|
||||
#include "blk.h"
|
||||
|
||||
#define BIP_INLINE_VECS 4
|
||||
|
||||
static struct kmem_cache *bip_slab;
|
||||
static struct workqueue_struct *kintegrityd_wq;
|
||||
|
||||
@ -30,7 +28,7 @@ static void __bio_integrity_free(struct bio_set *bs,
|
||||
if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
|
||||
if (bip->bip_vec)
|
||||
bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
|
||||
bip->bip_slab);
|
||||
bip->bip_max_vcnt);
|
||||
mempool_free(bip, &bs->bio_integrity_pool);
|
||||
} else {
|
||||
kfree(bip);
|
||||
@ -63,7 +61,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
||||
inline_vecs = nr_vecs;
|
||||
} else {
|
||||
bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
|
||||
inline_vecs = BIP_INLINE_VECS;
|
||||
inline_vecs = BIO_INLINE_VECS;
|
||||
}
|
||||
|
||||
if (unlikely(!bip))
|
||||
@ -72,14 +70,11 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
||||
memset(bip, 0, sizeof(*bip));
|
||||
|
||||
if (nr_vecs > inline_vecs) {
|
||||
unsigned long idx = 0;
|
||||
|
||||
bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
|
||||
&bs->bvec_integrity_pool);
|
||||
bip->bip_max_vcnt = nr_vecs;
|
||||
bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
|
||||
&bip->bip_max_vcnt, gfp_mask);
|
||||
if (!bip->bip_vec)
|
||||
goto err;
|
||||
bip->bip_max_vcnt = bvec_nr_vecs(idx);
|
||||
bip->bip_slab = idx;
|
||||
} else {
|
||||
bip->bip_vec = bip->bip_inline_vecs;
|
||||
bip->bip_max_vcnt = inline_vecs;
|
||||
@ -140,7 +135,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
|
||||
iv = bip->bip_vec + bip->bip_vcnt;
|
||||
|
||||
if (bip->bip_vcnt &&
|
||||
bvec_gap_to_prev(bio->bi_disk->queue,
|
||||
bvec_gap_to_prev(bio->bi_bdev->bd_disk->queue,
|
||||
&bip->bip_vec[bip->bip_vcnt - 1], offset))
|
||||
return 0;
|
||||
|
||||
@ -162,7 +157,7 @@ EXPORT_SYMBOL(bio_integrity_add_page);
|
||||
static blk_status_t bio_integrity_process(struct bio *bio,
|
||||
struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct blk_integrity_iter iter;
|
||||
struct bvec_iter bviter;
|
||||
struct bio_vec bv;
|
||||
@ -171,7 +166,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
|
||||
void *prot_buf = page_address(bip->bip_vec->bv_page) +
|
||||
bip->bip_vec->bv_offset;
|
||||
|
||||
iter.disk_name = bio->bi_disk->disk_name;
|
||||
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
|
||||
iter.interval = 1 << bi->interval_exp;
|
||||
iter.seed = proc_iter->bi_sector;
|
||||
iter.prot_buf = prot_buf;
|
||||
@ -208,8 +203,8 @@ static blk_status_t bio_integrity_process(struct bio *bio,
|
||||
bool bio_integrity_prep(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
void *buf;
|
||||
unsigned long start, end;
|
||||
unsigned int len, nr_pages;
|
||||
@ -329,7 +324,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
|
||||
struct bio_integrity_payload *bip =
|
||||
container_of(work, struct bio_integrity_payload, bip_work);
|
||||
struct bio *bio = bip->bip_bio;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
|
||||
/*
|
||||
* At the moment verify is called bio's iterator was advanced
|
||||
@ -355,7 +350,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
|
||||
*/
|
||||
bool __bio_integrity_endio(struct bio *bio)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
|
||||
@ -381,7 +376,7 @@ bool __bio_integrity_endio(struct bio *bio)
|
||||
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
|
||||
|
||||
bip->bip_iter.bi_sector += bytes_done >> 9;
|
||||
@ -397,7 +392,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
|
||||
void bio_integrity_trim(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
|
||||
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
}
|
||||
@ -470,6 +465,6 @@ void __init bio_integrity_init(void)
|
||||
|
||||
bip_slab = kmem_cache_create("bio_integrity_payload",
|
||||
sizeof(struct bio_integrity_payload) +
|
||||
sizeof(struct bio_vec) * BIP_INLINE_VECS,
|
||||
sizeof(struct bio_vec) * BIO_INLINE_VECS,
|
||||
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
}
|
||||
|
577
block/bio.c
577
block/bio.c
@ -19,27 +19,40 @@
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
#include "blk.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
/*
|
||||
* Test patch to inline a certain number of bi_io_vec's inside the bio
|
||||
* itself, to shrink a bio data allocation from two mempool calls to one
|
||||
*/
|
||||
#define BIO_INLINE_VECS 4
|
||||
|
||||
/*
|
||||
* if you change this list, also change bvec_alloc or things will
|
||||
* break badly! cannot be bigger than what you can fit into an
|
||||
* unsigned short
|
||||
*/
|
||||
#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
|
||||
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
|
||||
BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
|
||||
static struct biovec_slab {
|
||||
int nr_vecs;
|
||||
char *name;
|
||||
struct kmem_cache *slab;
|
||||
} bvec_slabs[] __read_mostly = {
|
||||
{ .nr_vecs = 16, .name = "biovec-16" },
|
||||
{ .nr_vecs = 64, .name = "biovec-64" },
|
||||
{ .nr_vecs = 128, .name = "biovec-128" },
|
||||
{ .nr_vecs = BIO_MAX_PAGES, .name = "biovec-max" },
|
||||
};
|
||||
#undef BV
|
||||
|
||||
static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
|
||||
{
|
||||
switch (nr_vecs) {
|
||||
/* smaller bios use inline vecs */
|
||||
case 5 ... 16:
|
||||
return &bvec_slabs[0];
|
||||
case 17 ... 64:
|
||||
return &bvec_slabs[1];
|
||||
case 65 ... 128:
|
||||
return &bvec_slabs[2];
|
||||
case 129 ... BIO_MAX_PAGES:
|
||||
return &bvec_slabs[3];
|
||||
default:
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
|
||||
@ -58,178 +71,133 @@ struct bio_slab {
|
||||
char name[8];
|
||||
};
|
||||
static DEFINE_MUTEX(bio_slab_lock);
|
||||
static struct bio_slab *bio_slabs;
|
||||
static unsigned int bio_slab_nr, bio_slab_max;
|
||||
static DEFINE_XARRAY(bio_slabs);
|
||||
|
||||
static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
|
||||
static struct bio_slab *create_bio_slab(unsigned int size)
|
||||
{
|
||||
unsigned int sz = sizeof(struct bio) + extra_size;
|
||||
struct kmem_cache *slab = NULL;
|
||||
struct bio_slab *bslab, *new_bio_slabs;
|
||||
unsigned int new_bio_slab_max;
|
||||
unsigned int i, entry = -1;
|
||||
struct bio_slab *bslab = kzalloc(sizeof(*bslab), GFP_KERNEL);
|
||||
|
||||
if (!bslab)
|
||||
return NULL;
|
||||
|
||||
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size);
|
||||
bslab->slab = kmem_cache_create(bslab->name, size,
|
||||
ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL);
|
||||
if (!bslab->slab)
|
||||
goto fail_alloc_slab;
|
||||
|
||||
bslab->slab_ref = 1;
|
||||
bslab->slab_size = size;
|
||||
|
||||
if (!xa_err(xa_store(&bio_slabs, size, bslab, GFP_KERNEL)))
|
||||
return bslab;
|
||||
|
||||
kmem_cache_destroy(bslab->slab);
|
||||
|
||||
fail_alloc_slab:
|
||||
kfree(bslab);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
|
||||
{
|
||||
return bs->front_pad + sizeof(struct bio) + bs->back_pad;
|
||||
}
|
||||
|
||||
static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
|
||||
{
|
||||
unsigned int size = bs_bio_slab_size(bs);
|
||||
struct bio_slab *bslab;
|
||||
|
||||
mutex_lock(&bio_slab_lock);
|
||||
|
||||
i = 0;
|
||||
while (i < bio_slab_nr) {
|
||||
bslab = &bio_slabs[i];
|
||||
|
||||
if (!bslab->slab && entry == -1)
|
||||
entry = i;
|
||||
else if (bslab->slab_size == sz) {
|
||||
slab = bslab->slab;
|
||||
bslab->slab_ref++;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (slab)
|
||||
goto out_unlock;
|
||||
|
||||
if (bio_slab_nr == bio_slab_max && entry == -1) {
|
||||
new_bio_slab_max = bio_slab_max << 1;
|
||||
new_bio_slabs = krealloc(bio_slabs,
|
||||
new_bio_slab_max * sizeof(struct bio_slab),
|
||||
GFP_KERNEL);
|
||||
if (!new_bio_slabs)
|
||||
goto out_unlock;
|
||||
bio_slab_max = new_bio_slab_max;
|
||||
bio_slabs = new_bio_slabs;
|
||||
}
|
||||
if (entry == -1)
|
||||
entry = bio_slab_nr++;
|
||||
|
||||
bslab = &bio_slabs[entry];
|
||||
|
||||
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
|
||||
slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
|
||||
SLAB_HWCACHE_ALIGN, NULL);
|
||||
if (!slab)
|
||||
goto out_unlock;
|
||||
|
||||
bslab->slab = slab;
|
||||
bslab->slab_ref = 1;
|
||||
bslab->slab_size = sz;
|
||||
out_unlock:
|
||||
bslab = xa_load(&bio_slabs, size);
|
||||
if (bslab)
|
||||
bslab->slab_ref++;
|
||||
else
|
||||
bslab = create_bio_slab(size);
|
||||
mutex_unlock(&bio_slab_lock);
|
||||
return slab;
|
||||
|
||||
if (bslab)
|
||||
return bslab->slab;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void bio_put_slab(struct bio_set *bs)
|
||||
{
|
||||
struct bio_slab *bslab = NULL;
|
||||
unsigned int i;
|
||||
unsigned int slab_size = bs_bio_slab_size(bs);
|
||||
|
||||
mutex_lock(&bio_slab_lock);
|
||||
|
||||
for (i = 0; i < bio_slab_nr; i++) {
|
||||
if (bs->bio_slab == bio_slabs[i].slab) {
|
||||
bslab = &bio_slabs[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bslab = xa_load(&bio_slabs, slab_size);
|
||||
if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
|
||||
goto out;
|
||||
|
||||
WARN_ON_ONCE(bslab->slab != bs->bio_slab);
|
||||
|
||||
WARN_ON(!bslab->slab_ref);
|
||||
|
||||
if (--bslab->slab_ref)
|
||||
goto out;
|
||||
|
||||
xa_erase(&bio_slabs, slab_size);
|
||||
|
||||
kmem_cache_destroy(bslab->slab);
|
||||
bslab->slab = NULL;
|
||||
kfree(bslab);
|
||||
|
||||
out:
|
||||
mutex_unlock(&bio_slab_lock);
|
||||
}
|
||||
|
||||
unsigned int bvec_nr_vecs(unsigned short idx)
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
|
||||
{
|
||||
return bvec_slabs[--idx].nr_vecs;
|
||||
}
|
||||
BIO_BUG_ON(nr_vecs > BIO_MAX_PAGES);
|
||||
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
|
||||
{
|
||||
if (!idx)
|
||||
return;
|
||||
idx--;
|
||||
|
||||
BIO_BUG_ON(idx >= BVEC_POOL_NR);
|
||||
|
||||
if (idx == BVEC_POOL_MAX) {
|
||||
if (nr_vecs == BIO_MAX_PAGES)
|
||||
mempool_free(bv, pool);
|
||||
} else {
|
||||
struct biovec_slab *bvs = bvec_slabs + idx;
|
||||
|
||||
kmem_cache_free(bvs->slab, bv);
|
||||
}
|
||||
else if (nr_vecs > BIO_INLINE_VECS)
|
||||
kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
|
||||
}
|
||||
|
||||
struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
|
||||
mempool_t *pool)
|
||||
/*
|
||||
* Make the first allocation restricted and don't dump info on allocation
|
||||
* failures, since we'll fall back to the mempool in case of failure.
|
||||
*/
|
||||
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
|
||||
{
|
||||
struct bio_vec *bvl;
|
||||
return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
|
||||
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
|
||||
}
|
||||
|
||||
/*
|
||||
* see comment near bvec_array define!
|
||||
*/
|
||||
switch (nr) {
|
||||
case 1:
|
||||
*idx = 0;
|
||||
break;
|
||||
case 2 ... 4:
|
||||
*idx = 1;
|
||||
break;
|
||||
case 5 ... 16:
|
||||
*idx = 2;
|
||||
break;
|
||||
case 17 ... 64:
|
||||
*idx = 3;
|
||||
break;
|
||||
case 65 ... 128:
|
||||
*idx = 4;
|
||||
break;
|
||||
case 129 ... BIO_MAX_PAGES:
|
||||
*idx = 5;
|
||||
break;
|
||||
default:
|
||||
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct biovec_slab *bvs = biovec_slab(*nr_vecs);
|
||||
|
||||
if (WARN_ON_ONCE(!bvs))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* idx now points to the pool we want to allocate from. only the
|
||||
* 1-vec entry pool is mempool backed.
|
||||
* Upgrade the nr_vecs request to take full advantage of the allocation.
|
||||
* We also rely on this in the bvec_free path.
|
||||
*/
|
||||
if (*idx == BVEC_POOL_MAX) {
|
||||
fallback:
|
||||
bvl = mempool_alloc(pool, gfp_mask);
|
||||
} else {
|
||||
struct biovec_slab *bvs = bvec_slabs + *idx;
|
||||
gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
|
||||
*nr_vecs = bvs->nr_vecs;
|
||||
|
||||
/*
|
||||
* Make this allocation restricted and don't dump info on
|
||||
* allocation failures, since we'll fallback to the mempool
|
||||
* in case of failure.
|
||||
*/
|
||||
__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
|
||||
/*
|
||||
* Try a slab allocation first for all smaller allocations. If that
|
||||
* fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
|
||||
* The mempool is sized to handle up to BIO_MAX_PAGES entries.
|
||||
*/
|
||||
if (*nr_vecs < BIO_MAX_PAGES) {
|
||||
struct bio_vec *bvl;
|
||||
|
||||
/*
|
||||
* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
|
||||
* is set, retry with the 1-entry mempool
|
||||
*/
|
||||
bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
|
||||
if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
|
||||
*idx = BVEC_POOL_MAX;
|
||||
goto fallback;
|
||||
}
|
||||
bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
|
||||
if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
|
||||
return bvl;
|
||||
*nr_vecs = BIO_MAX_PAGES;
|
||||
}
|
||||
|
||||
(*idx)++;
|
||||
return bvl;
|
||||
return mempool_alloc(pool, gfp_mask);
|
||||
}
|
||||
|
||||
void bio_uninit(struct bio *bio)
|
||||
@ -255,7 +223,7 @@ static void bio_free(struct bio *bio)
|
||||
bio_uninit(bio);
|
||||
|
||||
if (bs) {
|
||||
bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
|
||||
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
|
||||
|
||||
/*
|
||||
* If we have front padding, adjust the bio pointer before freeing
|
||||
@ -299,12 +267,8 @@ EXPORT_SYMBOL(bio_init);
|
||||
*/
|
||||
void bio_reset(struct bio *bio)
|
||||
{
|
||||
unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
|
||||
|
||||
bio_uninit(bio);
|
||||
|
||||
memset(bio, 0, BIO_RESET_BYTES);
|
||||
bio->bi_flags = flags;
|
||||
atomic_set(&bio->__bi_remaining, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_reset);
|
||||
@ -405,122 +369,97 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
|
||||
* @nr_iovecs: number of iovecs to pre-allocate
|
||||
* @bs: the bio_set to allocate from.
|
||||
*
|
||||
* Description:
|
||||
* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
|
||||
* backed by the @bs's mempool.
|
||||
* Allocate a bio from the mempools in @bs.
|
||||
*
|
||||
* When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
|
||||
* always be able to allocate a bio. This is due to the mempool guarantees.
|
||||
* To make this work, callers must never allocate more than 1 bio at a time
|
||||
* from this pool. Callers that need to allocate more than 1 bio must always
|
||||
* submit the previously allocated bio for IO before attempting to allocate
|
||||
* a new one. Failure to do so can cause deadlocks under memory pressure.
|
||||
* If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
|
||||
* allocate a bio. This is due to the mempool guarantees. To make this work,
|
||||
* callers must never allocate more than 1 bio at a time from the general pool.
|
||||
* Callers that need to allocate more than 1 bio must always submit the
|
||||
* previously allocated bio for IO before attempting to allocate a new one.
|
||||
* Failure to do so can cause deadlocks under memory pressure.
|
||||
*
|
||||
* Note that when running under submit_bio_noacct() (i.e. any block
|
||||
* driver), bios are not submitted until after you return - see the code in
|
||||
* submit_bio_noacct() that converts recursion into iteration, to prevent
|
||||
* stack overflows.
|
||||
* Note that when running under submit_bio_noacct() (i.e. any block driver),
|
||||
* bios are not submitted until after you return - see the code in
|
||||
* submit_bio_noacct() that converts recursion into iteration, to prevent
|
||||
* stack overflows.
|
||||
*
|
||||
* This would normally mean allocating multiple bios under
|
||||
* submit_bio_noacct() would be susceptible to deadlocks, but we have
|
||||
* deadlock avoidance code that resubmits any blocked bios from a rescuer
|
||||
* thread.
|
||||
* This would normally mean allocating multiple bios under submit_bio_noacct()
|
||||
* would be susceptible to deadlocks, but we have
|
||||
* deadlock avoidance code that resubmits any blocked bios from a rescuer
|
||||
* thread.
|
||||
*
|
||||
* However, we do not guarantee forward progress for allocations from other
|
||||
* mempools. Doing multiple allocations from the same mempool under
|
||||
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
|
||||
* for per bio allocations.
|
||||
* However, we do not guarantee forward progress for allocations from other
|
||||
* mempools. Doing multiple allocations from the same mempool under
|
||||
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
|
||||
* for per bio allocations.
|
||||
*
|
||||
* RETURNS:
|
||||
* Pointer to new bio on success, NULL on failure.
|
||||
* Returns: Pointer to new bio on success, NULL on failure.
|
||||
*/
|
||||
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
||||
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
|
||||
struct bio_set *bs)
|
||||
{
|
||||
gfp_t saved_gfp = gfp_mask;
|
||||
unsigned front_pad;
|
||||
unsigned inline_vecs;
|
||||
struct bio_vec *bvl = NULL;
|
||||
struct bio *bio;
|
||||
void *p;
|
||||
|
||||
if (!bs) {
|
||||
if (nr_iovecs > UIO_MAXIOV)
|
||||
return NULL;
|
||||
/* should not use nobvec bioset for nr_iovecs > 0 */
|
||||
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
|
||||
return NULL;
|
||||
|
||||
p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
|
||||
front_pad = 0;
|
||||
inline_vecs = nr_iovecs;
|
||||
} else {
|
||||
/* should not use nobvec bioset for nr_iovecs > 0 */
|
||||
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
|
||||
nr_iovecs > 0))
|
||||
return NULL;
|
||||
/*
|
||||
* submit_bio_noacct() converts recursion to iteration; this
|
||||
* means if we're running beneath it, any bios we allocate and
|
||||
* submit will not be submitted (and thus freed) until after we
|
||||
* return.
|
||||
*
|
||||
* This exposes us to a potential deadlock if we allocate
|
||||
* multiple bios from the same bio_set() while running
|
||||
* underneath submit_bio_noacct(). If we were to allocate
|
||||
* multiple bios (say a stacking block driver that was splitting
|
||||
* bios), we would deadlock if we exhausted the mempool's
|
||||
* reserve.
|
||||
*
|
||||
* We solve this, and guarantee forward progress, with a rescuer
|
||||
* workqueue per bio_set. If we go to allocate and there are
|
||||
* bios on current->bio_list, we first try the allocation
|
||||
* without __GFP_DIRECT_RECLAIM; if that fails, we punt those
|
||||
* bios we would be blocking to the rescuer workqueue before
|
||||
* we retry with the original gfp_flags.
|
||||
*/
|
||||
|
||||
if (current->bio_list &&
|
||||
(!bio_list_empty(&current->bio_list[0]) ||
|
||||
!bio_list_empty(&current->bio_list[1])) &&
|
||||
bs->rescue_workqueue)
|
||||
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
|
||||
/*
|
||||
* submit_bio_noacct() converts recursion to iteration; this means if
|
||||
* we're running beneath it, any bios we allocate and submit will not be
|
||||
* submitted (and thus freed) until after we return.
|
||||
*
|
||||
* This exposes us to a potential deadlock if we allocate multiple bios
|
||||
* from the same bio_set() while running underneath submit_bio_noacct().
|
||||
* If we were to allocate multiple bios (say a stacking block driver
|
||||
* that was splitting bios), we would deadlock if we exhausted the
|
||||
* mempool's reserve.
|
||||
*
|
||||
* We solve this, and guarantee forward progress, with a rescuer
|
||||
* workqueue per bio_set. If we go to allocate and there are bios on
|
||||
* current->bio_list, we first try the allocation without
|
||||
* __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
|
||||
* blocking to the rescuer workqueue before we retry with the original
|
||||
* gfp_flags.
|
||||
*/
|
||||
if (current->bio_list &&
|
||||
(!bio_list_empty(&current->bio_list[0]) ||
|
||||
!bio_list_empty(&current->bio_list[1])) &&
|
||||
bs->rescue_workqueue)
|
||||
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
|
||||
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
if (!p && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
if (!p && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
}
|
||||
|
||||
front_pad = bs->front_pad;
|
||||
inline_vecs = BIO_INLINE_VECS;
|
||||
}
|
||||
|
||||
if (unlikely(!p))
|
||||
return NULL;
|
||||
|
||||
bio = p + front_pad;
|
||||
bio_init(bio, NULL, 0);
|
||||
bio = p + bs->front_pad;
|
||||
if (nr_iovecs > BIO_INLINE_VECS) {
|
||||
struct bio_vec *bvl = NULL;
|
||||
|
||||
if (nr_iovecs > inline_vecs) {
|
||||
unsigned long idx = 0;
|
||||
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
|
||||
bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
|
||||
if (!bvl && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
|
||||
bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
|
||||
}
|
||||
|
||||
if (unlikely(!bvl))
|
||||
goto err_free;
|
||||
|
||||
bio->bi_flags |= idx << BVEC_POOL_OFFSET;
|
||||
bio_init(bio, bvl, nr_iovecs);
|
||||
} else if (nr_iovecs) {
|
||||
bvl = bio->bi_inline_vecs;
|
||||
bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
|
||||
} else {
|
||||
bio_init(bio, NULL, 0);
|
||||
}
|
||||
|
||||
bio->bi_pool = bs;
|
||||
bio->bi_max_vecs = nr_iovecs;
|
||||
bio->bi_io_vec = bvl;
|
||||
return bio;
|
||||
|
||||
err_free:
|
||||
@ -529,6 +468,31 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
||||
}
|
||||
EXPORT_SYMBOL(bio_alloc_bioset);
|
||||
|
||||
/**
|
||||
* bio_kmalloc - kmalloc a bio for I/O
|
||||
* @gfp_mask: the GFP_* mask given to the slab allocator
|
||||
* @nr_iovecs: number of iovecs to pre-allocate
|
||||
*
|
||||
* Use kmalloc to allocate and initialize a bio.
|
||||
*
|
||||
* Returns: Pointer to new bio on success, NULL on failure.
|
||||
*/
|
||||
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
if (nr_iovecs > UIO_MAXIOV)
|
||||
return NULL;
|
||||
|
||||
bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
|
||||
if (unlikely(!bio))
|
||||
return NULL;
|
||||
bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
|
||||
bio->bi_pool = NULL;
|
||||
return bio;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_kmalloc);
|
||||
|
||||
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
|
||||
{
|
||||
unsigned long flags;
|
||||
@ -607,16 +571,7 @@ void bio_truncate(struct bio *bio, unsigned new_size)
|
||||
*/
|
||||
void guard_bio_eod(struct bio *bio)
|
||||
{
|
||||
sector_t maxsector;
|
||||
struct block_device *part;
|
||||
|
||||
rcu_read_lock();
|
||||
part = __disk_get_part(bio->bi_disk, bio->bi_partno);
|
||||
if (part)
|
||||
maxsector = bdev_nr_sectors(part);
|
||||
else
|
||||
maxsector = get_capacity(bio->bi_disk);
|
||||
rcu_read_unlock();
|
||||
sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
|
||||
|
||||
if (!maxsector)
|
||||
return;
|
||||
@ -673,17 +628,18 @@ EXPORT_SYMBOL(bio_put);
|
||||
*/
|
||||
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
|
||||
{
|
||||
BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
|
||||
WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);
|
||||
|
||||
/*
|
||||
* most users will be overriding ->bi_disk with a new target,
|
||||
* most users will be overriding ->bi_bdev with a new target,
|
||||
* so we don't set nor calculate new physical/hw segment counts here
|
||||
*/
|
||||
bio->bi_disk = bio_src->bi_disk;
|
||||
bio->bi_partno = bio_src->bi_partno;
|
||||
bio->bi_bdev = bio_src->bi_bdev;
|
||||
bio_set_flag(bio, BIO_CLONED);
|
||||
if (bio_flagged(bio_src, BIO_THROTTLED))
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
if (bio_flagged(bio_src, BIO_REMAPPED))
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
bio->bi_opf = bio_src->bi_opf;
|
||||
bio->bi_ioprio = bio_src->bi_ioprio;
|
||||
bio->bi_write_hint = bio_src->bi_write_hint;
|
||||
@ -730,7 +686,7 @@ EXPORT_SYMBOL(bio_clone_fast);
|
||||
|
||||
const char *bio_devname(struct bio *bio, char *buf)
|
||||
{
|
||||
return disk_name(bio->bi_disk, bio->bi_partno, buf);
|
||||
return bdevname(bio->bi_bdev, buf);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_devname);
|
||||
|
||||
@ -870,7 +826,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
|
||||
int bio_add_zone_append_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int offset)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
bool same_page = false;
|
||||
|
||||
if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
|
||||
@ -993,21 +949,18 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_release_pages);
|
||||
|
||||
static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
|
||||
static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
const struct bio_vec *bv = iter->bvec;
|
||||
unsigned int len;
|
||||
size_t size;
|
||||
WARN_ON_ONCE(bio->bi_max_vecs);
|
||||
|
||||
if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
|
||||
return -EINVAL;
|
||||
bio->bi_vcnt = iter->nr_segs;
|
||||
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
|
||||
bio->bi_iter.bi_bvec_done = iter->iov_offset;
|
||||
bio->bi_iter.bi_size = iter->count;
|
||||
bio_set_flag(bio, BIO_NO_PAGE_REF);
|
||||
bio_set_flag(bio, BIO_CLONED);
|
||||
|
||||
len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
|
||||
size = bio_add_page(bio, bv->bv_page, len,
|
||||
bv->bv_offset + iter->iov_offset);
|
||||
if (unlikely(size != len))
|
||||
return -EINVAL;
|
||||
iov_iter_advance(iter, size);
|
||||
iov_iter_advance(iter, iter->count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1070,7 +1023,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
|
||||
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
|
||||
struct page **pages = (struct page **)bv;
|
||||
@ -1121,41 +1074,40 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
* This takes either an iterator pointing to user memory, or one pointing to
|
||||
* kernel pages (BVEC iterator). If we're adding user pages, we pin them and
|
||||
* map them into the kernel. On IO completion, the caller should put those
|
||||
* pages. If we're adding kernel pages, and the caller told us it's safe to
|
||||
* do so, we just have to add the pages to the bio directly. We don't grab an
|
||||
* extra reference to those pages (the user should already have that), and we
|
||||
* don't put the page on IO completion. The caller needs to check if the bio is
|
||||
* flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
|
||||
* released.
|
||||
* pages. For bvec based iterators bio_iov_iter_get_pages() uses the provided
|
||||
* bvecs rather than copying them. Hence anyone issuing kiocb based IO needs
|
||||
* to ensure the bvecs and pages stay referenced until the submitted I/O is
|
||||
* completed by a call to ->ki_complete() or returns with an error other than
|
||||
* -EIOCBQUEUED. The caller needs to check if the bio is flagged BIO_NO_PAGE_REF
|
||||
* on IO completion. If it isn't, then pages should be released.
|
||||
*
|
||||
* The function tries, but does not guarantee, to pin as many pages as
|
||||
* fit into the bio, or are requested in @iter, whatever is smaller. If
|
||||
* MM encounters an error pinning the requested pages, it stops. Error
|
||||
* is returned only if 0 pages could be pinned.
|
||||
*
|
||||
* It's intended for direct IO, so doesn't do PSI tracking, the caller is
|
||||
* responsible for setting BIO_WORKINGSET if necessary.
|
||||
*/
|
||||
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
const bool is_bvec = iov_iter_is_bvec(iter);
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(bio->bi_vcnt))
|
||||
return -EINVAL;
|
||||
if (iov_iter_is_bvec(iter)) {
|
||||
if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
|
||||
return -EINVAL;
|
||||
return bio_iov_bvec_set(bio, iter);
|
||||
}
|
||||
|
||||
do {
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
if (WARN_ON_ONCE(is_bvec))
|
||||
return -EINVAL;
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
|
||||
ret = __bio_iov_append_get_pages(bio, iter);
|
||||
} else {
|
||||
if (is_bvec)
|
||||
ret = __bio_iov_bvec_add_pages(bio, iter);
|
||||
else
|
||||
ret = __bio_iov_iter_get_pages(bio, iter);
|
||||
}
|
||||
else
|
||||
ret = __bio_iov_iter_get_pages(bio, iter);
|
||||
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
|
||||
|
||||
if (is_bvec)
|
||||
bio_set_flag(bio, BIO_NO_PAGE_REF);
|
||||
/* don't account direct I/O as memory stall */
|
||||
bio_clear_flag(bio, BIO_WORKINGSET);
|
||||
return bio->bi_vcnt ? 0 : ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
|
||||
@ -1178,7 +1130,8 @@ static void submit_bio_wait_endio(struct bio *bio)
|
||||
*/
|
||||
int submit_bio_wait(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
unsigned long hang_check;
|
||||
|
||||
bio->bi_private = &done;
|
||||
@ -1455,8 +1408,8 @@ void bio_endio(struct bio *bio)
|
||||
if (!bio_integrity_endio(bio))
|
||||
return;
|
||||
|
||||
if (bio->bi_disk)
|
||||
rq_qos_done_bio(bio->bi_disk->queue, bio);
|
||||
if (bio->bi_bdev)
|
||||
rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
|
||||
|
||||
/*
|
||||
* Need to have a real endio function for chained bios, otherwise
|
||||
@ -1471,8 +1424,8 @@ void bio_endio(struct bio *bio)
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bio->bi_disk->queue, bio);
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
|
||||
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
|
||||
@ -1559,7 +1512,7 @@ EXPORT_SYMBOL_GPL(bio_trim);
|
||||
*/
|
||||
int biovec_init_pool(mempool_t *pool, int pool_entries)
|
||||
{
|
||||
struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
|
||||
struct biovec_slab *bp = bvec_slabs + ARRAY_SIZE(bvec_slabs) - 1;
|
||||
|
||||
return mempool_init_slab_pool(pool, pool_entries, bp->slab);
|
||||
}
|
||||
@ -1612,15 +1565,17 @@ int bioset_init(struct bio_set *bs,
|
||||
unsigned int front_pad,
|
||||
int flags)
|
||||
{
|
||||
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
|
||||
|
||||
bs->front_pad = front_pad;
|
||||
if (flags & BIOSET_NEED_BVECS)
|
||||
bs->back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
|
||||
else
|
||||
bs->back_pad = 0;
|
||||
|
||||
spin_lock_init(&bs->rescue_lock);
|
||||
bio_list_init(&bs->rescue_list);
|
||||
INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
|
||||
|
||||
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
|
||||
bs->bio_slab = bio_find_or_create_slab(bs);
|
||||
if (!bs->bio_slab)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1663,39 +1618,19 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
|
||||
}
|
||||
EXPORT_SYMBOL(bioset_init_from_src);
|
||||
|
||||
static void __init biovec_init_slabs(void)
|
||||
static int __init init_bio(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BVEC_POOL_NR; i++) {
|
||||
int size;
|
||||
bio_integrity_init();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
|
||||
struct biovec_slab *bvs = bvec_slabs + i;
|
||||
|
||||
if (bvs->nr_vecs <= BIO_INLINE_VECS) {
|
||||
bvs->slab = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
size = bvs->nr_vecs * sizeof(struct bio_vec);
|
||||
bvs->slab = kmem_cache_create(bvs->name, size, 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
bvs->slab = kmem_cache_create(bvs->name,
|
||||
bvs->nr_vecs * sizeof(struct bio_vec), 0,
|
||||
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init init_bio(void)
|
||||
{
|
||||
bio_slab_max = 2;
|
||||
bio_slab_nr = 0;
|
||||
bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
|
||||
GFP_KERNEL);
|
||||
|
||||
BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET);
|
||||
|
||||
if (!bio_slabs)
|
||||
panic("bio: can't allocate bios\n");
|
||||
|
||||
bio_integrity_init();
|
||||
biovec_init_slabs();
|
||||
|
||||
if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
|
||||
panic("bio: can't allocate bios\n");
|
||||
|
@ -32,8 +32,6 @@
|
||||
#include <linux/psi.h>
|
||||
#include "blk.h"
|
||||
|
||||
#define MAX_KEY_LEN 100
|
||||
|
||||
/*
|
||||
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
|
||||
* blkcg_pol_register_mutex nests outside of it and synchronizes entire
|
||||
@ -1765,12 +1763,15 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
|
||||
if (unlikely(current->flags & PF_KTHREAD))
|
||||
return;
|
||||
|
||||
if (!blk_get_queue(q))
|
||||
return;
|
||||
if (current->throttle_queue != q) {
|
||||
if (!blk_get_queue(q))
|
||||
return;
|
||||
|
||||
if (current->throttle_queue)
|
||||
blk_put_queue(current->throttle_queue);
|
||||
current->throttle_queue = q;
|
||||
}
|
||||
|
||||
if (current->throttle_queue)
|
||||
blk_put_queue(current->throttle_queue);
|
||||
current->throttle_queue = q;
|
||||
if (use_memdelay)
|
||||
current->use_memdelay = use_memdelay;
|
||||
set_notify_resume(current);
|
||||
@ -1808,7 +1809,8 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
|
||||
struct blkcg_gq *blkg, *ret_blkg = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
|
||||
blkg = blkg_lookup_create(css_to_blkcg(css),
|
||||
bio->bi_bdev->bd_disk->queue);
|
||||
while (blkg) {
|
||||
if (blkg_tryget(blkg)) {
|
||||
ret_blkg = blkg;
|
||||
@ -1844,8 +1846,8 @@ void bio_associate_blkg_from_css(struct bio *bio,
|
||||
if (css && css->parent) {
|
||||
bio->bi_blkg = blkg_tryget_closest(bio, css);
|
||||
} else {
|
||||
blkg_get(bio->bi_disk->queue->root_blkg);
|
||||
bio->bi_blkg = bio->bi_disk->queue->root_blkg;
|
||||
blkg_get(bio->bi_bdev->bd_disk->queue->root_blkg);
|
||||
bio->bi_blkg = bio->bi_bdev->bd_disk->queue->root_blkg;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
|
||||
|
@ -476,7 +476,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
|
||||
|
||||
static inline int bio_queue_enter(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
bool nowait = bio->bi_opf & REQ_NOWAIT;
|
||||
int ret;
|
||||
|
||||
@ -531,7 +531,7 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||
if (q->id < 0)
|
||||
goto fail_q;
|
||||
|
||||
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
|
||||
if (ret)
|
||||
goto fail_id;
|
||||
|
||||
@ -692,11 +692,9 @@ static inline bool should_fail_request(struct block_device *part,
|
||||
|
||||
#endif /* CONFIG_FAIL_MAKE_REQUEST */
|
||||
|
||||
static inline bool bio_check_ro(struct bio *bio, struct block_device *part)
|
||||
static inline bool bio_check_ro(struct bio *bio)
|
||||
{
|
||||
const int op = bio_op(bio);
|
||||
|
||||
if (part->bd_read_only && op_is_write(op)) {
|
||||
if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
|
||||
@ -704,7 +702,7 @@ static inline bool bio_check_ro(struct bio *bio, struct block_device *part)
|
||||
|
||||
WARN_ONCE(1,
|
||||
"Trying to write to read-only block-device %s (partno %d)\n",
|
||||
bio_devname(bio, b), part->bd_partno);
|
||||
bio_devname(bio, b), bio->bi_bdev->bd_partno);
|
||||
/* Older lvm-tools actually trigger this */
|
||||
return false;
|
||||
}
|
||||
@ -714,7 +712,7 @@ static inline bool bio_check_ro(struct bio *bio, struct block_device *part)
|
||||
|
||||
static noinline int should_fail_bio(struct bio *bio)
|
||||
{
|
||||
if (should_fail_request(bio->bi_disk->part0, bio->bi_iter.bi_size))
|
||||
if (should_fail_request(bdev_whole(bio->bi_bdev), bio->bi_iter.bi_size))
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
@ -725,8 +723,9 @@ ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
|
||||
* This may well happen - the kernel calls bread() without checking the size of
|
||||
* the device, e.g., when mounting a file system.
|
||||
*/
|
||||
static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
|
||||
static inline int bio_check_eod(struct bio *bio)
|
||||
{
|
||||
sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
|
||||
unsigned int nr_sectors = bio_sectors(bio);
|
||||
|
||||
if (nr_sectors && maxsector &&
|
||||
@ -741,33 +740,20 @@ static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
|
||||
/*
|
||||
* Remap block n of partition p to block n+start(p) of the disk.
|
||||
*/
|
||||
static inline int blk_partition_remap(struct bio *bio)
|
||||
static int blk_partition_remap(struct bio *bio)
|
||||
{
|
||||
struct block_device *p;
|
||||
int ret = -EIO;
|
||||
struct block_device *p = bio->bi_bdev;
|
||||
|
||||
rcu_read_lock();
|
||||
p = __disk_get_part(bio->bi_disk, bio->bi_partno);
|
||||
if (unlikely(!p))
|
||||
goto out;
|
||||
if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
|
||||
goto out;
|
||||
if (unlikely(bio_check_ro(bio, p)))
|
||||
goto out;
|
||||
|
||||
return -EIO;
|
||||
if (bio_sectors(bio)) {
|
||||
if (bio_check_eod(bio, bdev_nr_sectors(p)))
|
||||
goto out;
|
||||
bio->bi_iter.bi_sector += p->bd_start_sect;
|
||||
trace_block_bio_remap(bio, p->bd_dev,
|
||||
bio->bi_iter.bi_sector -
|
||||
p->bd_start_sect);
|
||||
}
|
||||
bio->bi_partno = 0;
|
||||
ret = 0;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -807,7 +793,8 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
|
||||
|
||||
static noinline_for_stack bool submit_bio_checks(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct block_device *bdev = bio->bi_bdev;
|
||||
struct request_queue *q = bdev->bd_disk->queue;
|
||||
blk_status_t status = BLK_STS_IOERR;
|
||||
struct blk_plug *plug;
|
||||
|
||||
@ -826,14 +813,12 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
|
||||
|
||||
if (should_fail_bio(bio))
|
||||
goto end_io;
|
||||
|
||||
if (bio->bi_partno) {
|
||||
if (unlikely(blk_partition_remap(bio)))
|
||||
if (unlikely(bio_check_ro(bio)))
|
||||
goto end_io;
|
||||
if (!bio_flagged(bio, BIO_REMAPPED)) {
|
||||
if (unlikely(bio_check_eod(bio)))
|
||||
goto end_io;
|
||||
} else {
|
||||
if (unlikely(bio_check_ro(bio, bio->bi_disk->part0)))
|
||||
goto end_io;
|
||||
if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
|
||||
if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
@ -926,7 +911,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
|
||||
|
||||
static blk_qc_t __submit_bio(struct bio *bio)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_disk;
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
if (blk_crypto_bio_prep(&bio)) {
|
||||
@ -968,7 +953,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
|
||||
current->bio_list = bio_list_on_stack;
|
||||
|
||||
do {
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
struct bio_list lower, same;
|
||||
|
||||
if (unlikely(bio_queue_enter(bio) != 0))
|
||||
@ -989,7 +974,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
|
||||
bio_list_init(&lower);
|
||||
bio_list_init(&same);
|
||||
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
|
||||
if (q == bio->bi_disk->queue)
|
||||
if (q == bio->bi_bdev->bd_disk->queue)
|
||||
bio_list_add(&same, bio);
|
||||
else
|
||||
bio_list_add(&lower, bio);
|
||||
@ -1014,7 +999,7 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
|
||||
current->bio_list = bio_list;
|
||||
|
||||
do {
|
||||
struct gendisk *disk = bio->bi_disk;
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
if (unlikely(bio_queue_enter(bio) != 0))
|
||||
continue;
|
||||
@ -1057,7 +1042,7 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
if (!bio->bi_disk->fops->submit_bio)
|
||||
if (!bio->bi_bdev->bd_disk->fops->submit_bio)
|
||||
return __submit_bio_noacct_mq(bio);
|
||||
return __submit_bio_noacct(bio);
|
||||
}
|
||||
@ -1069,7 +1054,7 @@ EXPORT_SYMBOL(submit_bio_noacct);
|
||||
*
|
||||
* submit_bio() is used to submit I/O requests to block devices. It is passed a
|
||||
* fully set up &struct bio that describes the I/O that needs to be done. The
|
||||
* bio will be sent to the device described by the bi_disk and bi_partno fields.
|
||||
* bio will be sent to the device described by the bi_bdev field.
|
||||
*
|
||||
* The success/failure status of the request, along with notification of
|
||||
* completion, is delivered asynchronously through the ->bi_end_io() callback
|
||||
@ -1089,7 +1074,8 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
unsigned int count;
|
||||
|
||||
if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
|
||||
count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
|
||||
count = queue_logical_block_size(
|
||||
bio->bi_bdev->bd_disk->queue) >> 9;
|
||||
else
|
||||
count = bio_sectors(bio);
|
||||
|
||||
@ -1313,7 +1299,11 @@ void blk_account_io_start(struct request *rq)
|
||||
if (!blk_do_io_stat(rq))
|
||||
return;
|
||||
|
||||
rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
|
||||
/* passthrough requests can hold bios that do not have ->bi_bdev set */
|
||||
if (rq->bio && rq->bio->bi_bdev)
|
||||
rq->part = rq->bio->bi_bdev;
|
||||
else
|
||||
rq->part = rq->rq_disk->part0;
|
||||
|
||||
part_stat_lock();
|
||||
update_io_ticks(rq->part, jiffies, false);
|
||||
@ -1336,14 +1326,17 @@ static unsigned long __part_start_io_acct(struct block_device *part,
|
||||
return now;
|
||||
}
|
||||
|
||||
unsigned long part_start_io_acct(struct gendisk *disk, struct block_device **part,
|
||||
struct bio *bio)
|
||||
/**
|
||||
* bio_start_io_acct - start I/O accounting for bio based drivers
|
||||
* @bio: bio to start accounting for
|
||||
*
|
||||
* Returns the start time that should be passed back to bio_end_io_acct().
|
||||
*/
|
||||
unsigned long bio_start_io_acct(struct bio *bio)
|
||||
{
|
||||
*part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector);
|
||||
|
||||
return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio));
|
||||
return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(part_start_io_acct);
|
||||
EXPORT_SYMBOL_GPL(bio_start_io_acct);
|
||||
|
||||
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
|
||||
unsigned int op)
|
||||
@ -1366,12 +1359,12 @@ static void __part_end_io_acct(struct block_device *part, unsigned int op,
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
void part_end_io_acct(struct block_device *part, struct bio *bio,
|
||||
unsigned long start_time)
|
||||
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
|
||||
struct block_device *orig_bdev)
|
||||
{
|
||||
__part_end_io_acct(part, bio_op(bio), start_time);
|
||||
__part_end_io_acct(orig_bdev, bio_op(bio), start_time);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(part_end_io_acct);
|
||||
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
|
||||
|
||||
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
|
||||
unsigned long start_time)
|
||||
|
@ -164,10 +164,12 @@ static struct bio *blk_crypto_clone_bio(struct bio *bio_src)
|
||||
struct bio_vec bv;
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), NULL);
|
||||
bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src));
|
||||
if (!bio)
|
||||
return NULL;
|
||||
bio->bi_disk = bio_src->bi_disk;
|
||||
bio->bi_bdev = bio_src->bi_bdev;
|
||||
if (bio_flagged(bio_src, BIO_REMAPPED))
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
bio->bi_opf = bio_src->bi_opf;
|
||||
bio->bi_ioprio = bio_src->bi_ioprio;
|
||||
bio->bi_write_hint = bio_src->bi_write_hint;
|
||||
|
@ -280,7 +280,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
|
||||
* Success if device supports the encryption context, or if we succeeded
|
||||
* in falling back to the crypto API.
|
||||
*/
|
||||
if (blk_ksm_crypto_cfg_supported(bio->bi_disk->queue->ksm,
|
||||
if (blk_ksm_crypto_cfg_supported(bio->bi_bdev->bd_disk->queue->ksm,
|
||||
&bc_key->crypto_cfg))
|
||||
return true;
|
||||
|
||||
|
@ -31,8 +31,7 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_execute_rq_nowait - insert a request into queue for execution
|
||||
* @q: queue to insert the request in
|
||||
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
|
||||
* @bd_disk: matching gendisk
|
||||
* @rq: request to insert
|
||||
* @at_head: insert request at head or tail of queue
|
||||
@ -45,9 +44,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
|
||||
* Note:
|
||||
* This function will invoke @done directly if the queue is dead.
|
||||
*/
|
||||
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
|
||||
struct request *rq, int at_head,
|
||||
rq_end_io_fn *done)
|
||||
void blk_execute_rq_nowait(struct gendisk *bd_disk, struct request *rq,
|
||||
int at_head, rq_end_io_fn *done)
|
||||
{
|
||||
WARN_ON(irqs_disabled());
|
||||
WARN_ON(!blk_rq_is_passthrough(rq));
|
||||
@ -67,7 +65,6 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
|
||||
|
||||
/**
|
||||
* blk_execute_rq - insert a request into queue for execution
|
||||
* @q: queue to insert the request in
|
||||
* @bd_disk: matching gendisk
|
||||
* @rq: request to insert
|
||||
* @at_head: insert request at head or tail of queue
|
||||
@ -76,14 +73,13 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
|
||||
* Insert a fully prepared request at the back of the I/O scheduler queue
|
||||
* for execution and wait for completion.
|
||||
*/
|
||||
void blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
|
||||
struct request *rq, int at_head)
|
||||
void blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK(wait);
|
||||
unsigned long hang_check;
|
||||
|
||||
rq->end_io_data = &wait;
|
||||
blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
|
||||
blk_execute_rq_nowait(bd_disk, rq, at_head, blk_end_sync_rq);
|
||||
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
hang_check = sysctl_hung_task_timeout_secs;
|
||||
|
@ -432,23 +432,18 @@ void blk_insert_flush(struct request *rq)
|
||||
/**
|
||||
* blkdev_issue_flush - queue a flush
|
||||
* @bdev: blockdev to issue flush for
|
||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||
*
|
||||
* Description:
|
||||
* Issue a flush for the block device in question.
|
||||
*/
|
||||
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
|
||||
int blkdev_issue_flush(struct block_device *bdev)
|
||||
{
|
||||
struct bio *bio;
|
||||
int ret = 0;
|
||||
struct bio bio;
|
||||
|
||||
bio = bio_alloc(gfp_mask, 0);
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
|
||||
|
||||
ret = submit_bio_wait(bio);
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
bio_init(&bio, NULL, 0);
|
||||
bio_set_dev(&bio, bdev);
|
||||
bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
|
||||
return submit_bio_wait(&bio);
|
||||
}
|
||||
EXPORT_SYMBOL(blkdev_issue_flush);
|
||||
|
||||
|
@ -298,14 +298,13 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
|
||||
* Split a bio into two bios, chain the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. If the second bio is still too
|
||||
* big it will be split by a recursive call to this function. Since this
|
||||
* function may allocate a new bio from @bio->bi_disk->queue->bio_split, it is
|
||||
* the responsibility of the caller to ensure that
|
||||
* @bio->bi_disk->queue->bio_split is only released after processing of the
|
||||
* split bio has finished.
|
||||
* function may allocate a new bio from q->bio_split, it is the responsibility
|
||||
* of the caller to ensure that q->bio_split is only released after processing
|
||||
* of the split bio has finished.
|
||||
*/
|
||||
void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
|
||||
{
|
||||
struct request_queue *q = (*bio)->bi_disk->queue;
|
||||
struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
|
||||
struct bio *split = NULL;
|
||||
|
||||
switch (bio_op(*bio)) {
|
||||
@ -358,9 +357,9 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
|
||||
*
|
||||
* Split a bio into two bios, chain the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. Since this function may allocate
|
||||
* a new bio from @bio->bi_disk->queue->bio_split, it is the responsibility of
|
||||
* the caller to ensure that @bio->bi_disk->queue->bio_split is only released
|
||||
* after processing of the split bio has finished.
|
||||
* a new bio from q->bio_split, it is the responsibility of the caller to ensure
|
||||
* that q->bio_split is only released after processing of the split bio has
|
||||
* finished.
|
||||
*/
|
||||
void blk_queue_split(struct bio **bio)
|
||||
{
|
||||
@ -866,7 +865,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
||||
return false;
|
||||
|
||||
/* must be same device */
|
||||
if (rq->rq_disk != bio->bi_disk)
|
||||
if (rq->rq_disk != bio->bi_bdev->bd_disk)
|
||||
return false;
|
||||
|
||||
/* only merge integrity protected bio into ditto rq */
|
||||
|
@ -1646,6 +1646,42 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||
|
||||
/*
|
||||
* Is the request queue handled by an IO scheduler that does not respect
|
||||
* hardware queues when dispatching?
|
||||
*/
|
||||
static bool blk_mq_has_sqsched(struct request_queue *q)
|
||||
{
|
||||
struct elevator_queue *e = q->elevator;
|
||||
|
||||
if (e && e->type->ops.dispatch_request &&
|
||||
!(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return preferred queue to dispatch from (if any) for non-mq aware IO
|
||||
* scheduler.
|
||||
*/
|
||||
static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
|
||||
/*
|
||||
* If the IO scheduler does not respect hardware queues when
|
||||
* dispatching, we just don't bother with multiple HW queues and
|
||||
* dispatch from hctx for the current CPU since running multiple queues
|
||||
* just causes lock contention inside the scheduler and pointless cache
|
||||
* bouncing.
|
||||
*/
|
||||
hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
|
||||
raw_smp_processor_id());
|
||||
if (!blk_mq_hctx_stopped(hctx))
|
||||
return hctx;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_run_hw_queues - Run all hardware queues in a request queue.
|
||||
* @q: Pointer to the request queue to run.
|
||||
@ -1653,14 +1689,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||
*/
|
||||
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct blk_mq_hw_ctx *hctx, *sq_hctx;
|
||||
int i;
|
||||
|
||||
sq_hctx = NULL;
|
||||
if (blk_mq_has_sqsched(q))
|
||||
sq_hctx = blk_mq_get_sq_hctx(q);
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
continue;
|
||||
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
/*
|
||||
* Dispatch from this hctx either if there's no hctx preferred
|
||||
* by IO scheduler or if it has requests that bypass the
|
||||
* scheduler.
|
||||
*/
|
||||
if (!sq_hctx || sq_hctx == hctx ||
|
||||
!list_empty_careful(&hctx->dispatch))
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queues);
|
||||
@ -1672,14 +1717,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues);
|
||||
*/
|
||||
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct blk_mq_hw_ctx *hctx, *sq_hctx;
|
||||
int i;
|
||||
|
||||
sq_hctx = NULL;
|
||||
if (blk_mq_has_sqsched(q))
|
||||
sq_hctx = blk_mq_get_sq_hctx(q);
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (blk_mq_hctx_stopped(hctx))
|
||||
continue;
|
||||
|
||||
blk_mq_delay_run_hw_queue(hctx, msecs);
|
||||
/*
|
||||
* Dispatch from this hctx either if there's no hctx preferred
|
||||
* by IO scheduler or if it has requests that bypass the
|
||||
* scheduler.
|
||||
*/
|
||||
if (!sq_hctx || sq_hctx == hctx ||
|
||||
!list_empty_careful(&hctx->dispatch))
|
||||
blk_mq_delay_run_hw_queue(hctx, msecs);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
|
||||
@ -2128,7 +2182,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
*/
|
||||
blk_qc_t blk_mq_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
const int is_sync = op_is_sync(bio->bi_opf);
|
||||
const int is_flush_fua = op_is_flush(bio->bi_opf);
|
||||
struct blk_mq_alloc_data data = {
|
||||
@ -2653,7 +2707,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
|
||||
goto free_hctx;
|
||||
|
||||
atomic_set(&hctx->nr_active, 0);
|
||||
atomic_set(&hctx->elevator_queued, 0);
|
||||
if (node == NUMA_NO_NODE)
|
||||
node = set->numa_node;
|
||||
hctx->numa_node = node;
|
||||
|
@ -60,6 +60,7 @@ void blk_set_default_limits(struct queue_limits *lim)
|
||||
lim->io_opt = 0;
|
||||
lim->misaligned = 0;
|
||||
lim->zoned = BLK_ZONED_NONE;
|
||||
lim->zone_write_granularity = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_default_limits);
|
||||
|
||||
@ -366,6 +367,28 @@ void blk_queue_physical_block_size(struct request_queue *q, unsigned int size)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_physical_block_size);
|
||||
|
||||
/**
|
||||
* blk_queue_zone_write_granularity - set zone write granularity for the queue
|
||||
* @q: the request queue for the zoned device
|
||||
* @size: the zone write granularity size, in bytes
|
||||
*
|
||||
* Description:
|
||||
* This should be set to the lowest possible size allowing to write in
|
||||
* sequential zones of a zoned block device.
|
||||
*/
|
||||
void blk_queue_zone_write_granularity(struct request_queue *q,
|
||||
unsigned int size)
|
||||
{
|
||||
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
|
||||
return;
|
||||
|
||||
q->limits.zone_write_granularity = size;
|
||||
|
||||
if (q->limits.zone_write_granularity < q->limits.logical_block_size)
|
||||
q->limits.zone_write_granularity = q->limits.logical_block_size;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity);
|
||||
|
||||
/**
|
||||
* blk_queue_alignment_offset - set physical block alignment offset
|
||||
* @q: the request queue for the device
|
||||
@ -631,6 +654,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
t->discard_granularity;
|
||||
}
|
||||
|
||||
t->zone_write_granularity = max(t->zone_write_granularity,
|
||||
b->zone_write_granularity);
|
||||
t->zoned = max(t->zoned, b->zoned);
|
||||
return ret;
|
||||
}
|
||||
@ -847,6 +872,8 @@ EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
|
||||
*/
|
||||
void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
switch (model) {
|
||||
case BLK_ZONED_HM:
|
||||
/*
|
||||
@ -865,7 +892,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
|
||||
* we do nothing special as far as the block layer is concerned.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
|
||||
disk_has_partitions(disk))
|
||||
!xa_empty(&disk->part_tbl))
|
||||
model = BLK_ZONED_NONE;
|
||||
break;
|
||||
case BLK_ZONED_NONE:
|
||||
@ -875,7 +902,17 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
|
||||
break;
|
||||
}
|
||||
|
||||
disk->queue->limits.zoned = model;
|
||||
q->limits.zoned = model;
|
||||
if (model != BLK_ZONED_NONE) {
|
||||
/*
|
||||
* Set the zone write granularity to the device logical block
|
||||
* size by default. The driver can change this value if needed.
|
||||
*/
|
||||
blk_queue_zone_write_granularity(q,
|
||||
queue_logical_block_size(q));
|
||||
} else {
|
||||
blk_queue_clear_zone_settings(q);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
|
||||
|
||||
|
@ -219,6 +219,12 @@ static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
|
||||
(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
|
||||
}
|
||||
|
||||
static ssize_t queue_zone_write_granularity_show(struct request_queue *q,
|
||||
char *page)
|
||||
{
|
||||
return queue_var_show(queue_zone_write_granularity(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
|
||||
{
|
||||
unsigned long long max_sectors = q->limits.max_zone_append_sectors;
|
||||
@ -585,6 +591,7 @@ QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
|
||||
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
|
||||
|
||||
QUEUE_RO_ENTRY(queue_zoned, "zoned");
|
||||
QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
|
||||
@ -639,6 +646,7 @@ static struct attribute *queue_attrs[] = {
|
||||
&queue_write_same_max_entry.attr,
|
||||
&queue_write_zeroes_max_entry.attr,
|
||||
&queue_zone_append_max_entry.attr,
|
||||
&queue_zone_write_granularity_entry.attr,
|
||||
&queue_nonrot_entry.attr,
|
||||
&queue_zoned_entry.attr,
|
||||
&queue_nr_zones_entry.attr,
|
||||
|
@ -2178,7 +2178,7 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
|
||||
|
||||
bool blk_throtl_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
struct throtl_qnode *qn = NULL;
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg);
|
||||
|
@ -518,7 +518,7 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
|
||||
rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
|
||||
}
|
||||
|
||||
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
|
||||
static inline bool wbt_should_throttle(struct bio *bio)
|
||||
{
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_WRITE:
|
||||
@ -545,7 +545,7 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
|
||||
|
||||
if (bio_op(bio) == REQ_OP_READ) {
|
||||
flags = WBT_READ;
|
||||
} else if (wbt_should_throttle(rwb, bio)) {
|
||||
} else if (wbt_should_throttle(bio)) {
|
||||
if (current_is_kswapd())
|
||||
flags |= WBT_KSWAPD;
|
||||
if (bio_op(bio) == REQ_OP_DISCARD)
|
||||
|
@ -549,3 +549,20 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
|
||||
|
||||
void blk_queue_clear_zone_settings(struct request_queue *q)
|
||||
{
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
blk_queue_free_zone_bitmaps(q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE;
|
||||
q->nr_zones = 0;
|
||||
q->max_open_zones = 0;
|
||||
q->max_active_zones = 0;
|
||||
q->limits.chunk_sectors = 0;
|
||||
q->limits.zone_write_granularity = 0;
|
||||
q->limits.max_zone_append_sectors = 0;
|
||||
|
||||
blk_mq_unfreeze_queue(q);
|
||||
}
|
||||
|
12
block/blk.h
12
block/blk.h
@ -55,6 +55,11 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
|
||||
|
||||
void blk_freeze_queue(struct request_queue *q);
|
||||
|
||||
#define BIO_INLINE_VECS 4
|
||||
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
|
||||
gfp_t gfp_mask);
|
||||
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
|
||||
|
||||
static inline bool biovec_phys_mergeable(struct request_queue *q,
|
||||
struct bio_vec *vec1, struct bio_vec *vec2)
|
||||
{
|
||||
@ -202,8 +207,6 @@ static inline void elevator_exit(struct request_queue *q,
|
||||
__elevator_exit(q, e);
|
||||
}
|
||||
|
||||
struct block_device *__disk_get_part(struct gendisk *disk, int partno);
|
||||
|
||||
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf);
|
||||
ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -331,12 +334,12 @@ struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
void blk_queue_free_zone_bitmaps(struct request_queue *q);
|
||||
void blk_queue_clear_zone_settings(struct request_queue *q);
|
||||
#else
|
||||
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
|
||||
static inline void blk_queue_clear_zone_settings(struct request_queue *q) {}
|
||||
#endif
|
||||
|
||||
struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
|
||||
|
||||
int blk_alloc_devt(struct block_device *part, dev_t *devt);
|
||||
void blk_free_devt(dev_t devt);
|
||||
char *disk_name(struct gendisk *hd, int partno, char *buf);
|
||||
@ -349,7 +352,6 @@ int bdev_add_partition(struct block_device *bdev, int partno,
|
||||
int bdev_del_partition(struct block_device *bdev, int partno);
|
||||
int bdev_resize_partition(struct block_device *bdev, int partno,
|
||||
sector_t start, sector_t length);
|
||||
int disk_expand_part_tbl(struct gendisk *disk, int target);
|
||||
|
||||
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
|
@ -246,7 +246,9 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
|
||||
bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
|
||||
if (!bio)
|
||||
return NULL;
|
||||
bio->bi_disk = bio_src->bi_disk;
|
||||
bio->bi_bdev = bio_src->bi_bdev;
|
||||
if (bio_flagged(bio_src, BIO_REMAPPED))
|
||||
bio_set_flag(bio, BIO_REMAPPED);
|
||||
bio->bi_opf = bio_src->bi_opf;
|
||||
bio->bi_ioprio = bio_src->bi_ioprio;
|
||||
bio->bi_write_hint = bio_src->bi_write_hint;
|
||||
|
@ -157,8 +157,10 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
|
||||
return PTR_ERR(rq);
|
||||
|
||||
ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
blk_put_request(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
rq->timeout = msecs_to_jiffies(hdr.timeout);
|
||||
if (!rq->timeout)
|
||||
@ -181,7 +183,7 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
|
||||
|
||||
bio = rq->bio;
|
||||
|
||||
blk_execute_rq(q, NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL));
|
||||
blk_execute_rq(NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL));
|
||||
ret = rq->q->bsg_dev.ops->complete_rq(rq, &hdr);
|
||||
blk_rq_unmap_user(bio);
|
||||
|
||||
|
306
block/genhd.c
306
block/genhd.c
@ -162,15 +162,6 @@ static void part_in_flight_rw(struct block_device *part,
|
||||
inflight[1] = 0;
|
||||
}
|
||||
|
||||
struct block_device *__disk_get_part(struct gendisk *disk, int partno)
|
||||
{
|
||||
struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
|
||||
|
||||
if (unlikely(partno < 0 || partno >= ptbl->len))
|
||||
return NULL;
|
||||
return rcu_dereference(ptbl->part[partno]);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_part_iter_init - initialize partition iterator
|
||||
* @piter: iterator to initialize
|
||||
@ -185,26 +176,14 @@ struct block_device *__disk_get_part(struct gendisk *disk, int partno)
|
||||
void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct disk_part_tbl *ptbl;
|
||||
|
||||
rcu_read_lock();
|
||||
ptbl = rcu_dereference(disk->part_tbl);
|
||||
|
||||
piter->disk = disk;
|
||||
piter->part = NULL;
|
||||
|
||||
if (flags & DISK_PITER_REVERSE)
|
||||
piter->idx = ptbl->len - 1;
|
||||
else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
|
||||
if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
|
||||
piter->idx = 0;
|
||||
else
|
||||
piter->idx = 1;
|
||||
|
||||
piter->flags = flags;
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_part_iter_init);
|
||||
|
||||
/**
|
||||
* disk_part_iter_next - proceed iterator to the next partition and return it
|
||||
@ -217,57 +196,30 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init);
|
||||
*/
|
||||
struct block_device *disk_part_iter_next(struct disk_part_iter *piter)
|
||||
{
|
||||
struct disk_part_tbl *ptbl;
|
||||
int inc, end;
|
||||
struct block_device *part;
|
||||
unsigned long idx;
|
||||
|
||||
/* put the last partition */
|
||||
disk_part_iter_exit(piter);
|
||||
|
||||
/* get part_tbl */
|
||||
rcu_read_lock();
|
||||
ptbl = rcu_dereference(piter->disk->part_tbl);
|
||||
|
||||
/* determine iteration parameters */
|
||||
if (piter->flags & DISK_PITER_REVERSE) {
|
||||
inc = -1;
|
||||
if (piter->flags & (DISK_PITER_INCL_PART0 |
|
||||
DISK_PITER_INCL_EMPTY_PART0))
|
||||
end = -1;
|
||||
else
|
||||
end = 0;
|
||||
} else {
|
||||
inc = 1;
|
||||
end = ptbl->len;
|
||||
}
|
||||
|
||||
/* iterate to the next partition */
|
||||
for (; piter->idx != end; piter->idx += inc) {
|
||||
struct block_device *part;
|
||||
|
||||
part = rcu_dereference(ptbl->part[piter->idx]);
|
||||
if (!part)
|
||||
continue;
|
||||
piter->part = bdgrab(part);
|
||||
if (!piter->part)
|
||||
continue;
|
||||
xa_for_each_start(&piter->disk->part_tbl, idx, part, piter->idx) {
|
||||
if (!bdev_nr_sectors(part) &&
|
||||
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
|
||||
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
|
||||
piter->idx == 0)) {
|
||||
bdput(piter->part);
|
||||
piter->part = NULL;
|
||||
piter->idx == 0))
|
||||
continue;
|
||||
}
|
||||
|
||||
piter->idx += inc;
|
||||
piter->part = bdgrab(part);
|
||||
if (!piter->part)
|
||||
continue;
|
||||
piter->idx = idx + 1;
|
||||
break;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return piter->part;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_part_iter_next);
|
||||
|
||||
/**
|
||||
* disk_part_iter_exit - finish up partition iteration
|
||||
@ -284,91 +236,6 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
|
||||
bdput(piter->part);
|
||||
piter->part = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_part_iter_exit);
|
||||
|
||||
static inline int sector_in_part(struct block_device *part, sector_t sector)
|
||||
{
|
||||
return part->bd_start_sect <= sector &&
|
||||
sector < part->bd_start_sect + bdev_nr_sectors(part);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_map_sector_rcu - map sector to partition
|
||||
* @disk: gendisk of interest
|
||||
* @sector: sector to map
|
||||
*
|
||||
* Find out which partition @sector maps to on @disk. This is
|
||||
* primarily used for stats accounting.
|
||||
*
|
||||
* CONTEXT:
|
||||
* RCU read locked.
|
||||
*
|
||||
* RETURNS:
|
||||
* Found partition on success, part0 is returned if no partition matches
|
||||
* or the matched partition is being deleted.
|
||||
*/
|
||||
struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
|
||||
{
|
||||
struct disk_part_tbl *ptbl;
|
||||
struct block_device *part;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
ptbl = rcu_dereference(disk->part_tbl);
|
||||
|
||||
part = rcu_dereference(ptbl->last_lookup);
|
||||
if (part && sector_in_part(part, sector))
|
||||
goto out_unlock;
|
||||
|
||||
for (i = 1; i < ptbl->len; i++) {
|
||||
part = rcu_dereference(ptbl->part[i]);
|
||||
if (part && sector_in_part(part, sector)) {
|
||||
rcu_assign_pointer(ptbl->last_lookup, part);
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
part = disk->part0;
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
return part;
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_has_partitions
|
||||
* @disk: gendisk of interest
|
||||
*
|
||||
* Walk through the partition table and check if valid partition exists.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Don't care.
|
||||
*
|
||||
* RETURNS:
|
||||
* True if the gendisk has at least one valid non-zero size partition.
|
||||
* Otherwise false.
|
||||
*/
|
||||
bool disk_has_partitions(struct gendisk *disk)
|
||||
{
|
||||
struct disk_part_tbl *ptbl;
|
||||
int i;
|
||||
bool ret = false;
|
||||
|
||||
rcu_read_lock();
|
||||
ptbl = rcu_dereference(disk->part_tbl);
|
||||
|
||||
/* Iterate partitions skipping the whole device at index 0 */
|
||||
for (i = 1; i < ptbl->len; i++) {
|
||||
if (rcu_dereference(ptbl->part[i])) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_has_partitions);
|
||||
|
||||
/*
|
||||
* Can be deleted altogether. Later.
|
||||
@ -604,6 +471,18 @@ static char *bdevt_str(dev_t devt, char *buf)
|
||||
return buf;
|
||||
}
|
||||
|
||||
void disk_uevent(struct gendisk *disk, enum kobject_action action)
|
||||
{
|
||||
struct disk_part_iter piter;
|
||||
struct block_device *part;
|
||||
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
|
||||
while ((part = disk_part_iter_next(&piter)))
|
||||
kobject_uevent(bdev_kobj(part), action);
|
||||
disk_part_iter_exit(&piter);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_uevent);
|
||||
|
||||
static void disk_scan_partitions(struct gendisk *disk)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
@ -621,8 +500,6 @@ static void register_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups)
|
||||
{
|
||||
struct device *ddev = disk_to_dev(disk);
|
||||
struct disk_part_iter piter;
|
||||
struct block_device *part;
|
||||
int err;
|
||||
|
||||
ddev->parent = parent;
|
||||
@ -665,15 +542,9 @@ static void register_disk(struct device *parent, struct gendisk *disk,
|
||||
|
||||
disk_scan_partitions(disk);
|
||||
|
||||
/* announce disk after possible partitions are created */
|
||||
/* announce the disk and partitions after all partitions are created */
|
||||
dev_set_uevent_suppress(ddev, 0);
|
||||
kobject_uevent(&ddev->kobj, KOBJ_ADD);
|
||||
|
||||
/* announce possible partitions */
|
||||
disk_part_iter_init(&piter, disk, 0);
|
||||
while ((part = disk_part_iter_next(&piter)))
|
||||
kobject_uevent(bdev_kobj(part), KOBJ_ADD);
|
||||
disk_part_iter_exit(&piter);
|
||||
disk_uevent(disk, KOBJ_ADD);
|
||||
|
||||
if (disk->queue->backing_dev_info->dev) {
|
||||
err = sysfs_create_link(&ddev->kobj,
|
||||
@ -829,8 +700,7 @@ void del_gendisk(struct gendisk *disk)
|
||||
down_write(&bdev_lookup_sem);
|
||||
|
||||
/* invalidate stuff */
|
||||
disk_part_iter_init(&piter, disk,
|
||||
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
|
||||
while ((part = disk_part_iter_next(&piter))) {
|
||||
invalidate_partition(part);
|
||||
delete_partition(part);
|
||||
@ -929,7 +799,7 @@ struct block_device *bdget_disk(struct gendisk *disk, int partno)
|
||||
struct block_device *bdev = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
bdev = __disk_get_part(disk, partno);
|
||||
bdev = xa_load(&disk->part_tbl, partno);
|
||||
if (bdev && !bdgrab(bdev))
|
||||
bdev = NULL;
|
||||
rcu_read_unlock();
|
||||
@ -1319,83 +1189,6 @@ static const struct attribute_group *disk_attr_groups[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/**
|
||||
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
|
||||
* @disk: disk to replace part_tbl for
|
||||
* @new_ptbl: new part_tbl to install
|
||||
*
|
||||
* Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
|
||||
* original ptbl is freed using RCU callback.
|
||||
*
|
||||
* LOCKING:
|
||||
* Matching bd_mutex locked or the caller is the only user of @disk.
|
||||
*/
|
||||
static void disk_replace_part_tbl(struct gendisk *disk,
|
||||
struct disk_part_tbl *new_ptbl)
|
||||
{
|
||||
struct disk_part_tbl *old_ptbl =
|
||||
rcu_dereference_protected(disk->part_tbl, 1);
|
||||
|
||||
rcu_assign_pointer(disk->part_tbl, new_ptbl);
|
||||
|
||||
if (old_ptbl) {
|
||||
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
|
||||
kfree_rcu(old_ptbl, rcu_head);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_expand_part_tbl - expand disk->part_tbl
|
||||
* @disk: disk to expand part_tbl for
|
||||
* @partno: expand such that this partno can fit in
|
||||
*
|
||||
* Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
|
||||
* uses RCU to allow unlocked dereferencing for stats and other stuff.
|
||||
*
|
||||
* LOCKING:
|
||||
* Matching bd_mutex locked or the caller is the only user of @disk.
|
||||
* Might sleep.
|
||||
*
|
||||
* RETURNS:
|
||||
* 0 on success, -errno on failure.
|
||||
*/
|
||||
int disk_expand_part_tbl(struct gendisk *disk, int partno)
|
||||
{
|
||||
struct disk_part_tbl *old_ptbl =
|
||||
rcu_dereference_protected(disk->part_tbl, 1);
|
||||
struct disk_part_tbl *new_ptbl;
|
||||
int len = old_ptbl ? old_ptbl->len : 0;
|
||||
int i, target;
|
||||
|
||||
/*
|
||||
* check for int overflow, since we can get here from blkpg_ioctl()
|
||||
* with a user passed 'partno'.
|
||||
*/
|
||||
target = partno + 1;
|
||||
if (target < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* disk_max_parts() is zero during initialization, ignore if so */
|
||||
if (disk_max_parts(disk) && target > disk_max_parts(disk))
|
||||
return -EINVAL;
|
||||
|
||||
if (target <= len)
|
||||
return 0;
|
||||
|
||||
new_ptbl = kzalloc_node(struct_size(new_ptbl, part, target), GFP_KERNEL,
|
||||
disk->node_id);
|
||||
if (!new_ptbl)
|
||||
return -ENOMEM;
|
||||
|
||||
new_ptbl->len = target;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
|
||||
|
||||
disk_replace_part_tbl(disk, new_ptbl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_release - releases all allocated resources of the gendisk
|
||||
* @dev: the device representing this disk
|
||||
@ -1419,7 +1212,7 @@ static void disk_release(struct device *dev)
|
||||
blk_free_devt(dev->devt);
|
||||
disk_release_events(disk);
|
||||
kfree(disk->random);
|
||||
disk_replace_part_tbl(disk, NULL);
|
||||
xa_destroy(&disk->part_tbl);
|
||||
bdput(disk->part0);
|
||||
if (disk->queue)
|
||||
blk_put_queue(disk->queue);
|
||||
@ -1572,7 +1365,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
|
||||
struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
struct disk_part_tbl *ptbl;
|
||||
|
||||
if (minors > DISK_MAX_PARTS) {
|
||||
printk(KERN_ERR
|
||||
@ -1590,11 +1382,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
goto out_free_disk;
|
||||
|
||||
disk->node_id = node_id;
|
||||
if (disk_expand_part_tbl(disk, 0))
|
||||
goto out_bdput;
|
||||
|
||||
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
|
||||
rcu_assign_pointer(ptbl->part[0], disk->part0);
|
||||
xa_init(&disk->part_tbl);
|
||||
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
|
||||
goto out_destroy_part_tbl;
|
||||
|
||||
disk->minors = minors;
|
||||
rand_initialize_disk(disk);
|
||||
@ -1603,7 +1393,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
device_initialize(disk_to_dev(disk));
|
||||
return disk;
|
||||
|
||||
out_bdput:
|
||||
out_destroy_part_tbl:
|
||||
xa_destroy(&disk->part_tbl);
|
||||
bdput(disk->part0);
|
||||
out_free_disk:
|
||||
kfree(disk);
|
||||
@ -1638,31 +1429,32 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro)
|
||||
kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
|
||||
}
|
||||
|
||||
void set_disk_ro(struct gendisk *disk, int flag)
|
||||
/**
|
||||
* set_disk_ro - set a gendisk read-only
|
||||
* @disk: gendisk to operate on
|
||||
* @read_only: %true to set the disk read-only, %false set the disk read/write
|
||||
*
|
||||
* This function is used to indicate whether a given disk device should have its
|
||||
* read-only flag set. set_disk_ro() is typically used by device drivers to
|
||||
* indicate whether the underlying physical device is write-protected.
|
||||
*/
|
||||
void set_disk_ro(struct gendisk *disk, bool read_only)
|
||||
{
|
||||
struct disk_part_iter piter;
|
||||
struct block_device *part;
|
||||
|
||||
if (disk->part0->bd_read_only != flag) {
|
||||
set_disk_ro_uevent(disk, flag);
|
||||
disk->part0->bd_read_only = flag;
|
||||
if (read_only) {
|
||||
if (test_and_set_bit(GD_READ_ONLY, &disk->state))
|
||||
return;
|
||||
} else {
|
||||
if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
|
||||
return;
|
||||
}
|
||||
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
|
||||
while ((part = disk_part_iter_next(&piter)))
|
||||
part->bd_read_only = flag;
|
||||
disk_part_iter_exit(&piter);
|
||||
set_disk_ro_uevent(disk, read_only);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(set_disk_ro);
|
||||
|
||||
int bdev_read_only(struct block_device *bdev)
|
||||
{
|
||||
if (!bdev)
|
||||
return 0;
|
||||
return bdev->bd_read_only;
|
||||
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(bdev_read_only);
|
||||
|
||||
/*
|
||||
|
@ -1029,6 +1029,7 @@ static struct elevator_type kyber_sched = {
|
||||
#endif
|
||||
.elevator_attrs = kyber_sched_attrs,
|
||||
.elevator_name = "kyber",
|
||||
.elevator_features = ELEVATOR_F_MQ_AWARE,
|
||||
.elevator_owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
|
@ -386,8 +386,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
||||
spin_lock(&dd->lock);
|
||||
rq = __dd_dispatch_request(dd);
|
||||
spin_unlock(&dd->lock);
|
||||
if (rq)
|
||||
atomic_dec(&rq->mq_hctx->elevator_queued);
|
||||
|
||||
return rq;
|
||||
}
|
||||
@ -535,7 +533,6 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
dd_insert_request(hctx, rq, at_head);
|
||||
atomic_inc(&hctx->elevator_queued);
|
||||
}
|
||||
spin_unlock(&dd->lock);
|
||||
}
|
||||
@ -582,9 +579,6 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
|
||||
|
||||
if (!atomic_read(&hctx->elevator_queued))
|
||||
return false;
|
||||
|
||||
return !list_empty_careful(&dd->dispatch) ||
|
||||
!list_empty_careful(&dd->fifo_list[0]) ||
|
||||
!list_empty_careful(&dd->fifo_list[1]);
|
||||
|
@ -197,7 +197,7 @@ static ssize_t part_start_show(struct device *dev,
|
||||
static ssize_t part_ro_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_read_only);
|
||||
return sprintf(buf, "%d\n", bdev_read_only(dev_to_bdev(dev)));
|
||||
}
|
||||
|
||||
static ssize_t part_alignment_offset_show(struct device *dev,
|
||||
@ -289,13 +289,7 @@ struct device_type part_type = {
|
||||
*/
|
||||
void delete_partition(struct block_device *part)
|
||||
{
|
||||
struct gendisk *disk = part->bd_disk;
|
||||
struct disk_part_tbl *ptbl =
|
||||
rcu_dereference_protected(disk->part_tbl, 1);
|
||||
|
||||
rcu_assign_pointer(ptbl->part[part->bd_partno], NULL);
|
||||
rcu_assign_pointer(ptbl->last_lookup, NULL);
|
||||
|
||||
xa_erase(&part->bd_disk->part_tbl, part->bd_partno);
|
||||
kobject_put(part->bd_holder_dir);
|
||||
device_del(&part->bd_device);
|
||||
|
||||
@ -327,7 +321,6 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
struct device *ddev = disk_to_dev(disk);
|
||||
struct device *pdev;
|
||||
struct block_device *bdev;
|
||||
struct disk_part_tbl *ptbl;
|
||||
const char *dname;
|
||||
int err;
|
||||
|
||||
@ -343,18 +336,13 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
case BLK_ZONED_HA:
|
||||
pr_info("%s: disabling host aware zoned block device support due to partitions\n",
|
||||
disk->disk_name);
|
||||
disk->queue->limits.zoned = BLK_ZONED_NONE;
|
||||
blk_queue_set_zoned(disk, BLK_ZONED_NONE);
|
||||
break;
|
||||
case BLK_ZONED_NONE:
|
||||
break;
|
||||
}
|
||||
|
||||
err = disk_expand_part_tbl(disk, partno);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
|
||||
|
||||
if (ptbl->part[partno])
|
||||
if (xa_load(&disk->part_tbl, partno))
|
||||
return ERR_PTR(-EBUSY);
|
||||
|
||||
bdev = bdev_alloc(disk, partno);
|
||||
@ -363,7 +351,6 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
|
||||
bdev->bd_start_sect = start;
|
||||
bdev_set_nr_sectors(bdev, len);
|
||||
bdev->bd_read_only = get_disk_ro(disk);
|
||||
|
||||
if (info) {
|
||||
err = -ENOMEM;
|
||||
@ -408,8 +395,10 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
}
|
||||
|
||||
/* everything is up and running, commence */
|
||||
err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL);
|
||||
if (err)
|
||||
goto out_del;
|
||||
bdev_add(bdev, devt);
|
||||
rcu_assign_pointer(ptbl->part[partno], bdev);
|
||||
|
||||
/* suppress uevent if the disk suppresses it */
|
||||
if (!dev_get_uevent_suppress(ddev))
|
||||
@ -615,7 +604,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
|
||||
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
{
|
||||
struct parsed_partitions *state;
|
||||
int ret = -EAGAIN, p, highest;
|
||||
int ret = -EAGAIN, p;
|
||||
|
||||
if (!disk_part_scan_enabled(disk))
|
||||
return 0;
|
||||
@ -663,15 +652,6 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
/* tell userspace that the media / partition table may have changed */
|
||||
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
|
||||
|
||||
/*
|
||||
* Detect the highest partition number and preallocate disk->part_tbl.
|
||||
* This is an optimization and not strictly necessary.
|
||||
*/
|
||||
for (p = 1, highest = 0; p < state->limit; p++)
|
||||
if (state->parts[p].size)
|
||||
highest = p;
|
||||
disk_expand_part_tbl(disk, highest);
|
||||
|
||||
for (p = 1; p < state->limit; p++)
|
||||
if (!blk_add_partition(disk, bdev, state, p))
|
||||
goto out_free_state;
|
||||
|
@ -357,7 +357,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
|
||||
* (if he doesn't check that is his problem).
|
||||
* N.B. a non-zero SCSI status is _not_ necessarily an error.
|
||||
*/
|
||||
blk_execute_rq(q, bd_disk, rq, at_head);
|
||||
blk_execute_rq(bd_disk, rq, at_head);
|
||||
|
||||
hdr->duration = jiffies_to_msecs(jiffies - start_time);
|
||||
|
||||
@ -493,7 +493,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
|
||||
goto error;
|
||||
}
|
||||
|
||||
blk_execute_rq(q, disk, rq, 0);
|
||||
blk_execute_rq(disk, rq, 0);
|
||||
|
||||
err = req->result & 0xff; /* only 8 bit SCSI status */
|
||||
if (err) {
|
||||
@ -532,7 +532,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
|
||||
scsi_req(rq)->cmd[0] = cmd;
|
||||
scsi_req(rq)->cmd[4] = data;
|
||||
scsi_req(rq)->cmd_len = 6;
|
||||
blk_execute_rq(q, bd_disk, rq, 0);
|
||||
blk_execute_rq(bd_disk, rq, 0);
|
||||
err = scsi_req(rq)->result ? -EIO : 0;
|
||||
blk_put_request(rq);
|
||||
|
||||
|
@ -284,15 +284,11 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
|
||||
|
||||
static blk_qc_t brd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct brd_device *brd = bio->bi_disk->private_data;
|
||||
struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
struct bio_vec bvec;
|
||||
sector_t sector;
|
||||
struct bvec_iter iter;
|
||||
|
||||
sector = bio->bi_iter.bi_sector;
|
||||
if (bio_end_sector(bio) > get_capacity(bio->bi_disk))
|
||||
goto io_error;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
unsigned int len = bvec.bv_len;
|
||||
int err;
|
||||
|
@ -138,7 +138,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
|
||||
op_flags |= REQ_FUA | REQ_PREFLUSH;
|
||||
op_flags |= REQ_SYNC;
|
||||
|
||||
bio = bio_alloc_drbd(GFP_NOIO);
|
||||
bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
|
||||
bio_set_dev(bio, bdev->md_bdev);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
err = -EIO;
|
||||
|
@ -976,7 +976,7 @@ static void drbd_bm_endio(struct bio *bio)
|
||||
|
||||
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
|
||||
{
|
||||
struct bio *bio = bio_alloc_drbd(GFP_NOIO);
|
||||
struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
|
||||
struct drbd_device *device = ctx->device;
|
||||
struct drbd_bitmap *b = device->bitmap;
|
||||
struct page *page;
|
||||
|
@ -1422,8 +1422,6 @@ extern mempool_t drbd_md_io_page_pool;
|
||||
/* We also need to make sure we get a bio
|
||||
* when we need it for housekeeping purposes */
|
||||
extern struct bio_set drbd_md_io_bio_set;
|
||||
/* to allocate from that set */
|
||||
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
|
||||
|
||||
/* And a bio_set for cloning */
|
||||
extern struct bio_set drbd_io_bio_set;
|
||||
@ -1579,8 +1577,8 @@ static inline void drbd_submit_bio_noacct(struct drbd_device *device,
|
||||
int fault_type, struct bio *bio)
|
||||
{
|
||||
__release(local);
|
||||
if (!bio->bi_disk) {
|
||||
drbd_err(device, "drbd_submit_bio_noacct: bio->bi_disk == NULL\n");
|
||||
if (!bio->bi_bdev) {
|
||||
drbd_err(device, "drbd_submit_bio_noacct: bio->bi_bdev == NULL\n");
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
bio_endio(bio);
|
||||
return;
|
||||
|
@ -138,19 +138,6 @@ static const struct block_device_operations drbd_ops = {
|
||||
.release = drbd_release,
|
||||
};
|
||||
|
||||
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
if (!bioset_initialized(&drbd_md_io_bio_set))
|
||||
return bio_alloc(gfp_mask, 1);
|
||||
|
||||
bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set);
|
||||
if (!bio)
|
||||
return NULL;
|
||||
return bio;
|
||||
}
|
||||
|
||||
#ifdef __CHECKER__
|
||||
/* When checking with sparse, and this is an inline function, sparse will
|
||||
give tons of false positives. When this is a real functions sparse works.
|
||||
|
@ -30,7 +30,10 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
|
||||
return NULL;
|
||||
memset(req, 0, sizeof(*req));
|
||||
|
||||
drbd_req_make_private_bio(req, bio_src);
|
||||
req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
|
||||
req->private_bio->bi_private = req;
|
||||
req->private_bio->bi_end_io = drbd_request_endio;
|
||||
|
||||
req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
|
||||
| (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
|
||||
| (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
|
||||
@ -1595,7 +1598,7 @@ void do_submit(struct work_struct *ws)
|
||||
|
||||
blk_qc_t drbd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct drbd_device *device = bio->bi_disk->private_data;
|
||||
struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
|
||||
unsigned long start_jif;
|
||||
|
||||
blk_queue_split(&bio);
|
||||
|
@ -256,18 +256,6 @@ enum drbd_req_state_bits {
|
||||
#define MR_WRITE 1
|
||||
#define MR_READ 2
|
||||
|
||||
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
|
||||
{
|
||||
struct bio *bio;
|
||||
bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
|
||||
|
||||
req->private_bio = bio;
|
||||
|
||||
bio->bi_private = req;
|
||||
bio->bi_end_io = drbd_request_endio;
|
||||
bio->bi_next = NULL;
|
||||
}
|
||||
|
||||
/* Short lived temporary struct on the stack.
|
||||
* We could squirrel the error to be returned into
|
||||
* bio->bi_iter.bi_size, or similar. But that would be too ugly. */
|
||||
|
@ -1523,8 +1523,11 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
|
||||
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
|
||||
drbd_al_begin_io(device, &req->i);
|
||||
|
||||
drbd_req_make_private_bio(req, req->master_bio);
|
||||
req->private_bio = bio_clone_fast(req->master_bio, GFP_NOIO,
|
||||
&drbd_io_bio_set);
|
||||
bio_set_dev(req->private_bio, device->ldev->backing_bdev);
|
||||
req->private_bio->bi_private = req;
|
||||
req->private_bio->bi_end_io = drbd_request_endio;
|
||||
submit_bio_noacct(req->private_bio);
|
||||
|
||||
return 0;
|
||||
|
@ -1015,7 +1015,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
|
||||
rq->timeout = timeout;
|
||||
|
||||
/* insert request and run queue */
|
||||
blk_execute_rq(rq->q, NULL, rq, true);
|
||||
blk_execute_rq(NULL, rq, true);
|
||||
|
||||
if (int_cmd->status) {
|
||||
dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
|
||||
|
@ -1420,7 +1420,7 @@ static blk_qc_t null_submit_bio(struct bio *bio)
|
||||
{
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
sector_t nr_sectors = bio_sectors(bio);
|
||||
struct nullb *nullb = bio->bi_disk->private_data;
|
||||
struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
|
||||
struct nullb_queue *nq = nullb_to_queue(nullb);
|
||||
struct nullb_cmd *cmd;
|
||||
|
||||
|
@ -148,10 +148,6 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
sector += dev->zone_size_sects;
|
||||
}
|
||||
|
||||
q->limits.zoned = BLK_ZONED_HM;
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -160,6 +156,10 @@ int null_register_zoned_dev(struct nullb *nullb)
|
||||
struct nullb_device *dev = nullb->dev;
|
||||
struct request_queue *q = nullb->q;
|
||||
|
||||
blk_queue_set_zoned(nullb->disk, BLK_ZONED_HM);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
|
||||
if (queue_is_mq(q)) {
|
||||
int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
|
||||
|
||||
|
@ -781,7 +781,7 @@ static int pd_special_command(struct pd_unit *disk,
|
||||
req = blk_mq_rq_to_pdu(rq);
|
||||
|
||||
req->func = func;
|
||||
blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
|
||||
blk_execute_rq(disk->gd, rq, 0);
|
||||
blk_put_request(rq);
|
||||
return 0;
|
||||
}
|
||||
|
@ -722,7 +722,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
|
||||
if (cgc->quiet)
|
||||
rq->rq_flags |= RQF_QUIET;
|
||||
|
||||
blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
|
||||
blk_execute_rq(pd->bdev->bd_disk, rq, 0);
|
||||
if (scsi_req(rq)->result)
|
||||
ret = -EIO;
|
||||
out:
|
||||
@ -2374,7 +2374,7 @@ static blk_qc_t pkt_submit_bio(struct bio *bio)
|
||||
|
||||
blk_queue_split(&bio);
|
||||
|
||||
pd = bio->bi_disk->queue->queuedata;
|
||||
pd = bio->bi_bdev->bd_disk->queue->queuedata;
|
||||
if (!pd) {
|
||||
pr_err("%s incorrect request queue\n", bio_devname(bio, b));
|
||||
goto end_io;
|
||||
@ -2418,7 +2418,7 @@ static blk_qc_t pkt_submit_bio(struct bio *bio)
|
||||
split = bio;
|
||||
}
|
||||
|
||||
pkt_make_request_write(bio->bi_disk->queue, split);
|
||||
pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
|
||||
} while (split != bio);
|
||||
|
||||
return BLK_QC_T_NONE;
|
||||
|
@ -581,7 +581,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
|
||||
|
||||
static blk_qc_t ps3vram_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct ps3_system_bus_device *dev = bio->bi_disk->private_data;
|
||||
struct ps3_system_bus_device *dev = bio->bi_bdev->bd_disk->private_data;
|
||||
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
|
||||
int busy;
|
||||
|
||||
|
@ -692,29 +692,10 @@ static void rbd_release(struct gendisk *disk, fmode_t mode)
|
||||
put_device(&rbd_dev->dev);
|
||||
}
|
||||
|
||||
static int rbd_set_read_only(struct block_device *bdev, bool ro)
|
||||
{
|
||||
struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
|
||||
|
||||
/*
|
||||
* Both images mapped read-only and snapshots can't be marked
|
||||
* read-write.
|
||||
*/
|
||||
if (!ro) {
|
||||
if (rbd_is_ro(rbd_dev))
|
||||
return -EROFS;
|
||||
|
||||
rbd_assert(!rbd_is_snap(rbd_dev));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct block_device_operations rbd_bd_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = rbd_open,
|
||||
.release = rbd_release,
|
||||
.set_read_only = rbd_set_read_only,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -122,7 +122,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
|
||||
|
||||
static blk_qc_t rsxx_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct rsxx_cardinfo *card = bio->bi_disk->private_data;
|
||||
struct rsxx_cardinfo *card = bio->bi_bdev->bd_disk->private_data;
|
||||
struct rsxx_bio_meta *bio_meta;
|
||||
blk_status_t st = BLK_STS_IOERR;
|
||||
|
||||
|
@ -539,7 +539,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
|
||||
spin_unlock_irq(&host->lock);
|
||||
|
||||
DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
|
||||
blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
|
||||
blk_execute_rq_nowait(NULL, rq, true, NULL);
|
||||
|
||||
return 0;
|
||||
|
||||
@ -578,7 +578,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
|
||||
crq->msg_bucket = (u32) rc;
|
||||
|
||||
DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
|
||||
blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
|
||||
blk_execute_rq_nowait(NULL, rq, true, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -521,7 +521,7 @@ static int mm_check_plugged(struct cardinfo *card)
|
||||
|
||||
static blk_qc_t mm_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct cardinfo *card = bio->bi_disk->private_data;
|
||||
struct cardinfo *card = bio->bi_bdev->bd_disk->private_data;
|
||||
|
||||
pr_debug("mm_make_request %llu %u\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
|
@ -320,7 +320,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
|
||||
blk_execute_rq(vblk->disk, req, false);
|
||||
err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
|
||||
out:
|
||||
blk_put_request(req);
|
||||
|
@ -1596,7 +1596,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
|
||||
*/
|
||||
static blk_qc_t zram_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct zram *zram = bio->bi_disk->private_data;
|
||||
struct zram *zram = bio->bi_bdev->bd_disk->private_data;
|
||||
|
||||
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
|
||||
bio->bi_iter.bi_size)) {
|
||||
|
@ -2214,7 +2214,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
|
||||
rq->timeout = 60 * HZ;
|
||||
bio = rq->bio;
|
||||
|
||||
blk_execute_rq(q, cdi->disk, rq, 0);
|
||||
blk_execute_rq(cdi->disk, rq, 0);
|
||||
if (scsi_req(rq)->result) {
|
||||
struct scsi_sense_hdr sshdr;
|
||||
|
||||
|
@ -107,7 +107,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
|
||||
memcpy(scsi_req(rq)->cmd, pc->c, 12);
|
||||
if (drive->media == ide_tape)
|
||||
scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
|
||||
blk_execute_rq(drive->queue, disk, rq, 0);
|
||||
blk_execute_rq(disk, rq, 0);
|
||||
error = scsi_req(rq)->result ? -EIO : 0;
|
||||
put_req:
|
||||
blk_put_request(rq);
|
||||
|
@ -467,7 +467,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
|
||||
}
|
||||
}
|
||||
|
||||
blk_execute_rq(drive->queue, info->disk, rq, 0);
|
||||
blk_execute_rq(info->disk, rq, 0);
|
||||
error = scsi_req(rq)->result ? -EIO : 0;
|
||||
|
||||
if (buffer)
|
||||
|
@ -299,7 +299,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
|
||||
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
|
||||
ide_req(rq)->type = ATA_PRIV_MISC;
|
||||
rq->rq_flags = RQF_QUIET;
|
||||
blk_execute_rq(drive->queue, cd->disk, rq, 0);
|
||||
blk_execute_rq(cd->disk, rq, 0);
|
||||
ret = scsi_req(rq)->result ? -EIO : 0;
|
||||
blk_put_request(rq);
|
||||
/*
|
||||
|
@ -173,7 +173,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
|
||||
*(int *)&scsi_req(rq)->cmd[1] = arg;
|
||||
ide_req(rq)->special = setting->set;
|
||||
|
||||
blk_execute_rq(q, NULL, rq, 0);
|
||||
blk_execute_rq(NULL, rq, 0);
|
||||
ret = scsi_req(rq)->result;
|
||||
blk_put_request(rq);
|
||||
|
||||
|
@ -482,7 +482,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
|
||||
|
||||
drive->mult_req = arg;
|
||||
drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
|
||||
blk_execute_rq(drive->queue, NULL, rq, 0);
|
||||
blk_execute_rq(NULL, rq, 0);
|
||||
blk_put_request(rq);
|
||||
|
||||
return (drive->mult_count == arg) ? 0 : -EIO;
|
||||
|
@ -137,7 +137,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, void __user *argp)
|
||||
|
||||
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
|
||||
ide_req(rq)->type = ATA_PRIV_TASKFILE;
|
||||
blk_execute_rq(drive->queue, NULL, rq, 0);
|
||||
blk_execute_rq(NULL, rq, 0);
|
||||
err = scsi_req(rq)->result ? -EIO : 0;
|
||||
blk_put_request(rq);
|
||||
|
||||
@ -235,7 +235,7 @@ static int generic_drive_reset(ide_drive_t *drive)
|
||||
ide_req(rq)->type = ATA_PRIV_MISC;
|
||||
scsi_req(rq)->cmd_len = 1;
|
||||
scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
|
||||
blk_execute_rq(drive->queue, NULL, rq, 1);
|
||||
blk_execute_rq(NULL, rq, 1);
|
||||
ret = scsi_req(rq)->result;
|
||||
blk_put_request(rq);
|
||||
return ret;
|
||||
|
@ -37,7 +37,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
|
||||
scsi_req(rq)->cmd_len = 1;
|
||||
ide_req(rq)->type = ATA_PRIV_MISC;
|
||||
ide_req(rq)->special = &timeout;
|
||||
blk_execute_rq(q, NULL, rq, 1);
|
||||
blk_execute_rq(NULL, rq, 1);
|
||||
rc = scsi_req(rq)->result ? -EIO : 0;
|
||||
blk_put_request(rq);
|
||||
if (rc)
|
||||
|
@ -27,7 +27,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
|
||||
mesg.event = PM_EVENT_FREEZE;
|
||||
rqpm.pm_state = mesg.event;
|
||||
|
||||
blk_execute_rq(drive->queue, NULL, rq, 0);
|
||||
blk_execute_rq(NULL, rq, 0);
|
||||
ret = scsi_req(rq)->result ? -EIO : 0;
|
||||
blk_put_request(rq);
|
||||
|
||||
@ -50,7 +50,7 @@ static int ide_pm_execute_rq(struct request *rq)
|
||||
blk_mq_end_request(rq, BLK_STS_OK);
|
||||
return -ENXIO;
|
||||
}
|
||||
blk_execute_rq(q, NULL, rq, true);
|
||||
blk_execute_rq(NULL, rq, true);
|
||||
|
||||
return scsi_req(rq)->result ? -EIO : 0;
|
||||
}
|
||||
|
@ -868,7 +868,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
blk_execute_rq(drive->queue, tape->disk, rq, 0);
|
||||
blk_execute_rq(tape->disk, rq, 0);
|
||||
|
||||
/* calculate the number of transferred bytes and update buffer state */
|
||||
size -= scsi_req(rq)->resid_len;
|
||||
|
@ -443,7 +443,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
|
||||
ide_req(rq)->special = cmd;
|
||||
cmd->rq = rq;
|
||||
|
||||
blk_execute_rq(drive->queue, NULL, rq, 0);
|
||||
blk_execute_rq(NULL, rq, 0);
|
||||
error = scsi_req(rq)->result ? -EIO : 0;
|
||||
put_req:
|
||||
blk_put_request(rq);
|
||||
|
@ -49,7 +49,7 @@ struct bio_set pblk_bio_set;
|
||||
|
||||
static blk_qc_t pblk_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct pblk *pblk = bio->bi_disk->queue->queuedata;
|
||||
struct pblk *pblk = bio->bi_bdev->bd_disk->queue->queuedata;
|
||||
|
||||
if (bio_op(bio) == REQ_OP_DISCARD) {
|
||||
pblk_discard(pblk, bio);
|
||||
|
@ -114,7 +114,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
|
||||
check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
|
||||
if (!check)
|
||||
return;
|
||||
check->bi_disk = bio->bi_disk;
|
||||
bio_set_dev(check, bio->bi_bdev);
|
||||
check->bi_opf = REQ_OP_READ;
|
||||
check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
|
||||
check->bi_iter.bi_size = bio->bi_iter.bi_size;
|
||||
|
@ -475,7 +475,7 @@ struct search {
|
||||
unsigned int read_dirty_data:1;
|
||||
unsigned int cache_missed:1;
|
||||
|
||||
struct block_device *part;
|
||||
struct block_device *orig_bdev;
|
||||
unsigned long start_time;
|
||||
|
||||
struct btree_op op;
|
||||
@ -670,8 +670,8 @@ static void bio_complete(struct search *s)
|
||||
{
|
||||
if (s->orig_bio) {
|
||||
/* Count on bcache device */
|
||||
part_end_io_acct(s->part, s->orig_bio, s->start_time);
|
||||
|
||||
bio_end_io_acct_remapped(s->orig_bio, s->start_time,
|
||||
s->orig_bdev);
|
||||
trace_bcache_request_end(s->d, s->orig_bio);
|
||||
s->orig_bio->bi_status = s->iop.status;
|
||||
bio_endio(s->orig_bio);
|
||||
@ -714,7 +714,8 @@ static void search_free(struct closure *cl)
|
||||
}
|
||||
|
||||
static inline struct search *search_alloc(struct bio *bio,
|
||||
struct bcache_device *d)
|
||||
struct bcache_device *d, struct block_device *orig_bdev,
|
||||
unsigned long start_time)
|
||||
{
|
||||
struct search *s;
|
||||
|
||||
@ -732,7 +733,8 @@ static inline struct search *search_alloc(struct bio *bio,
|
||||
s->write = op_is_write(bio_op(bio));
|
||||
s->read_dirty_data = 0;
|
||||
/* Count on the bcache device */
|
||||
s->start_time = part_start_io_acct(d->disk, &s->part, bio);
|
||||
s->orig_bdev = orig_bdev;
|
||||
s->start_time = start_time;
|
||||
s->iop.c = d->c;
|
||||
s->iop.bio = NULL;
|
||||
s->iop.inode = d->id;
|
||||
@ -894,7 +896,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
||||
!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
|
||||
s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
|
||||
reada = min_t(sector_t, dc->readahead >> 9,
|
||||
get_capacity(bio->bi_disk) - bio_end_sector(bio));
|
||||
get_capacity(bio->bi_bdev->bd_disk) -
|
||||
bio_end_sector(bio));
|
||||
|
||||
s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
|
||||
|
||||
@ -1073,7 +1076,7 @@ struct detached_dev_io_private {
|
||||
unsigned long start_time;
|
||||
bio_end_io_t *bi_end_io;
|
||||
void *bi_private;
|
||||
struct block_device *part;
|
||||
struct block_device *orig_bdev;
|
||||
};
|
||||
|
||||
static void detached_dev_end_io(struct bio *bio)
|
||||
@ -1085,7 +1088,7 @@ static void detached_dev_end_io(struct bio *bio)
|
||||
bio->bi_private = ddip->bi_private;
|
||||
|
||||
/* Count on the bcache device */
|
||||
part_end_io_acct(ddip->part, bio, ddip->start_time);
|
||||
bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
|
||||
|
||||
if (bio->bi_status) {
|
||||
struct cached_dev *dc = container_of(ddip->d,
|
||||
@ -1098,7 +1101,8 @@ static void detached_dev_end_io(struct bio *bio)
|
||||
bio->bi_end_io(bio);
|
||||
}
|
||||
|
||||
static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
|
||||
static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
|
||||
struct block_device *orig_bdev, unsigned long start_time)
|
||||
{
|
||||
struct detached_dev_io_private *ddip;
|
||||
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
|
||||
@ -1111,7 +1115,8 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
|
||||
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
|
||||
ddip->d = d;
|
||||
/* Count on the bcache device */
|
||||
ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio);
|
||||
ddip->orig_bdev = orig_bdev;
|
||||
ddip->start_time = start_time;
|
||||
ddip->bi_end_io = bio->bi_end_io;
|
||||
ddip->bi_private = bio->bi_private;
|
||||
bio->bi_end_io = detached_dev_end_io;
|
||||
@ -1167,8 +1172,10 @@ static void quit_max_writeback_rate(struct cache_set *c,
|
||||
blk_qc_t cached_dev_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct search *s;
|
||||
struct bcache_device *d = bio->bi_disk->private_data;
|
||||
struct block_device *orig_bdev = bio->bi_bdev;
|
||||
struct bcache_device *d = orig_bdev->bd_disk->private_data;
|
||||
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
|
||||
unsigned long start_time;
|
||||
int rw = bio_data_dir(bio);
|
||||
|
||||
if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
|
||||
@ -1193,11 +1200,13 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
start_time = bio_start_io_acct(bio);
|
||||
|
||||
bio_set_dev(bio, dc->bdev);
|
||||
bio->bi_iter.bi_sector += dc->sb.data_offset;
|
||||
|
||||
if (cached_dev_get(dc)) {
|
||||
s = search_alloc(bio, d);
|
||||
s = search_alloc(bio, d, orig_bdev, start_time);
|
||||
trace_bcache_request_start(s->d, bio);
|
||||
|
||||
if (!bio->bi_iter.bi_size) {
|
||||
@ -1218,7 +1227,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
|
||||
}
|
||||
} else
|
||||
/* I/O request sent to backing device */
|
||||
detached_dev_do_request(d, bio);
|
||||
detached_dev_do_request(d, bio, orig_bdev, start_time);
|
||||
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
@ -1274,7 +1283,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct search *s;
|
||||
struct closure *cl;
|
||||
struct bcache_device *d = bio->bi_disk->private_data;
|
||||
struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
|
||||
|
||||
if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
@ -1282,7 +1291,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
s = search_alloc(bio, d);
|
||||
s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio));
|
||||
cl = &s->cl;
|
||||
bio = &s->bio.bio;
|
||||
|
||||
|
@ -1939,7 +1939,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
|
||||
goto err;
|
||||
|
||||
if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
|
||||
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
|
||||
BIOSET_NEED_RESCUER))
|
||||
goto err;
|
||||
|
||||
c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb);
|
||||
|
@ -18,8 +18,7 @@
|
||||
*/
|
||||
|
||||
struct dm_bio_details {
|
||||
struct gendisk *bi_disk;
|
||||
u8 bi_partno;
|
||||
struct block_device *bi_bdev;
|
||||
int __bi_remaining;
|
||||
unsigned long bi_flags;
|
||||
struct bvec_iter bi_iter;
|
||||
@ -31,8 +30,7 @@ struct dm_bio_details {
|
||||
|
||||
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
|
||||
{
|
||||
bd->bi_disk = bio->bi_disk;
|
||||
bd->bi_partno = bio->bi_partno;
|
||||
bd->bi_bdev = bio->bi_bdev;
|
||||
bd->bi_flags = bio->bi_flags;
|
||||
bd->bi_iter = bio->bi_iter;
|
||||
bd->__bi_remaining = atomic_read(&bio->__bi_remaining);
|
||||
@ -44,8 +42,7 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
|
||||
|
||||
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
|
||||
{
|
||||
bio->bi_disk = bd->bi_disk;
|
||||
bio->bi_partno = bd->bi_partno;
|
||||
bio->bi_bdev = bd->bi_bdev;
|
||||
bio->bi_flags = bd->bi_flags;
|
||||
bio->bi_iter = bd->bi_iter;
|
||||
atomic_set(&bio->__bi_remaining, bd->__bi_remaining);
|
||||
|
@ -449,7 +449,7 @@ static int __check_incompat_features(struct cache_disk_superblock *disk_super,
|
||||
/*
|
||||
* Check for read-only metadata to skip the following RDWR checks.
|
||||
*/
|
||||
if (get_disk_ro(cmd->bdev->bd_disk))
|
||||
if (bdev_read_only(cmd->bdev))
|
||||
return 0;
|
||||
|
||||
features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
|
||||
|
@ -85,12 +85,6 @@ struct clone {
|
||||
|
||||
struct dm_clone_metadata *cmd;
|
||||
|
||||
/*
|
||||
* bio used to flush the destination device, before committing the
|
||||
* metadata.
|
||||
*/
|
||||
struct bio flush_bio;
|
||||
|
||||
/* Region hydration hash table */
|
||||
struct hash_table_bucket *ht;
|
||||
|
||||
@ -1155,11 +1149,7 @@ static int commit_metadata(struct clone *clone, bool *dest_dev_flushed)
|
||||
goto out;
|
||||
}
|
||||
|
||||
bio_reset(&clone->flush_bio);
|
||||
bio_set_dev(&clone->flush_bio, clone->dest_dev->bdev);
|
||||
clone->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
|
||||
|
||||
r = submit_bio_wait(&clone->flush_bio);
|
||||
r = blkdev_issue_flush(clone->dest_dev->bdev);
|
||||
if (unlikely(r)) {
|
||||
__metadata_operation_failed(clone, "flush destination device", r);
|
||||
goto out;
|
||||
@ -1886,7 +1876,6 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
bio_list_init(&clone->deferred_flush_completions);
|
||||
clone->hydration_offset = 0;
|
||||
atomic_set(&clone->hydrations_in_flight, 0);
|
||||
bio_init(&clone->flush_bio, NULL, 0);
|
||||
|
||||
clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
|
||||
if (!clone->wq) {
|
||||
@ -1958,7 +1947,6 @@ static void clone_dtr(struct dm_target *ti)
|
||||
struct clone *clone = ti->private;
|
||||
|
||||
mutex_destroy(&clone->commit_lock);
|
||||
bio_uninit(&clone->flush_bio);
|
||||
|
||||
for (i = 0; i < clone->nr_ctr_args; i++)
|
||||
kfree(clone->ctr_args[i]);
|
||||
|
@ -145,7 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
|
||||
|
||||
struct dm_raid1_bio_record {
|
||||
struct mirror *m;
|
||||
/* if details->bi_disk == NULL, details were not saved */
|
||||
/* if details->bi_bdev == NULL, details were not saved */
|
||||
struct dm_bio_details details;
|
||||
region_t write_region;
|
||||
};
|
||||
@ -1190,7 +1190,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
|
||||
struct dm_raid1_bio_record *bio_record =
|
||||
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
|
||||
|
||||
bio_record->details.bi_disk = NULL;
|
||||
bio_record->details.bi_bdev = NULL;
|
||||
|
||||
if (rw == WRITE) {
|
||||
/* Save region for mirror_end_io() handler */
|
||||
@ -1257,7 +1257,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
|
||||
goto out;
|
||||
|
||||
if (unlikely(*error)) {
|
||||
if (!bio_record->details.bi_disk) {
|
||||
if (!bio_record->details.bi_bdev) {
|
||||
/*
|
||||
* There wasn't enough memory to record necessary
|
||||
* information for a retry or there was no other
|
||||
@ -1282,7 +1282,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
|
||||
bd = &bio_record->details;
|
||||
|
||||
dm_bio_restore(bd, bio);
|
||||
bio_record->details.bi_disk = NULL;
|
||||
bio_record->details.bi_bdev = NULL;
|
||||
bio->bi_status = 0;
|
||||
|
||||
queue_bio(ms, bio, rw);
|
||||
@ -1292,7 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
|
||||
}
|
||||
|
||||
out:
|
||||
bio_record->details.bi_disk = NULL;
|
||||
bio_record->details.bi_bdev = NULL;
|
||||
|
||||
return DM_ENDIO_DONE;
|
||||
}
|
||||
|
@ -636,7 +636,7 @@ static int __check_incompat_features(struct thin_disk_superblock *disk_super,
|
||||
/*
|
||||
* Check for read-only metadata to skip the following RDWR checks.
|
||||
*/
|
||||
if (get_disk_ro(pmd->bdev->bd_disk))
|
||||
if (bdev_read_only(pmd->bdev))
|
||||
return 0;
|
||||
|
||||
features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP;
|
||||
|
@ -819,7 +819,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
|
||||
ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block,
|
||||
mblk->page);
|
||||
if (ret == 0)
|
||||
ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
|
||||
ret = blkdev_issue_flush(dev->bdev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -862,7 +862,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
|
||||
|
||||
/* Flush drive cache (this will also sync data) */
|
||||
if (ret == 0)
|
||||
ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
|
||||
ret = blkdev_issue_flush(dev->bdev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -933,7 +933,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
|
||||
|
||||
/* If there are no dirty metadata blocks, just flush the device cache */
|
||||
if (list_empty(&write_list)) {
|
||||
ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
|
||||
ret = blkdev_issue_flush(dev->bdev);
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -977,16 +977,17 @@ static void clone_endio(struct bio *bio)
|
||||
struct mapped_device *md = tio->io->md;
|
||||
dm_endio_fn endio = tio->ti->type->end_io;
|
||||
struct bio *orig_bio = io->orig_bio;
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
|
||||
if (unlikely(error == BLK_STS_TARGET)) {
|
||||
if (bio_op(bio) == REQ_OP_DISCARD &&
|
||||
!bio->bi_disk->queue->limits.max_discard_sectors)
|
||||
!q->limits.max_discard_sectors)
|
||||
disable_discard(md);
|
||||
else if (bio_op(bio) == REQ_OP_WRITE_SAME &&
|
||||
!bio->bi_disk->queue->limits.max_write_same_sectors)
|
||||
!q->limits.max_write_same_sectors)
|
||||
disable_write_same(md);
|
||||
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
|
||||
!bio->bi_disk->queue->limits.max_write_zeroes_sectors)
|
||||
!q->limits.max_write_zeroes_sectors)
|
||||
disable_write_zeroes(md);
|
||||
}
|
||||
|
||||
@ -996,7 +997,7 @@ static void clone_endio(struct bio *bio)
|
||||
*/
|
||||
if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
|
||||
sector_t written_sector = bio->bi_iter.bi_sector;
|
||||
struct request_queue *q = orig_bio->bi_disk->queue;
|
||||
struct request_queue *q = orig_bio->bi_bdev->bd_disk->queue;
|
||||
u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
|
||||
|
||||
orig_bio->bi_iter.bi_sector += written_sector & mask;
|
||||
@ -1422,8 +1423,7 @@ static int __send_empty_flush(struct clone_info *ci)
|
||||
*/
|
||||
bio_init(&flush_bio, NULL, 0);
|
||||
flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
|
||||
flush_bio.bi_disk = ci->io->md->disk;
|
||||
bio_associate_blkg(&flush_bio);
|
||||
bio_set_dev(&flush_bio, ci->io->md->disk->part0);
|
||||
|
||||
ci->bio = &flush_bio;
|
||||
ci->sector_count = 0;
|
||||
@ -1626,7 +1626,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
|
||||
|
||||
static blk_qc_t dm_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct mapped_device *md = bio->bi_disk->private_data;
|
||||
struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
int srcu_idx;
|
||||
struct dm_table *map;
|
||||
|
@ -252,7 +252,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
start_sector + data_offset;
|
||||
|
||||
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_disk->queue))) {
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
|
@ -340,24 +340,6 @@ static int start_readonly;
|
||||
*/
|
||||
static bool create_on_open = true;
|
||||
|
||||
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
|
||||
struct mddev *mddev)
|
||||
{
|
||||
if (!mddev || !bioset_initialized(&mddev->bio_set))
|
||||
return bio_alloc(gfp_mask, nr_iovecs);
|
||||
|
||||
return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
|
||||
|
||||
static struct bio *md_bio_alloc_sync(struct mddev *mddev)
|
||||
{
|
||||
if (!mddev || !bioset_initialized(&mddev->sync_set))
|
||||
return bio_alloc(GFP_NOIO, 1);
|
||||
|
||||
return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
|
||||
}
|
||||
|
||||
/*
|
||||
* We have a system wide 'event count' that is incremented
|
||||
* on any 'interesting' event, and readers of /proc/mdstat
|
||||
@ -463,8 +445,8 @@ struct md_io {
|
||||
struct mddev *mddev;
|
||||
bio_end_io_t *orig_bi_end_io;
|
||||
void *orig_bi_private;
|
||||
struct block_device *orig_bi_bdev;
|
||||
unsigned long start_time;
|
||||
struct block_device *part;
|
||||
};
|
||||
|
||||
static void md_end_io(struct bio *bio)
|
||||
@ -472,7 +454,7 @@ static void md_end_io(struct bio *bio)
|
||||
struct md_io *md_io = bio->bi_private;
|
||||
struct mddev *mddev = md_io->mddev;
|
||||
|
||||
part_end_io_acct(md_io->part, bio, md_io->start_time);
|
||||
bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev);
|
||||
|
||||
bio->bi_end_io = md_io->orig_bi_end_io;
|
||||
bio->bi_private = md_io->orig_bi_private;
|
||||
@ -486,7 +468,7 @@ static void md_end_io(struct bio *bio)
|
||||
static blk_qc_t md_submit_bio(struct bio *bio)
|
||||
{
|
||||
const int rw = bio_data_dir(bio);
|
||||
struct mddev *mddev = bio->bi_disk->private_data;
|
||||
struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
|
||||
|
||||
if (mddev == NULL || mddev->pers == NULL) {
|
||||
bio_io_error(bio);
|
||||
@ -514,12 +496,12 @@ static blk_qc_t md_submit_bio(struct bio *bio)
|
||||
md_io->mddev = mddev;
|
||||
md_io->orig_bi_end_io = bio->bi_end_io;
|
||||
md_io->orig_bi_private = bio->bi_private;
|
||||
md_io->orig_bi_bdev = bio->bi_bdev;
|
||||
|
||||
bio->bi_end_io = md_end_io;
|
||||
bio->bi_private = md_io;
|
||||
|
||||
md_io->start_time = part_start_io_acct(mddev->gendisk,
|
||||
&md_io->part, bio);
|
||||
md_io->start_time = bio_start_io_acct(bio);
|
||||
}
|
||||
|
||||
/* bio could be mergeable after passing to underlayer */
|
||||
@ -613,7 +595,7 @@ static void submit_flushes(struct work_struct *ws)
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
rcu_read_unlock();
|
||||
bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
|
||||
bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
|
||||
bi->bi_end_io = md_end_flush;
|
||||
bi->bi_private = rdev;
|
||||
bio_set_dev(bi, rdev->bdev);
|
||||
@ -999,7 +981,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
return;
|
||||
|
||||
bio = md_bio_alloc_sync(mddev);
|
||||
bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
|
||||
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
|
||||
@ -1031,29 +1013,29 @@ int md_super_wait(struct mddev *mddev)
|
||||
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
|
||||
struct page *page, int op, int op_flags, bool metadata_op)
|
||||
{
|
||||
struct bio *bio = md_bio_alloc_sync(rdev->mddev);
|
||||
int ret;
|
||||
struct bio bio;
|
||||
struct bio_vec bvec;
|
||||
|
||||
bio_init(&bio, &bvec, 1);
|
||||
|
||||
if (metadata_op && rdev->meta_bdev)
|
||||
bio_set_dev(bio, rdev->meta_bdev);
|
||||
bio_set_dev(&bio, rdev->meta_bdev);
|
||||
else
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
bio_set_op_attrs(bio, op, op_flags);
|
||||
bio_set_dev(&bio, rdev->bdev);
|
||||
bio.bi_opf = op | op_flags;
|
||||
if (metadata_op)
|
||||
bio->bi_iter.bi_sector = sector + rdev->sb_start;
|
||||
bio.bi_iter.bi_sector = sector + rdev->sb_start;
|
||||
else if (rdev->mddev->reshape_position != MaxSector &&
|
||||
(rdev->mddev->reshape_backwards ==
|
||||
(sector >= rdev->mddev->reshape_position)))
|
||||
bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
|
||||
bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
|
||||
else
|
||||
bio->bi_iter.bi_sector = sector + rdev->data_offset;
|
||||
bio_add_page(bio, page, size, 0);
|
||||
bio.bi_iter.bi_sector = sector + rdev->data_offset;
|
||||
bio_add_page(&bio, page, size, 0);
|
||||
|
||||
submit_bio_wait(bio);
|
||||
submit_bio_wait(&bio);
|
||||
|
||||
ret = !bio->bi_status;
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
return !bio.bi_status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sync_page_io);
|
||||
|
||||
@ -2417,6 +2399,12 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL(md_integrity_add_rdev);
|
||||
|
||||
static bool rdev_read_only(struct md_rdev *rdev)
|
||||
{
|
||||
return bdev_read_only(rdev->bdev) ||
|
||||
(rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
|
||||
}
|
||||
|
||||
static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
@ -2426,8 +2414,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
|
||||
if (find_rdev(mddev, rdev->bdev->bd_dev))
|
||||
return -EEXIST;
|
||||
|
||||
if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
|
||||
mddev->pers)
|
||||
if (rdev_read_only(rdev) && mddev->pers)
|
||||
return -EROFS;
|
||||
|
||||
/* make sure rdev->sectors exceeds mddev->dev_sectors */
|
||||
@ -5861,9 +5848,7 @@ int md_run(struct mddev *mddev)
|
||||
continue;
|
||||
sync_blockdev(rdev->bdev);
|
||||
invalidate_bdev(rdev->bdev);
|
||||
if (mddev->ro != 1 &&
|
||||
(bdev_read_only(rdev->bdev) ||
|
||||
bdev_read_only(rdev->meta_bdev))) {
|
||||
if (mddev->ro != 1 && rdev_read_only(rdev)) {
|
||||
mddev->ro = 1;
|
||||
if (mddev->gendisk)
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
@ -6158,7 +6143,7 @@ static int restart_array(struct mddev *mddev)
|
||||
if (test_bit(Journal, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags))
|
||||
has_journal = true;
|
||||
if (bdev_read_only(rdev->bdev))
|
||||
if (rdev_read_only(rdev))
|
||||
has_readonly = true;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
@ -556,7 +556,7 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect
|
||||
|
||||
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
|
||||
{
|
||||
atomic_add(nr_sectors, &bio->bi_disk->sync_io);
|
||||
md_sync_acct(bio->bi_bdev, nr_sectors);
|
||||
}
|
||||
|
||||
struct md_personality
|
||||
@ -742,8 +742,6 @@ extern void md_rdev_clear(struct md_rdev *rdev);
|
||||
extern void md_handle_request(struct mddev *mddev, struct bio *bio);
|
||||
extern void mddev_suspend(struct mddev *mddev);
|
||||
extern void mddev_resume(struct mddev *mddev);
|
||||
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
|
||||
struct mddev *mddev);
|
||||
|
||||
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
|
||||
extern void md_update_sb(struct mddev *mddev, int force);
|
||||
@ -793,14 +791,14 @@ static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
|
||||
static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
|
||||
!bio->bi_disk->queue->limits.max_write_same_sectors)
|
||||
!bio->bi_bdev->bd_disk->queue->limits.max_write_same_sectors)
|
||||
mddev->queue->limits.max_write_same_sectors = 0;
|
||||
}
|
||||
|
||||
static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
|
||||
!bio->bi_disk->queue->limits.max_write_zeroes_sectors)
|
||||
!bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
|
||||
mddev->queue->limits.max_write_zeroes_sectors = 0;
|
||||
}
|
||||
|
||||
|
@ -794,13 +794,13 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
struct md_rdev *rdev = (void *)bio->bi_disk;
|
||||
struct md_rdev *rdev = (void *)bio->bi_bdev;
|
||||
bio->bi_next = NULL;
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_disk->queue)))
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
@ -1104,7 +1104,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
|
||||
int i = 0;
|
||||
struct bio *behind_bio = NULL;
|
||||
|
||||
behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
|
||||
behind_bio = bio_alloc_bioset(GFP_NOIO, vcnt, &r1_bio->mddev->bio_set);
|
||||
if (!behind_bio)
|
||||
return;
|
||||
|
||||
@ -1520,7 +1520,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
trace_block_bio_remap(mbio, disk_devt(mddev->gendisk),
|
||||
r1_bio->sector);
|
||||
/* flush_pending_writes() needs access to the rdev so...*/
|
||||
mbio->bi_disk = (void *)conf->mirrors[i].rdev;
|
||||
mbio->bi_bdev = (void *)conf->mirrors[i].rdev;
|
||||
|
||||
cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
|
||||
if (cb)
|
||||
|
@ -882,13 +882,13 @@ static void flush_pending_writes(struct r10conf *conf)
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
struct md_rdev *rdev = (void*)bio->bi_disk;
|
||||
struct md_rdev *rdev = (void*)bio->bi_bdev;
|
||||
bio->bi_next = NULL;
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_disk->queue)))
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
@ -1075,13 +1075,13 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
|
||||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
struct md_rdev *rdev = (void*)bio->bi_disk;
|
||||
struct md_rdev *rdev = (void*)bio->bi_bdev;
|
||||
bio->bi_next = NULL;
|
||||
bio_set_dev(bio, rdev->bdev);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
bio_io_error(bio);
|
||||
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bio->bi_disk->queue)))
|
||||
!blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
@ -1253,7 +1253,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
|
||||
r10_bio->sector);
|
||||
/* flush_pending_writes() needs access to the rdev so...*/
|
||||
mbio->bi_disk = (void *)rdev;
|
||||
mbio->bi_bdev = (void *)rdev;
|
||||
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
|
||||
@ -3003,7 +3003,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
|
||||
/* Again, very different code for resync and recovery.
|
||||
* Both must result in an r10bio with a list of bios that
|
||||
* have bi_end_io, bi_sector, bi_disk set,
|
||||
* have bi_end_io, bi_sector, bi_bdev set,
|
||||
* and bi_private set to the r10bio.
|
||||
* For recovery, we may actually create several r10bios
|
||||
* with 2 bios in each, that correspond to the bios in the main one.
|
||||
@ -4531,7 +4531,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
||||
return sectors_done;
|
||||
}
|
||||
|
||||
read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
|
||||
read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
|
||||
|
||||
bio_set_dev(read_bio, rdev->bdev);
|
||||
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
|
||||
@ -4539,10 +4539,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
||||
read_bio->bi_private = r10_bio;
|
||||
read_bio->bi_end_io = end_reshape_read;
|
||||
bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
|
||||
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
|
||||
read_bio->bi_status = 0;
|
||||
read_bio->bi_vcnt = 0;
|
||||
read_bio->bi_iter.bi_size = 0;
|
||||
r10_bio->master_bio = read_bio;
|
||||
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
|
||||
|
@ -1037,7 +1037,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
|
||||
}
|
||||
|
||||
/* flush the disk cache after recovery if necessary */
|
||||
ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL);
|
||||
ret = blkdev_issue_flush(rdev->bdev);
|
||||
out:
|
||||
__free_page(page);
|
||||
return ret;
|
||||
|
@ -5310,7 +5310,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
|
||||
unsigned int chunk_sectors;
|
||||
unsigned int bio_sectors = bio_sectors(bio);
|
||||
|
||||
WARN_ON_ONCE(bio->bi_partno);
|
||||
WARN_ON_ONCE(bio->bi_bdev->bd_partno);
|
||||
|
||||
chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
|
||||
return chunk_sectors >=
|
||||
@ -5393,90 +5393,72 @@ static void raid5_align_endio(struct bio *bi)
|
||||
static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
int dd_idx;
|
||||
struct bio* align_bi;
|
||||
struct bio *align_bio;
|
||||
struct md_rdev *rdev;
|
||||
sector_t end_sector;
|
||||
sector_t sector, end_sector, first_bad;
|
||||
int bad_sectors, dd_idx;
|
||||
|
||||
if (!in_chunk_boundary(mddev, raid_bio)) {
|
||||
pr_debug("%s: non aligned\n", __func__);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* use bio_clone_fast to make a copy of the bio
|
||||
*/
|
||||
align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
|
||||
if (!align_bi)
|
||||
return 0;
|
||||
/*
|
||||
* set bi_end_io to a new function, and set bi_private to the
|
||||
* original bio.
|
||||
*/
|
||||
align_bi->bi_end_io = raid5_align_endio;
|
||||
align_bi->bi_private = raid_bio;
|
||||
/*
|
||||
* compute position
|
||||
*/
|
||||
align_bi->bi_iter.bi_sector =
|
||||
raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
|
||||
0, &dd_idx, NULL);
|
||||
|
||||
end_sector = bio_end_sector(align_bi);
|
||||
sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0,
|
||||
&dd_idx, NULL);
|
||||
end_sector = bio_end_sector(raid_bio);
|
||||
|
||||
rcu_read_lock();
|
||||
if (r5c_big_stripe_cached(conf, sector))
|
||||
goto out_rcu_unlock;
|
||||
|
||||
rdev = rcu_dereference(conf->disks[dd_idx].replacement);
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
||||
rdev->recovery_offset < end_sector) {
|
||||
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
|
||||
if (rdev &&
|
||||
(test_bit(Faulty, &rdev->flags) ||
|
||||
if (!rdev)
|
||||
goto out_rcu_unlock;
|
||||
if (test_bit(Faulty, &rdev->flags) ||
|
||||
!(test_bit(In_sync, &rdev->flags) ||
|
||||
rdev->recovery_offset >= end_sector)))
|
||||
rdev = NULL;
|
||||
rdev->recovery_offset >= end_sector))
|
||||
goto out_rcu_unlock;
|
||||
}
|
||||
|
||||
if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) {
|
||||
rcu_read_unlock();
|
||||
bio_put(align_bi);
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
rcu_read_unlock();
|
||||
|
||||
align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
|
||||
bio_set_dev(align_bio, rdev->bdev);
|
||||
align_bio->bi_end_io = raid5_align_endio;
|
||||
align_bio->bi_private = raid_bio;
|
||||
align_bio->bi_iter.bi_sector = sector;
|
||||
|
||||
raid_bio->bi_next = (void *)rdev;
|
||||
|
||||
if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad,
|
||||
&bad_sectors)) {
|
||||
bio_put(align_bio);
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (rdev) {
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
/* No reshape active, so we can trust rdev->data_offset */
|
||||
align_bio->bi_iter.bi_sector += rdev->data_offset;
|
||||
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
rcu_read_unlock();
|
||||
raid_bio->bi_next = (void*)rdev;
|
||||
bio_set_dev(align_bi, rdev->bdev);
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
|
||||
conf->device_lock);
|
||||
atomic_inc(&conf->active_aligned_reads);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
|
||||
bio_sectors(align_bi),
|
||||
&first_bad, &bad_sectors)) {
|
||||
bio_put(align_bi);
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
return 0;
|
||||
}
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
|
||||
raid_bio->bi_iter.bi_sector);
|
||||
submit_bio_noacct(align_bio);
|
||||
return 1;
|
||||
|
||||
/* No reshape active, so we can trust rdev->data_offset */
|
||||
align_bi->bi_iter.bi_sector += rdev->data_offset;
|
||||
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
wait_event_lock_irq(conf->wait_for_quiescent,
|
||||
conf->quiesce == 0,
|
||||
conf->device_lock);
|
||||
atomic_inc(&conf->active_aligned_reads);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk),
|
||||
raid_bio->bi_iter.bi_sector);
|
||||
submit_bio_noacct(align_bi);
|
||||
return 1;
|
||||
} else {
|
||||
rcu_read_unlock();
|
||||
bio_put(align_bi);
|
||||
return 0;
|
||||
}
|
||||
out_rcu_unlock:
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
||||
|
@ -253,7 +253,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
|
||||
goto out_put;
|
||||
}
|
||||
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP;
|
||||
blk_execute_rq(mq->queue, NULL, req, 0);
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
ret = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
blk_put_request(req);
|
||||
|
||||
@ -629,7 +629,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
|
||||
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
|
||||
req_to_mmc_queue_req(req)->drv_op_data = idatas;
|
||||
req_to_mmc_queue_req(req)->ioc_count = 1;
|
||||
blk_execute_rq(mq->queue, NULL, req, 0);
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
ioc_err = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
err = mmc_blk_ioctl_copy_to_user(ic_ptr, idata);
|
||||
blk_put_request(req);
|
||||
@ -698,7 +698,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
|
||||
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
|
||||
req_to_mmc_queue_req(req)->drv_op_data = idata;
|
||||
req_to_mmc_queue_req(req)->ioc_count = num_of_cmds;
|
||||
blk_execute_rq(mq->queue, NULL, req, 0);
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
ioc_err = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
|
||||
/* copy to user if data and response */
|
||||
@ -2722,7 +2722,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
|
||||
if (IS_ERR(req))
|
||||
return PTR_ERR(req);
|
||||
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
|
||||
blk_execute_rq(mq->queue, NULL, req, 0);
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
ret = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
if (ret >= 0) {
|
||||
*val = ret;
|
||||
@ -2761,7 +2761,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
|
||||
}
|
||||
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD;
|
||||
req_to_mmc_queue_req(req)->drv_op_data = &ext_csd;
|
||||
blk_execute_rq(mq->queue, NULL, req, 0);
|
||||
blk_execute_rq(NULL, req, 0);
|
||||
err = req_to_mmc_queue_req(req)->drv_op_result;
|
||||
blk_put_request(req);
|
||||
if (err) {
|
||||
|
@ -165,7 +165,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
|
||||
static blk_qc_t nd_blk_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
struct nd_namespace_blk *nsblk = bio->bi_disk->private_data;
|
||||
struct nd_namespace_blk *nsblk = bio->bi_bdev->bd_disk->private_data;
|
||||
struct bvec_iter iter;
|
||||
unsigned long start;
|
||||
struct bio_vec bvec;
|
||||
@ -177,7 +177,7 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio)
|
||||
|
||||
bip = bio_integrity(bio);
|
||||
rw = bio_data_dir(bio);
|
||||
do_acct = blk_queue_io_stat(bio->bi_disk->queue);
|
||||
do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
|
||||
if (do_acct)
|
||||
start = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
|
@ -1442,7 +1442,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
|
||||
static blk_qc_t btt_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct btt *btt = bio->bi_disk->private_data;
|
||||
struct btt *btt = bio->bi_bdev->bd_disk->private_data;
|
||||
struct bvec_iter iter;
|
||||
unsigned long start;
|
||||
struct bio_vec bvec;
|
||||
@ -1452,7 +1452,7 @@ static blk_qc_t btt_submit_bio(struct bio *bio)
|
||||
if (!bio_integrity_prep(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
do_acct = blk_queue_io_stat(bio->bi_disk->queue);
|
||||
do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
|
||||
if (do_acct)
|
||||
start = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
|
@ -196,13 +196,13 @@ static blk_qc_t pmem_submit_bio(struct bio *bio)
|
||||
unsigned long start;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
struct pmem_device *pmem = bio->bi_disk->private_data;
|
||||
struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
|
||||
struct nd_region *nd_region = to_region(pmem);
|
||||
|
||||
if (bio->bi_opf & REQ_PREFLUSH)
|
||||
ret = nvdimm_flush(nd_region, bio);
|
||||
|
||||
do_acct = blk_queue_io_stat(bio->bi_disk->queue);
|
||||
do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
|
||||
if (do_acct)
|
||||
start = bio_start_io_acct(bio);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
|
@ -925,7 +925,7 @@ static void nvme_execute_rq_polled(struct request_queue *q,
|
||||
|
||||
rq->cmd_flags |= REQ_HIPRI;
|
||||
rq->end_io_data = &wait;
|
||||
blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq);
|
||||
blk_execute_rq_nowait(bd_disk, rq, at_head, nvme_end_sync_rq);
|
||||
|
||||
while (!completion_done(&wait)) {
|
||||
blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true);
|
||||
@ -964,7 +964,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
if (poll)
|
||||
nvme_execute_rq_polled(req->q, NULL, req, at_head);
|
||||
else
|
||||
blk_execute_rq(req->q, NULL, req, at_head);
|
||||
blk_execute_rq(NULL, req, at_head);
|
||||
if (result)
|
||||
*result = nvme_req(req)->result;
|
||||
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
|
||||
@ -1101,7 +1101,7 @@ void nvme_execute_passthru_rq(struct request *rq)
|
||||
u32 effects;
|
||||
|
||||
effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
|
||||
blk_execute_rq(rq->q, disk, rq, 0);
|
||||
blk_execute_rq(disk, rq, 0);
|
||||
nvme_passthru_end(ctrl, effects);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU);
|
||||
@ -1113,7 +1113,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
{
|
||||
bool write = nvme_is_write(cmd);
|
||||
struct nvme_ns *ns = q->queuedata;
|
||||
struct gendisk *disk = ns ? ns->disk : NULL;
|
||||
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
|
||||
struct request *req;
|
||||
struct bio *bio = NULL;
|
||||
void *meta = NULL;
|
||||
@ -1133,8 +1133,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
if (ret)
|
||||
goto out;
|
||||
bio = req->bio;
|
||||
bio->bi_disk = disk;
|
||||
if (disk && meta_buffer && meta_len) {
|
||||
if (bdev)
|
||||
bio_set_dev(bio, bdev);
|
||||
if (bdev && meta_buffer && meta_len) {
|
||||
meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
|
||||
meta_seed, write);
|
||||
if (IS_ERR(meta)) {
|
||||
@ -1202,7 +1203,7 @@ static int nvme_keep_alive(struct nvme_ctrl *ctrl)
|
||||
rq->timeout = ctrl->kato * HZ;
|
||||
rq->end_io_data = ctrl;
|
||||
|
||||
blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
|
||||
blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -2125,9 +2126,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
|
||||
nvme_config_discard(disk, ns);
|
||||
nvme_config_write_zeroes(disk, ns);
|
||||
|
||||
if ((id->nsattr & NVME_NS_ATTR_RO) ||
|
||||
test_bit(NVME_NS_FORCE_RO, &ns->flags))
|
||||
set_disk_ro(disk, true);
|
||||
set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
|
||||
test_bit(NVME_NS_FORCE_RO, &ns->flags));
|
||||
}
|
||||
|
||||
static inline bool nvme_first_scan(struct gendisk *disk)
|
||||
@ -2176,17 +2176,18 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
ns->lba_shift = id->lbaf[lbaf].ds;
|
||||
nvme_set_queue_limits(ns->ctrl, ns->queue);
|
||||
|
||||
ret = nvme_configure_metadata(ns, id);
|
||||
if (ret)
|
||||
goto out_unfreeze;
|
||||
nvme_set_chunk_sectors(ns, id);
|
||||
nvme_update_disk_info(ns->disk, ns, id);
|
||||
|
||||
if (ns->head->ids.csi == NVME_CSI_ZNS) {
|
||||
ret = nvme_update_zone_info(ns, lbaf);
|
||||
if (ret)
|
||||
goto out_unfreeze;
|
||||
}
|
||||
|
||||
ret = nvme_configure_metadata(ns, id);
|
||||
if (ret)
|
||||
goto out_unfreeze;
|
||||
nvme_set_chunk_sectors(ns, id);
|
||||
nvme_update_disk_info(ns->disk, ns, id);
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
|
||||
if (blk_queue_is_zoned(ns->queue)) {
|
||||
|
@ -695,7 +695,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd,
|
||||
|
||||
rq->end_io_data = rqd;
|
||||
|
||||
blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);
|
||||
blk_execute_rq_nowait(NULL, rq, 0, nvme_nvm_end_io);
|
||||
|
||||
return 0;
|
||||
|
||||
@ -757,7 +757,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
|
||||
{
|
||||
bool write = nvme_is_write((struct nvme_command *)vcmd);
|
||||
struct nvm_dev *dev = ns->ndev;
|
||||
struct gendisk *disk = ns->disk;
|
||||
struct request *rq;
|
||||
struct bio *bio = NULL;
|
||||
__le64 *ppa_list = NULL;
|
||||
@ -817,10 +816,10 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
|
||||
vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
|
||||
}
|
||||
|
||||
bio->bi_disk = disk;
|
||||
bio_set_dev(bio, ns->disk->part0);
|
||||
}
|
||||
|
||||
blk_execute_rq(q, NULL, rq, 0);
|
||||
blk_execute_rq(NULL, rq, 0);
|
||||
|
||||
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
|
||||
ret = -EINTR;
|
||||
|
@ -296,7 +296,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
|
||||
|
||||
blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct nvme_ns_head *head = bio->bi_disk->private_data;
|
||||
struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data;
|
||||
struct device *dev = disk_to_dev(head->disk);
|
||||
struct nvme_ns *ns;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
@ -312,7 +312,7 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
|
||||
srcu_idx = srcu_read_lock(&head->srcu);
|
||||
ns = nvme_find_path(head);
|
||||
if (likely(ns)) {
|
||||
bio->bi_disk = ns->disk;
|
||||
bio_set_dev(bio, ns->disk->part0);
|
||||
bio->bi_opf |= REQ_NVME_MPATH;
|
||||
trace_block_bio_remap(bio, disk_devt(ns->head->disk),
|
||||
bio->bi_iter.bi_sector);
|
||||
@ -352,7 +352,7 @@ static void nvme_requeue_work(struct work_struct *work)
|
||||
* Reset disk to the mpath node and resubmit to select a new
|
||||
* path.
|
||||
*/
|
||||
bio->bi_disk = head->disk;
|
||||
bio_set_dev(bio, head->disk->part0);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
|
@ -1357,7 +1357,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
|
||||
}
|
||||
|
||||
abort_req->end_io_data = NULL;
|
||||
blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
|
||||
blk_execute_rq_nowait(NULL, abort_req, 0, abort_endio);
|
||||
|
||||
/*
|
||||
* The aborted req will be completed on receiving the abort req.
|
||||
@ -2281,7 +2281,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
|
||||
req->end_io_data = nvmeq;
|
||||
|
||||
init_completion(&nvmeq->delete_done);
|
||||
blk_execute_rq_nowait(q, NULL, req, false,
|
||||
blk_execute_rq_nowait(NULL, req, false,
|
||||
opcode == nvme_admin_delete_cq ?
|
||||
nvme_del_cq_end : nvme_del_queue_end);
|
||||
return 0;
|
||||
|
@ -1468,7 +1468,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
|
||||
if (unlikely(nr))
|
||||
goto mr_put;
|
||||
|
||||
nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
|
||||
nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
|
||||
req->mr->sig_attrs, ns->pi_type);
|
||||
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
|
||||
|
||||
|
@ -9,13 +9,7 @@
|
||||
|
||||
int nvme_revalidate_zones(struct nvme_ns *ns)
|
||||
{
|
||||
struct request_queue *q = ns->queue;
|
||||
int ret;
|
||||
|
||||
ret = blk_revalidate_disk_zones(ns->disk, NULL);
|
||||
if (!ret)
|
||||
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
|
||||
return ret;
|
||||
return blk_revalidate_disk_zones(ns->disk, NULL);
|
||||
}
|
||||
|
||||
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
|
||||
@ -109,10 +103,11 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
|
||||
goto free_data;
|
||||
}
|
||||
|
||||
q->limits.zoned = BLK_ZONED_HM;
|
||||
blk_queue_set_zoned(ns->disk, BLK_ZONED_HM);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
|
||||
blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
|
||||
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
|
||||
free_data:
|
||||
kfree(id);
|
||||
return status;
|
||||
|
@ -333,7 +333,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
|
||||
|
||||
u16 nvmet_bdev_flush(struct nvmet_req *req)
|
||||
{
|
||||
if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL))
|
||||
if (blkdev_issue_flush(req->ns->bdev))
|
||||
return NVME_SC_INTERNAL | NVME_SC_DNR;
|
||||
return 0;
|
||||
}
|
||||
|
@ -275,7 +275,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
|
||||
schedule_work(&req->p.work);
|
||||
} else {
|
||||
rq->end_io_data = req;
|
||||
blk_execute_rq_nowait(rq->q, ns ? ns->disk : NULL, rq, 0,
|
||||
blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0,
|
||||
nvmet_passthru_req_done);
|
||||
}
|
||||
|
||||
|
@ -428,23 +428,15 @@ static int dasd_state_unfmt_to_basic(struct dasd_device *device)
|
||||
static int
|
||||
dasd_state_ready_to_online(struct dasd_device * device)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
struct disk_part_iter piter;
|
||||
struct block_device *part;
|
||||
|
||||
device->state = DASD_STATE_ONLINE;
|
||||
if (device->block) {
|
||||
dasd_schedule_block_bh(device->block);
|
||||
if ((device->features & DASD_FEATURE_USERAW)) {
|
||||
disk = device->block->gdp;
|
||||
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
|
||||
kobject_uevent(&disk_to_dev(device->block->gdp)->kobj,
|
||||
KOBJ_CHANGE);
|
||||
return 0;
|
||||
}
|
||||
disk = device->block->bdev->bd_disk;
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
|
||||
while ((part = disk_part_iter_next(&piter)))
|
||||
kobject_uevent(bdev_kobj(part), KOBJ_CHANGE);
|
||||
disk_part_iter_exit(&piter);
|
||||
disk_uevent(device->block->bdev->bd_disk, KOBJ_CHANGE);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -455,9 +447,6 @@ dasd_state_ready_to_online(struct dasd_device * device)
|
||||
static int dasd_state_online_to_ready(struct dasd_device *device)
|
||||
{
|
||||
int rc;
|
||||
struct gendisk *disk;
|
||||
struct disk_part_iter piter;
|
||||
struct block_device *part;
|
||||
|
||||
if (device->discipline->online_to_ready) {
|
||||
rc = device->discipline->online_to_ready(device);
|
||||
@ -466,13 +455,8 @@ static int dasd_state_online_to_ready(struct dasd_device *device)
|
||||
}
|
||||
|
||||
device->state = DASD_STATE_READY;
|
||||
if (device->block && !(device->features & DASD_FEATURE_USERAW)) {
|
||||
disk = device->block->bdev->bd_disk;
|
||||
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
|
||||
while ((part = disk_part_iter_next(&piter)))
|
||||
kobject_uevent(bdev_kobj(part), KOBJ_CHANGE);
|
||||
disk_part_iter_exit(&piter);
|
||||
}
|
||||
if (device->block && !(device->features & DASD_FEATURE_USERAW))
|
||||
disk_uevent(device->block->bdev->bd_disk, KOBJ_CHANGE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -879,17 +879,13 @@ dcssblk_submit_bio(struct bio *bio)
|
||||
blk_queue_split(&bio);
|
||||
|
||||
bytes_done = 0;
|
||||
dev_info = bio->bi_disk->private_data;
|
||||
dev_info = bio->bi_bdev->bd_disk->private_data;
|
||||
if (dev_info == NULL)
|
||||
goto fail;
|
||||
if ((bio->bi_iter.bi_sector & 7) != 0 ||
|
||||
(bio->bi_iter.bi_size & 4095) != 0)
|
||||
/* Request is not page-aligned. */
|
||||
goto fail;
|
||||
if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) {
|
||||
/* Request beyond end of DCSS segment. */
|
||||
goto fail;
|
||||
}
|
||||
/* verify data transfer direction */
|
||||
if (dev_info->is_shared) {
|
||||
switch (dev_info->segment_type) {
|
||||
|
@ -184,7 +184,7 @@ static unsigned long xpram_highest_page_index(void)
|
||||
*/
|
||||
static blk_qc_t xpram_submit_bio(struct bio *bio)
|
||||
{
|
||||
xpram_device_t *xdev = bio->bi_disk->private_data;
|
||||
xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
unsigned int index;
|
||||
|
@ -2007,7 +2007,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
|
||||
req->timeout = 10 * HZ;
|
||||
rq->retries = 5;
|
||||
|
||||
blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done);
|
||||
blk_execute_rq_nowait(NULL, req, 1, eh_lock_door_done);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -269,7 +269,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
|
||||
/*
|
||||
* head injection *required* here otherwise quiesce won't work
|
||||
*/
|
||||
blk_execute_rq(req->q, NULL, req, 1);
|
||||
blk_execute_rq(NULL, req, 1);
|
||||
|
||||
/*
|
||||
* Some devices (USB mass-storage in particular) may transfer
|
||||
|
@ -665,12 +665,28 @@ static int sd_zbc_init_disk(struct scsi_disk *sdkp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sd_zbc_release_disk(struct scsi_disk *sdkp)
|
||||
static void sd_zbc_clear_zone_info(struct scsi_disk *sdkp)
|
||||
{
|
||||
/* Serialize against revalidate zones */
|
||||
mutex_lock(&sdkp->rev_mutex);
|
||||
|
||||
kvfree(sdkp->zones_wp_offset);
|
||||
sdkp->zones_wp_offset = NULL;
|
||||
kfree(sdkp->zone_wp_update_buf);
|
||||
sdkp->zone_wp_update_buf = NULL;
|
||||
|
||||
sdkp->nr_zones = 0;
|
||||
sdkp->rev_nr_zones = 0;
|
||||
sdkp->zone_blocks = 0;
|
||||
sdkp->rev_zone_blocks = 0;
|
||||
|
||||
mutex_unlock(&sdkp->rev_mutex);
|
||||
}
|
||||
|
||||
void sd_zbc_release_disk(struct scsi_disk *sdkp)
|
||||
{
|
||||
if (sd_is_zoned(sdkp))
|
||||
sd_zbc_clear_zone_info(sdkp);
|
||||
}
|
||||
|
||||
static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
|
||||
@ -769,6 +785,21 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
|
||||
*/
|
||||
return 0;
|
||||
|
||||
/* READ16/WRITE16 is mandatory for ZBC disks */
|
||||
sdkp->device->use_16_for_rw = 1;
|
||||
sdkp->device->use_10_for_rw = 0;
|
||||
|
||||
if (!blk_queue_is_zoned(q)) {
|
||||
/*
|
||||
* This can happen for a host aware disk with partitions.
|
||||
* The block device zone information was already cleared
|
||||
* by blk_queue_set_zoned(). Only clear the scsi disk zone
|
||||
* information and exit early.
|
||||
*/
|
||||
sd_zbc_clear_zone_info(sdkp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check zoned block device characteristics (unconstrained reads) */
|
||||
ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
|
||||
if (ret)
|
||||
@ -789,9 +820,13 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
|
||||
blk_queue_max_active_zones(q, 0);
|
||||
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
|
||||
|
||||
/* READ16/WRITE16 is mandatory for ZBC disks */
|
||||
sdkp->device->use_16_for_rw = 1;
|
||||
sdkp->device->use_10_for_rw = 0;
|
||||
/*
|
||||
* Per ZBC and ZAC specifications, writes in sequential write required
|
||||
* zones of host-managed devices must be aligned to the device physical
|
||||
* block size.
|
||||
*/
|
||||
if (blk_queue_zoned_model(q) == BLK_ZONED_HM)
|
||||
blk_queue_zone_write_granularity(q, sdkp->physical_block_size);
|
||||
|
||||
sdkp->rev_nr_zones = nr_zones;
|
||||
sdkp->rev_zone_blocks = zone_blocks;
|
||||
|
@ -829,8 +829,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
|
||||
|
||||
srp->rq->timeout = timeout;
|
||||
kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */
|
||||
blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk,
|
||||
srp->rq, at_head, sg_rq_end_io);
|
||||
blk_execute_rq_nowait(sdp->disk, srp->rq, at_head, sg_rq_end_io);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user