mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-17 05:45:20 +00:00
for-5.9/block-20200802
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl8m7YwQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpt+dEAC7a0HYuX2OrkyawBnsgd1QQR/soC7surec yDDa7SMM8cOq3935bfzcYHV9FWJszEGIknchiGb9R3/T+vmSohbvDsM5zgwya9u/ FHUIuTq324I6JWXKl30k4rwjiX9wQeMt+WZ5gC8KJYCWA296i2IpJwd0A45aaKuS x4bTjxqknE+fD4gQiMUSt+bmuOUAp81fEku3EPapCRYDPAj8f5uoY7R2arT/POwB b+s+AtXqzBymIqx1z0sZ/XcdZKmDuhdurGCWu7BfJFIzw5kQ2Qe3W8rUmrQ3pGut 8a21YfilhUFiBv+B4wptfrzJuzU6Ps0BXHCnBsQjzvXwq5uFcZH495mM/4E4OJvh SbjL2K4iFj+O1ngFkukG/F8tdEM1zKBYy2ZEkGoWKUpyQanbAaGI6QKKJA+DCdBi yPEb7yRAa5KfLqMiocm1qCEO1I56HRiNHaJVMqCPOZxLmpXj19Fs71yIRplP1Trv GGXdWZsccjuY6OljoXWdEfnxAr5zBsO3Yf2yFT95AD+egtGsU1oOzlqAaU1mtflw ABo452pvh6FFpxGXqz6oK4VqY4Et7WgXOiljA4yIGoPpG/08L1Yle4eVc2EE01Jb +BL49xNJVeUhGFrvUjPGl9kVMeLmubPFbmgrtipW+VRg9W8+Yirw7DPP6K+gbPAR RzAUdZFbWw== =abJG -----END PGP SIGNATURE----- Merge tag 'for-5.9/block-20200802' of git://git.kernel.dk/linux-block Pull core block updates from Jens Axboe: "Good amount of cleanups and tech debt removals in here, and as a result, the diffstat shows a nice net reduction in code. - Softirq completion cleanups (Christoph) - Stop using ->queuedata (Christoph) - Cleanup bd claiming (Christoph) - Use check_events, moving away from the legacy media change (Christoph) - Use inode i_blkbits consistently (Christoph) - Remove old unused writeback congestion bits (Christoph) - Cleanup/unify submission path (Christoph) - Use bio_uninit consistently, instead of bio_disassociate_blkg (Christoph) - sbitmap cleared bits handling (John) - Request merging blktrace event addition (Jan) - sysfs add/remove race fixes (Luis) - blk-mq tag fixes/optimizations (Ming) - Duplicate words in comments (Randy) - Flush deferral cleanup (Yufen) - IO context locking/retry fixes (John) - struct_size() usage (Gustavo) - blk-iocost fixes (Chengming) - blk-cgroup IO stats fixes (Boris) - Various little fixes" * tag 'for-5.9/block-20200802' of git://git.kernel.dk/linux-block: (135 commits) block: blk-timeout: delete duplicated word block: blk-mq-sched: delete duplicated word block: blk-mq: delete duplicated word block: genhd: delete duplicated words block: elevator: delete duplicated word and fix typos block: bio: delete duplicated words block: bfq-iosched: fix duplicated word iocost_monitor: start from the oldest usage index iocost: Fix check condition of iocg abs_vdebt block: Remove callback typedefs for blk_mq_ops block: Use non _rcu version of list functions for tag_set_list blk-cgroup: show global disk stats in root cgroup io.stat blk-cgroup: make iostat functions visible to stat printing block: improve discard bio alignment in __blkdev_issue_discard() block: change REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL to be odd numbers block: defer flush request no matter whether we have elevator block: make blk_timeout_init() static block: remove retry loop in ioc_release_fn() block: remove unnecessary ioc nested locking block: integrate bd_start_claiming into __blkdev_get ...
This commit is contained in:
commit
382625d0d4
@ -1483,8 +1483,7 @@ IO Interface Files
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
io.stat
|
||||
A read-only nested-keyed file which exists on non-root
|
||||
cgroups.
|
||||
A read-only nested-keyed file.
|
||||
|
||||
Lines are keyed by $MAJ:$MIN device numbers and not ordered.
|
||||
The following nested keys are defined.
|
||||
|
@ -1036,7 +1036,7 @@ Now the generic block layer performs partition-remapping early and thus
|
||||
provides drivers with a sector number relative to whole device, rather than
|
||||
having to take partition number into account in order to arrive at the true
|
||||
sector number. The routine blk_partition_remap() is invoked by
|
||||
generic_make_request even before invoking the queue specific make_request_fn,
|
||||
submit_bio_noacct even before invoking the queue specific ->submit_bio,
|
||||
so the i/o scheduler also gets to operate on whole disk sector numbers. This
|
||||
should typically not require changes to block drivers, it just never gets
|
||||
to invoke its own partition sector offset calculations since all bios
|
||||
|
@ -47,7 +47,7 @@ the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags
|
||||
may both be set on a single bio.
|
||||
|
||||
|
||||
Implementation details for make_request_fn based block drivers
|
||||
Implementation details for bio based block drivers
|
||||
--------------------------------------------------------------
|
||||
|
||||
These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
|
||||
|
@ -157,7 +157,6 @@ with the kernel as a block device by registering the following general
|
||||
cdrom_release, /∗ release ∗/
|
||||
NULL, /∗ fsync ∗/
|
||||
NULL, /∗ fasync ∗/
|
||||
cdrom_media_changed, /∗ media change ∗/
|
||||
NULL /∗ revalidate ∗/
|
||||
};
|
||||
|
||||
@ -366,19 +365,6 @@ which may or may not be in the drive). If the drive is not a changer,
|
||||
CDS_DRIVE_NOT_READY /* something is wrong, tray is moving? */
|
||||
CDS_DISC_OK /* a disc is loaded and everything is fine */
|
||||
|
||||
::
|
||||
|
||||
int media_changed(struct cdrom_device_info *cdi, int disc_nr)
|
||||
|
||||
This function is very similar to the original function in $struct
|
||||
file_operations*. It returns 1 if the medium of the device *cdi->dev*
|
||||
has changed since the last call, and 0 otherwise. The parameter
|
||||
*disc_nr* identifies a specific slot in a juke-box, it should be
|
||||
ignored for single-disc drives. Note that by `re-routing` this
|
||||
function through *cdrom_media_changed()*, we can implement separate
|
||||
queues for the VFS and a new *ioctl()* function that can report device
|
||||
changes to software (e. g., an auto-mounting daemon).
|
||||
|
||||
::
|
||||
|
||||
int tray_move(struct cdrom_device_info *cdi, int position)
|
||||
@ -917,9 +903,7 @@ commands can be identified by the underscores in their names.
|
||||
maximum number of discs in the juke-box found in the *cdrom_dops*.
|
||||
`CDROM_MEDIA_CHANGED`
|
||||
Returns 1 if a disc has been changed since the last call.
|
||||
Note that calls to *cdrom_media_changed* by the VFS are treated
|
||||
by an independent queue, so both mechanisms will detect a
|
||||
media change once. For juke-boxes, an extra argument *arg*
|
||||
For juke-boxes, an extra argument *arg*
|
||||
specifies the slot for which the information is given. The special
|
||||
value *CDSL_CURRENT* requests that information about the currently
|
||||
selected slot be returned.
|
||||
|
@ -24,7 +24,7 @@ Available fault injection capabilities
|
||||
|
||||
injects disk IO errors on devices permitted by setting
|
||||
/sys/block/<device>/make-it-fail or
|
||||
/sys/block/<device>/<partition>/make-it-fail. (generic_make_request())
|
||||
/sys/block/<device>/<partition>/make-it-fail. (submit_bio_noacct())
|
||||
|
||||
- fail_mmc_request
|
||||
|
||||
|
@ -467,7 +467,6 @@ prototypes::
|
||||
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
||||
int (*direct_access) (struct block_device *, sector_t, void **,
|
||||
unsigned long *);
|
||||
int (*media_changed) (struct gendisk *);
|
||||
void (*unlock_native_capacity) (struct gendisk *);
|
||||
int (*revalidate_disk) (struct gendisk *);
|
||||
int (*getgeo)(struct block_device *, struct hd_geometry *);
|
||||
@ -483,14 +482,13 @@ release: yes
|
||||
ioctl: no
|
||||
compat_ioctl: no
|
||||
direct_access: no
|
||||
media_changed: no
|
||||
unlock_native_capacity: no
|
||||
revalidate_disk: no
|
||||
getgeo: no
|
||||
swap_slot_free_notify: no (see below)
|
||||
======================= ===================
|
||||
|
||||
media_changed, unlock_native_capacity and revalidate_disk are called only from
|
||||
unlock_native_capacity and revalidate_disk are called only from
|
||||
check_disk_change().
|
||||
|
||||
swap_slot_free_notify is called with swap_lock and sometimes the page lock
|
||||
|
@ -1453,7 +1453,7 @@ function-trace, we get a much larger output::
|
||||
=> __blk_run_queue_uncond
|
||||
=> __blk_run_queue
|
||||
=> blk_queue_bio
|
||||
=> generic_make_request
|
||||
=> submit_bio_noacct
|
||||
=> submit_bio
|
||||
=> submit_bh
|
||||
=> __ext3_get_inode_loc
|
||||
@ -1738,7 +1738,7 @@ tracers.
|
||||
=> __blk_run_queue_uncond
|
||||
=> __blk_run_queue
|
||||
=> blk_queue_bio
|
||||
=> generic_make_request
|
||||
=> submit_bio_noacct
|
||||
=> submit_bio
|
||||
=> submit_bh
|
||||
=> ext3_bread
|
||||
|
@ -59,9 +59,9 @@ struct nfhd_device {
|
||||
struct gendisk *disk;
|
||||
};
|
||||
|
||||
static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
|
||||
static blk_qc_t nfhd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct nfhd_device *dev = queue->queuedata;
|
||||
struct nfhd_device *dev = bio->bi_disk->private_data;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
int dir, len, shift;
|
||||
@ -93,6 +93,7 @@ static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
|
||||
static const struct block_device_operations nfhd_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = nfhd_submit_bio,
|
||||
.getgeo = nfhd_getgeo,
|
||||
};
|
||||
|
||||
@ -118,11 +119,10 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
|
||||
dev->bsize = bsize;
|
||||
dev->bshift = ffs(bsize) - 10;
|
||||
|
||||
dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE);
|
||||
dev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (dev->queue == NULL)
|
||||
goto free_dev;
|
||||
|
||||
dev->queue->queuedata = dev;
|
||||
blk_queue_logical_block_size(dev->queue, bsize);
|
||||
|
||||
dev->disk = alloc_disk(16);
|
||||
|
@ -101,9 +101,9 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
|
||||
spin_unlock(&dev->lock);
|
||||
}
|
||||
|
||||
static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t simdisk_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct simdisk *dev = q->queuedata;
|
||||
struct simdisk *dev = bio->bi_disk->private_data;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
@ -127,8 +127,6 @@ static int simdisk_open(struct block_device *bdev, fmode_t mode)
|
||||
struct simdisk *dev = bdev->bd_disk->private_data;
|
||||
|
||||
spin_lock(&dev->lock);
|
||||
if (!dev->users)
|
||||
check_disk_change(bdev);
|
||||
++dev->users;
|
||||
spin_unlock(&dev->lock);
|
||||
return 0;
|
||||
@ -144,6 +142,7 @@ static void simdisk_release(struct gendisk *disk, fmode_t mode)
|
||||
|
||||
static const struct block_device_operations simdisk_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = simdisk_submit_bio,
|
||||
.open = simdisk_open,
|
||||
.release = simdisk_release,
|
||||
};
|
||||
@ -267,14 +266,12 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
|
||||
spin_lock_init(&dev->lock);
|
||||
dev->users = 0;
|
||||
|
||||
dev->queue = blk_alloc_queue(simdisk_make_request, NUMA_NO_NODE);
|
||||
dev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (dev->queue == NULL) {
|
||||
pr_err("blk_alloc_queue failed\n");
|
||||
goto out_alloc_queue;
|
||||
}
|
||||
|
||||
dev->queue->queuedata = dev;
|
||||
|
||||
dev->gd = alloc_disk(SIMDISK_MINORS);
|
||||
if (dev->gd == NULL) {
|
||||
pr_err("alloc_disk failed\n");
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
|
||||
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
|
||||
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
|
||||
blk-exec.o blk-merge.o blk-timeout.o \
|
||||
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
|
||||
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
|
||||
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
|
||||
|
@ -4714,7 +4714,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
||||
* some unlucky request wait for as long as the device
|
||||
* wishes.
|
||||
*
|
||||
* Of course, serving one request at at time may cause loss of
|
||||
* Of course, serving one request at a time may cause loss of
|
||||
* throughput.
|
||||
*/
|
||||
if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0)
|
||||
|
165
block/bio.c
165
block/bio.c
@ -234,8 +234,12 @@ fallback:
|
||||
|
||||
void bio_uninit(struct bio *bio)
|
||||
{
|
||||
bio_disassociate_blkg(bio);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
if (bio->bi_blkg) {
|
||||
blkg_put(bio->bi_blkg);
|
||||
bio->bi_blkg = NULL;
|
||||
}
|
||||
#endif
|
||||
if (bio_integrity(bio))
|
||||
bio_integrity_free(bio);
|
||||
|
||||
@ -354,7 +358,7 @@ static void bio_alloc_rescue(struct work_struct *work)
|
||||
if (!bio)
|
||||
break;
|
||||
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
|
||||
@ -412,19 +416,19 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
|
||||
* submit the previously allocated bio for IO before attempting to allocate
|
||||
* a new one. Failure to do so can cause deadlocks under memory pressure.
|
||||
*
|
||||
* Note that when running under generic_make_request() (i.e. any block
|
||||
* Note that when running under submit_bio_noacct() (i.e. any block
|
||||
* driver), bios are not submitted until after you return - see the code in
|
||||
* generic_make_request() that converts recursion into iteration, to prevent
|
||||
* submit_bio_noacct() that converts recursion into iteration, to prevent
|
||||
* stack overflows.
|
||||
*
|
||||
* This would normally mean allocating multiple bios under
|
||||
* generic_make_request() would be susceptible to deadlocks, but we have
|
||||
* submit_bio_noacct() would be susceptible to deadlocks, but we have
|
||||
* deadlock avoidance code that resubmits any blocked bios from a rescuer
|
||||
* thread.
|
||||
*
|
||||
* However, we do not guarantee forward progress for allocations from other
|
||||
* mempools. Doing multiple allocations from the same mempool under
|
||||
* generic_make_request() should be avoided - instead, use bio_set's front_pad
|
||||
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
|
||||
* for per bio allocations.
|
||||
*
|
||||
* RETURNS:
|
||||
@ -444,9 +448,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
||||
if (nr_iovecs > UIO_MAXIOV)
|
||||
return NULL;
|
||||
|
||||
p = kmalloc(sizeof(struct bio) +
|
||||
nr_iovecs * sizeof(struct bio_vec),
|
||||
gfp_mask);
|
||||
p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
|
||||
front_pad = 0;
|
||||
inline_vecs = nr_iovecs;
|
||||
} else {
|
||||
@ -455,14 +457,14 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
||||
nr_iovecs > 0))
|
||||
return NULL;
|
||||
/*
|
||||
* generic_make_request() converts recursion to iteration; this
|
||||
* submit_bio_noacct() converts recursion to iteration; this
|
||||
* means if we're running beneath it, any bios we allocate and
|
||||
* submit will not be submitted (and thus freed) until after we
|
||||
* return.
|
||||
*
|
||||
* This exposes us to a potential deadlock if we allocate
|
||||
* multiple bios from the same bio_set() while running
|
||||
* underneath generic_make_request(). If we were to allocate
|
||||
* underneath submit_bio_noacct(). If we were to allocate
|
||||
* multiple bios (say a stacking block driver that was splitting
|
||||
* bios), we would deadlock if we exhausted the mempool's
|
||||
* reserve.
|
||||
@ -860,7 +862,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
|
||||
* @same_page: return if the segment has been merged inside the same page
|
||||
*
|
||||
* Try to add the data at @page + @off to the last bvec of @bio. This is a
|
||||
* a useful optimisation for file systems with a block size smaller than the
|
||||
* useful optimisation for file systems with a block size smaller than the
|
||||
* page size.
|
||||
*
|
||||
* Warn if (@len, @off) crosses pages in case that @same_page is true.
|
||||
@ -986,7 +988,7 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
|
||||
* Pins pages from *iter and appends them to @bio's bvec array. The
|
||||
* pages will have to be released using put_page() when done.
|
||||
* For multi-segment *iter, this function only adds pages from the
|
||||
* the next non-empty segment of the iov iterator.
|
||||
* next non-empty segment of the iov iterator.
|
||||
*/
|
||||
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
@ -1625,141 +1627,6 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
|
||||
}
|
||||
EXPORT_SYMBOL(bioset_init_from_src);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
/**
|
||||
* bio_disassociate_blkg - puts back the blkg reference if associated
|
||||
* @bio: target bio
|
||||
*
|
||||
* Helper to disassociate the blkg from @bio if a blkg is associated.
|
||||
*/
|
||||
void bio_disassociate_blkg(struct bio *bio)
|
||||
{
|
||||
if (bio->bi_blkg) {
|
||||
blkg_put(bio->bi_blkg);
|
||||
bio->bi_blkg = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_disassociate_blkg);
|
||||
|
||||
/**
|
||||
* __bio_associate_blkg - associate a bio with the a blkg
|
||||
* @bio: target bio
|
||||
* @blkg: the blkg to associate
|
||||
*
|
||||
* This tries to associate @bio with the specified @blkg. Association failure
|
||||
* is handled by walking up the blkg tree. Therefore, the blkg associated can
|
||||
* be anything between @blkg and the root_blkg. This situation only happens
|
||||
* when a cgroup is dying and then the remaining bios will spill to the closest
|
||||
* alive blkg.
|
||||
*
|
||||
* A reference will be taken on the @blkg and will be released when @bio is
|
||||
* freed.
|
||||
*/
|
||||
static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
|
||||
{
|
||||
bio_disassociate_blkg(bio);
|
||||
|
||||
bio->bi_blkg = blkg_tryget_closest(blkg);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_associate_blkg_from_css - associate a bio with a specified css
|
||||
* @bio: target bio
|
||||
* @css: target css
|
||||
*
|
||||
* Associate @bio with the blkg found by combining the css's blkg and the
|
||||
* request_queue of the @bio. This falls back to the queue's root_blkg if
|
||||
* the association fails with the css.
|
||||
*/
|
||||
void bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (!css || !css->parent)
|
||||
blkg = q->root_blkg;
|
||||
else
|
||||
blkg = blkg_lookup_create(css_to_blkcg(css), q);
|
||||
|
||||
__bio_associate_blkg(bio, blkg);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/**
|
||||
* bio_associate_blkg_from_page - associate a bio with the page's blkg
|
||||
* @bio: target bio
|
||||
* @page: the page to lookup the blkcg from
|
||||
*
|
||||
* Associate @bio with the blkg from @page's owning memcg and the respective
|
||||
* request_queue. If cgroup_e_css returns %NULL, fall back to the queue's
|
||||
* root_blkg.
|
||||
*/
|
||||
void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
if (!page->mem_cgroup)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
|
||||
bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
/**
|
||||
* bio_associate_blkg - associate a bio with a blkg
|
||||
* @bio: target bio
|
||||
*
|
||||
* Associate @bio with the blkg found from the bio's css and request_queue.
|
||||
* If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is
|
||||
* already associated, the css is reused and association redone as the
|
||||
* request_queue may have changed.
|
||||
*/
|
||||
void bio_associate_blkg(struct bio *bio)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (bio->bi_blkg)
|
||||
css = &bio_blkcg(bio)->css;
|
||||
else
|
||||
css = blkcg_css();
|
||||
|
||||
bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg);
|
||||
|
||||
/**
|
||||
* bio_clone_blkg_association - clone blkg association from src to dst bio
|
||||
* @dst: destination bio
|
||||
* @src: source bio
|
||||
*/
|
||||
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
|
||||
{
|
||||
rcu_read_lock();
|
||||
|
||||
if (src->bi_blkg)
|
||||
__bio_associate_blkg(dst, src->bi_blkg);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
static void __init biovec_init_slabs(void)
|
||||
{
|
||||
int i;
|
||||
|
@ -95,9 +95,6 @@ static void __blkg_release(struct rcu_head *rcu)
|
||||
css_put(&blkg->blkcg->css);
|
||||
if (blkg->parent)
|
||||
blkg_put(blkg->parent);
|
||||
|
||||
wb_congested_put(blkg->wb_congested);
|
||||
|
||||
blkg_free(blkg);
|
||||
}
|
||||
|
||||
@ -227,7 +224,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
|
||||
struct blkcg_gq *new_blkg)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
struct bdi_writeback_congested *wb_congested;
|
||||
int i, ret;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
@ -245,31 +241,22 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
|
||||
goto err_free_blkg;
|
||||
}
|
||||
|
||||
wb_congested = wb_congested_get_create(q->backing_dev_info,
|
||||
blkcg->css.id,
|
||||
GFP_NOWAIT | __GFP_NOWARN);
|
||||
if (!wb_congested) {
|
||||
ret = -ENOMEM;
|
||||
goto err_put_css;
|
||||
}
|
||||
|
||||
/* allocate */
|
||||
if (!new_blkg) {
|
||||
new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
|
||||
if (unlikely(!new_blkg)) {
|
||||
ret = -ENOMEM;
|
||||
goto err_put_congested;
|
||||
goto err_put_css;
|
||||
}
|
||||
}
|
||||
blkg = new_blkg;
|
||||
blkg->wb_congested = wb_congested;
|
||||
|
||||
/* link parent */
|
||||
if (blkcg_parent(blkcg)) {
|
||||
blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
|
||||
if (WARN_ON_ONCE(!blkg->parent)) {
|
||||
ret = -ENODEV;
|
||||
goto err_put_congested;
|
||||
goto err_put_css;
|
||||
}
|
||||
blkg_get(blkg->parent);
|
||||
}
|
||||
@ -306,8 +293,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
|
||||
blkg_put(blkg);
|
||||
return ERR_PTR(ret);
|
||||
|
||||
err_put_congested:
|
||||
wb_congested_put(wb_congested);
|
||||
err_put_css:
|
||||
css_put(&blkcg->css);
|
||||
err_free_blkg:
|
||||
@ -316,30 +301,35 @@ err_free_blkg:
|
||||
}
|
||||
|
||||
/**
|
||||
* __blkg_lookup_create - lookup blkg, try to create one if not there
|
||||
* blkg_lookup_create - lookup blkg, try to create one if not there
|
||||
* @blkcg: blkcg of interest
|
||||
* @q: request_queue of interest
|
||||
*
|
||||
* Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to
|
||||
* create one. blkg creation is performed recursively from blkcg_root such
|
||||
* that all non-root blkg's have access to the parent blkg. This function
|
||||
* should be called under RCU read lock and @q->queue_lock.
|
||||
* should be called under RCU read lock and takes @q->queue_lock.
|
||||
*
|
||||
* Returns the blkg or the closest blkg if blkg_create() fails as it walks
|
||||
* down from root.
|
||||
*/
|
||||
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q)
|
||||
static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
lockdep_assert_held(&q->queue_lock);
|
||||
|
||||
blkg = __blkg_lookup(blkcg, q, true);
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
if (blkg)
|
||||
return blkg;
|
||||
|
||||
spin_lock_irqsave(&q->queue_lock, flags);
|
||||
blkg = __blkg_lookup(blkcg, q, true);
|
||||
if (blkg)
|
||||
goto found;
|
||||
|
||||
/*
|
||||
* Create blkgs walking down from blkcg_root to @blkcg, so that all
|
||||
* non-root blkgs have access to their parents. Returns the closest
|
||||
@ -362,34 +352,16 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
|
||||
}
|
||||
|
||||
blkg = blkg_create(pos, q, NULL);
|
||||
if (IS_ERR(blkg))
|
||||
return ret_blkg;
|
||||
if (IS_ERR(blkg)) {
|
||||
blkg = ret_blkg;
|
||||
break;
|
||||
}
|
||||
if (pos == blkcg)
|
||||
return blkg;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_lookup_create - find or create a blkg
|
||||
* @blkcg: target block cgroup
|
||||
* @q: target request_queue
|
||||
*
|
||||
* This looks up or creates the blkg representing the unique pair
|
||||
* of the blkcg and the request_queue.
|
||||
*/
|
||||
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
struct request_queue *q)
|
||||
{
|
||||
struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
|
||||
|
||||
if (unlikely(!blkg)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&q->queue_lock, flags);
|
||||
blkg = __blkg_lookup_create(blkcg, q);
|
||||
spin_unlock_irqrestore(&q->queue_lock, flags);
|
||||
break;
|
||||
}
|
||||
|
||||
found:
|
||||
spin_unlock_irqrestore(&q->queue_lock, flags);
|
||||
return blkg;
|
||||
}
|
||||
|
||||
@ -739,12 +711,137 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_finish);
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] = src->bytes[i];
|
||||
dst->ios[i] = src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] += src->bytes[i];
|
||||
dst->ios[i] += src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] -= src->bytes[i];
|
||||
dst->ios[i] -= src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
struct blkg_iostat cur, delta;
|
||||
unsigned int seq;
|
||||
|
||||
/* fetch the current per-cpu values */
|
||||
do {
|
||||
seq = u64_stats_fetch_begin(&bisc->sync);
|
||||
blkg_iostat_set(&cur, &bisc->cur);
|
||||
} while (u64_stats_fetch_retry(&bisc->sync, seq));
|
||||
|
||||
/* propagate percpu delta to global */
|
||||
u64_stats_update_begin(&blkg->iostat.sync);
|
||||
blkg_iostat_set(&delta, &cur);
|
||||
blkg_iostat_sub(&delta, &bisc->last);
|
||||
blkg_iostat_add(&blkg->iostat.cur, &delta);
|
||||
blkg_iostat_add(&bisc->last, &delta);
|
||||
u64_stats_update_end(&blkg->iostat.sync);
|
||||
|
||||
/* propagate global delta to parent */
|
||||
if (parent) {
|
||||
u64_stats_update_begin(&parent->iostat.sync);
|
||||
blkg_iostat_set(&delta, &blkg->iostat.cur);
|
||||
blkg_iostat_sub(&delta, &blkg->iostat.last);
|
||||
blkg_iostat_add(&parent->iostat.cur, &delta);
|
||||
blkg_iostat_add(&blkg->iostat.last, &delta);
|
||||
u64_stats_update_end(&parent->iostat.sync);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* The rstat algorithms intentionally don't handle the root cgroup to avoid
|
||||
* incurring overhead when no cgroups are defined. For that reason,
|
||||
* cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
|
||||
* iostat in the root cgroup's blkcg_gq.
|
||||
*
|
||||
* However, we would like to re-use the printing code between the root and
|
||||
* non-root cgroups to the extent possible. For that reason, we simulate
|
||||
* flushing the root cgroup's stats by explicitly filling in the iostat
|
||||
* with disk level statistics.
|
||||
*/
|
||||
static void blkcg_fill_root_iostats(void)
|
||||
{
|
||||
struct class_dev_iter iter;
|
||||
struct device *dev;
|
||||
|
||||
class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
|
||||
while ((dev = class_dev_iter_next(&iter))) {
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
struct hd_struct *part = disk_get_part(disk, 0);
|
||||
struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
|
||||
struct blkg_iostat tmp;
|
||||
int cpu;
|
||||
|
||||
memset(&tmp, 0, sizeof(tmp));
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct disk_stats *cpu_dkstats;
|
||||
|
||||
cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
|
||||
tmp.ios[BLKG_IOSTAT_READ] +=
|
||||
cpu_dkstats->ios[STAT_READ];
|
||||
tmp.ios[BLKG_IOSTAT_WRITE] +=
|
||||
cpu_dkstats->ios[STAT_WRITE];
|
||||
tmp.ios[BLKG_IOSTAT_DISCARD] +=
|
||||
cpu_dkstats->ios[STAT_DISCARD];
|
||||
// convert sectors to bytes
|
||||
tmp.bytes[BLKG_IOSTAT_READ] +=
|
||||
cpu_dkstats->sectors[STAT_READ] << 9;
|
||||
tmp.bytes[BLKG_IOSTAT_WRITE] +=
|
||||
cpu_dkstats->sectors[STAT_WRITE] << 9;
|
||||
tmp.bytes[BLKG_IOSTAT_DISCARD] +=
|
||||
cpu_dkstats->sectors[STAT_DISCARD] << 9;
|
||||
|
||||
u64_stats_update_begin(&blkg->iostat.sync);
|
||||
blkg_iostat_set(&blkg->iostat.cur, &tmp);
|
||||
u64_stats_update_end(&blkg->iostat.sync);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
cgroup_rstat_flush(blkcg->css.cgroup);
|
||||
if (!seq_css(sf)->parent)
|
||||
blkcg_fill_root_iostats();
|
||||
else
|
||||
cgroup_rstat_flush(blkcg->css.cgroup);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
@ -833,7 +930,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
static struct cftype blkcg_files[] = {
|
||||
{
|
||||
.name = "stat",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = blkcg_print_stat,
|
||||
},
|
||||
{ } /* terminate */
|
||||
@ -1025,7 +1121,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
|
||||
* blkcg_init_queue - initialize blkcg part of request queue
|
||||
* @q: request_queue to initialize
|
||||
*
|
||||
* Called from __blk_alloc_queue(). Responsible for initializing blkcg
|
||||
* Called from blk_alloc_queue(). Responsible for initializing blkcg
|
||||
* part of new request_queue @q.
|
||||
*
|
||||
* RETURNS:
|
||||
@ -1114,77 +1210,6 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] = src->bytes[i];
|
||||
dst->ios[i] = src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] += src->bytes[i];
|
||||
dst->ios[i] += src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] -= src->bytes[i];
|
||||
dst->ios[i] -= src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
struct blkg_iostat cur, delta;
|
||||
unsigned seq;
|
||||
|
||||
/* fetch the current per-cpu values */
|
||||
do {
|
||||
seq = u64_stats_fetch_begin(&bisc->sync);
|
||||
blkg_iostat_set(&cur, &bisc->cur);
|
||||
} while (u64_stats_fetch_retry(&bisc->sync, seq));
|
||||
|
||||
/* propagate percpu delta to global */
|
||||
u64_stats_update_begin(&blkg->iostat.sync);
|
||||
blkg_iostat_set(&delta, &cur);
|
||||
blkg_iostat_sub(&delta, &bisc->last);
|
||||
blkg_iostat_add(&blkg->iostat.cur, &delta);
|
||||
blkg_iostat_add(&bisc->last, &delta);
|
||||
u64_stats_update_end(&blkg->iostat.sync);
|
||||
|
||||
/* propagate global delta to parent */
|
||||
if (parent) {
|
||||
u64_stats_update_begin(&parent->iostat.sync);
|
||||
blkg_iostat_set(&delta, &blkg->iostat.cur);
|
||||
blkg_iostat_sub(&delta, &blkg->iostat.last);
|
||||
blkg_iostat_add(&parent->iostat.cur, &delta);
|
||||
blkg_iostat_add(&blkg->iostat.last, &delta);
|
||||
u64_stats_update_end(&parent->iostat.sync);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
{
|
||||
int i;
|
||||
@ -1727,6 +1752,139 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
|
||||
atomic64_add(delta, &blkg->delay_nsec);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_tryget_closest - try and get a blkg ref on the closet blkg
|
||||
* @bio: target bio
|
||||
* @css: target css
|
||||
*
|
||||
* As the failure mode here is to walk up the blkg tree, this ensure that the
|
||||
* blkg->parent pointers are always valid. This returns the blkg that it ended
|
||||
* up taking a reference on or %NULL if no reference was taken.
|
||||
*/
|
||||
static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg_gq *blkg, *ret_blkg = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
|
||||
while (blkg) {
|
||||
if (blkg_tryget(blkg)) {
|
||||
ret_blkg = blkg;
|
||||
break;
|
||||
}
|
||||
blkg = blkg->parent;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret_blkg;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_associate_blkg_from_css - associate a bio with a specified css
|
||||
* @bio: target bio
|
||||
* @css: target css
|
||||
*
|
||||
* Associate @bio with the blkg found by combining the css's blkg and the
|
||||
* request_queue of the @bio. An association failure is handled by walking up
|
||||
* the blkg tree. Therefore, the blkg associated can be anything between @blkg
|
||||
* and q->root_blkg. This situation only happens when a cgroup is dying and
|
||||
* then the remaining bios will spill to the closest alive blkg.
|
||||
*
|
||||
* A reference will be taken on the blkg and will be released when @bio is
|
||||
* freed.
|
||||
*/
|
||||
void bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
if (bio->bi_blkg)
|
||||
blkg_put(bio->bi_blkg);
|
||||
|
||||
if (css && css->parent) {
|
||||
bio->bi_blkg = blkg_tryget_closest(bio, css);
|
||||
} else {
|
||||
blkg_get(bio->bi_disk->queue->root_blkg);
|
||||
bio->bi_blkg = bio->bi_disk->queue->root_blkg;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
|
||||
|
||||
/**
|
||||
* bio_associate_blkg - associate a bio with a blkg
|
||||
* @bio: target bio
|
||||
*
|
||||
* Associate @bio with the blkg found from the bio's css and request_queue.
|
||||
* If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is
|
||||
* already associated, the css is reused and association redone as the
|
||||
* request_queue may have changed.
|
||||
*/
|
||||
void bio_associate_blkg(struct bio *bio)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (bio->bi_blkg)
|
||||
css = &bio_blkcg(bio)->css;
|
||||
else
|
||||
css = blkcg_css();
|
||||
|
||||
bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg);
|
||||
|
||||
/**
|
||||
* bio_clone_blkg_association - clone blkg association from src to dst bio
|
||||
* @dst: destination bio
|
||||
* @src: source bio
|
||||
*/
|
||||
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
|
||||
{
|
||||
if (src->bi_blkg) {
|
||||
if (dst->bi_blkg)
|
||||
blkg_put(dst->bi_blkg);
|
||||
blkg_get(src->bi_blkg);
|
||||
dst->bi_blkg = src->bi_blkg;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
|
||||
|
||||
static int blk_cgroup_io_type(struct bio *bio)
|
||||
{
|
||||
if (op_is_discard(bio->bi_opf))
|
||||
return BLKG_IOSTAT_DISCARD;
|
||||
if (op_is_write(bio->bi_opf))
|
||||
return BLKG_IOSTAT_WRITE;
|
||||
return BLKG_IOSTAT_READ;
|
||||
}
|
||||
|
||||
void blk_cgroup_bio_start(struct bio *bio)
|
||||
{
|
||||
int rwd = blk_cgroup_io_type(bio), cpu;
|
||||
struct blkg_iostat_set *bis;
|
||||
|
||||
cpu = get_cpu();
|
||||
bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
|
||||
u64_stats_update_begin(&bis->sync);
|
||||
|
||||
/*
|
||||
* If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
|
||||
* bio and we would have already accounted for the size of the bio.
|
||||
*/
|
||||
if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
|
||||
bio_set_flag(bio, BIO_CGROUP_ACCT);
|
||||
bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
|
||||
}
|
||||
bis->cur.ios[rwd]++;
|
||||
|
||||
u64_stats_update_end(&bis->sync);
|
||||
if (cgroup_subsys_on_dfl(io_cgrp_subsys))
|
||||
cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static int __init blkcg_init(void)
|
||||
{
|
||||
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
|
||||
|
308
block/blk-core.c
308
block/blk-core.c
@ -51,9 +51,7 @@
|
||||
#include "blk-pm.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct dentry *blk_debugfs_root;
|
||||
#endif
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
|
||||
@ -285,7 +283,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
|
||||
* A block device may call blk_sync_queue to ensure that any
|
||||
* such activity is cancelled, thus allowing it to release resources
|
||||
* that the callbacks might use. The caller must already have made sure
|
||||
* that its ->make_request_fn will not re-add plugging prior to calling
|
||||
* that its ->submit_bio will not re-add plugging prior to calling
|
||||
* this function.
|
||||
*
|
||||
* This function does not cancel any asynchronous activity arising
|
||||
@ -321,6 +319,16 @@ void blk_clear_pm_only(struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
|
||||
|
||||
/**
|
||||
* blk_put_queue - decrement the request_queue refcount
|
||||
* @q: the request_queue structure to decrement the refcount for
|
||||
*
|
||||
* Decrements the refcount of the request_queue kobject. When this reaches 0
|
||||
* we'll have blk_release_queue() called.
|
||||
*
|
||||
* Context: Any context, but the last reference must not be dropped from
|
||||
* atomic context.
|
||||
*/
|
||||
void blk_put_queue(struct request_queue *q)
|
||||
{
|
||||
kobject_put(&q->kobj);
|
||||
@ -352,9 +360,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
|
||||
*
|
||||
* Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
|
||||
* put it. All future requests will be failed immediately with -ENODEV.
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
void blk_cleanup_queue(struct request_queue *q)
|
||||
{
|
||||
/* cannot be called from atomic context */
|
||||
might_sleep();
|
||||
|
||||
WARN_ON_ONCE(blk_queue_registered(q));
|
||||
|
||||
/* mark @q DYING, no new request or merges will be allowed afterwards */
|
||||
@ -497,7 +510,7 @@ static void blk_timeout_work(struct work_struct *work)
|
||||
{
|
||||
}
|
||||
|
||||
struct request_queue *__blk_alloc_queue(int node_id)
|
||||
struct request_queue *blk_alloc_queue(int node_id)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
@ -540,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
|
||||
|
||||
kobject_init(&q->kobj, &blk_queue_ktype);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_IO_TRACE
|
||||
mutex_init(&q->blk_trace_mutex);
|
||||
#endif
|
||||
mutex_init(&q->debugfs_mutex);
|
||||
mutex_init(&q->sysfs_lock);
|
||||
mutex_init(&q->sysfs_dir_lock);
|
||||
spin_lock_init(&q->queue_lock);
|
||||
@ -564,6 +575,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
|
||||
|
||||
blk_queue_dma_alignment(q, 511);
|
||||
blk_set_default_limits(&q->limits);
|
||||
q->nr_requests = BLKDEV_MAX_RQ;
|
||||
|
||||
return q;
|
||||
|
||||
@ -581,23 +593,16 @@ fail_q:
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
|
||||
{
|
||||
struct request_queue *q;
|
||||
|
||||
if (WARN_ON_ONCE(!make_request))
|
||||
return NULL;
|
||||
|
||||
q = __blk_alloc_queue(node_id);
|
||||
if (!q)
|
||||
return NULL;
|
||||
q->make_request_fn = make_request;
|
||||
q->nr_requests = BLKDEV_MAX_RQ;
|
||||
return q;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_alloc_queue);
|
||||
|
||||
/**
|
||||
* blk_get_queue - increment the request_queue refcount
|
||||
* @q: the request_queue structure to increment the refcount for
|
||||
*
|
||||
* Increment the refcount of the request_queue kobject.
|
||||
*
|
||||
* Context: Any context.
|
||||
*/
|
||||
bool blk_get_queue(struct request_queue *q)
|
||||
{
|
||||
if (likely(!blk_queue_dying(q))) {
|
||||
@ -850,8 +855,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
|
||||
return false;
|
||||
|
||||
WARN_ONCE(1,
|
||||
"generic_make_request: Trying to write "
|
||||
"to read-only block-device %s (partno %d)\n",
|
||||
"Trying to write to read-only block-device %s (partno %d)\n",
|
||||
bio_devname(bio, b), part->partno);
|
||||
/* Older lvm-tools actually trigger this */
|
||||
return false;
|
||||
@ -952,25 +956,13 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static noinline_for_stack bool
|
||||
generic_make_request_checks(struct bio *bio)
|
||||
static noinline_for_stack bool submit_bio_checks(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int nr_sectors = bio_sectors(bio);
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
blk_status_t status = BLK_STS_IOERR;
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
might_sleep();
|
||||
|
||||
q = bio->bi_disk->queue;
|
||||
if (unlikely(!q)) {
|
||||
printk(KERN_ERR
|
||||
"generic_make_request: Trying to access "
|
||||
"nonexistent block-device %s (%Lu)\n",
|
||||
bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
/*
|
||||
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
|
||||
* if queue is not a request based queue.
|
||||
@ -992,14 +984,13 @@ generic_make_request_checks(struct bio *bio)
|
||||
}
|
||||
|
||||
/*
|
||||
* Filter flush bio's early so that make_request based
|
||||
* drivers without flush support don't have to worry
|
||||
* about them.
|
||||
* Filter flush bio's early so that bio based drivers without flush
|
||||
* support don't have to worry about them.
|
||||
*/
|
||||
if (op_is_flush(bio->bi_opf) &&
|
||||
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
|
||||
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
|
||||
if (!nr_sectors) {
|
||||
if (!bio_sectors(bio)) {
|
||||
status = BLK_STS_OK;
|
||||
goto end_io;
|
||||
}
|
||||
@ -1054,8 +1045,13 @@ generic_make_request_checks(struct bio *bio)
|
||||
if (unlikely(!current->io_context))
|
||||
create_task_io_context(current, GFP_ATOMIC, q->node);
|
||||
|
||||
if (!blkcg_bio_issue_check(q, bio))
|
||||
if (blk_throtl_bio(bio)) {
|
||||
blkcg_bio_issue_init(bio);
|
||||
return false;
|
||||
}
|
||||
|
||||
blk_cgroup_bio_start(bio);
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_queue(q, bio);
|
||||
@ -1074,22 +1070,116 @@ end_io:
|
||||
return false;
|
||||
}
|
||||
|
||||
static blk_qc_t do_make_request(struct bio *bio)
|
||||
static blk_qc_t __submit_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct gendisk *disk = bio->bi_disk;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
if (blk_crypto_bio_prep(&bio)) {
|
||||
if (!q->make_request_fn)
|
||||
return blk_mq_make_request(q, bio);
|
||||
ret = q->make_request_fn(q, bio);
|
||||
if (!disk->fops->submit_bio)
|
||||
return blk_mq_submit_bio(bio);
|
||||
ret = disk->fops->submit_bio(bio);
|
||||
}
|
||||
blk_queue_exit(q);
|
||||
blk_queue_exit(disk->queue);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The loop in this function may be a bit non-obvious, and so deserves some
|
||||
* explanation:
|
||||
*
|
||||
* - Before entering the loop, bio->bi_next is NULL (as all callers ensure
|
||||
* that), so we have a list with a single bio.
|
||||
* - We pretend that we have just taken it off a longer list, so we assign
|
||||
* bio_list to a pointer to the bio_list_on_stack, thus initialising the
|
||||
* bio_list of new bios to be added. ->submit_bio() may indeed add some more
|
||||
* bios through a recursive call to submit_bio_noacct. If it did, we find a
|
||||
* non-NULL value in bio_list and re-enter the loop from the top.
|
||||
* - In this case we really did just take the bio of the top of the list (no
|
||||
* pretending) and so remove it from bio_list, and call into ->submit_bio()
|
||||
* again.
|
||||
*
|
||||
* bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
|
||||
* bio_list_on_stack[1] contains bios that were submitted before the current
|
||||
* ->submit_bio_bio, but that haven't been processed yet.
|
||||
*/
|
||||
static blk_qc_t __submit_bio_noacct(struct bio *bio)
|
||||
{
|
||||
struct bio_list bio_list_on_stack[2];
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
BUG_ON(bio->bi_next);
|
||||
|
||||
bio_list_init(&bio_list_on_stack[0]);
|
||||
current->bio_list = bio_list_on_stack;
|
||||
|
||||
do {
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct bio_list lower, same;
|
||||
|
||||
if (unlikely(bio_queue_enter(bio) != 0))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Create a fresh bio_list for all subordinate requests.
|
||||
*/
|
||||
bio_list_on_stack[1] = bio_list_on_stack[0];
|
||||
bio_list_init(&bio_list_on_stack[0]);
|
||||
|
||||
ret = __submit_bio(bio);
|
||||
|
||||
/*
|
||||
* Sort new bios into those for a lower level and those for the
|
||||
* same level.
|
||||
*/
|
||||
bio_list_init(&lower);
|
||||
bio_list_init(&same);
|
||||
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
|
||||
if (q == bio->bi_disk->queue)
|
||||
bio_list_add(&same, bio);
|
||||
else
|
||||
bio_list_add(&lower, bio);
|
||||
|
||||
/*
|
||||
* Now assemble so we handle the lowest level first.
|
||||
*/
|
||||
bio_list_merge(&bio_list_on_stack[0], &lower);
|
||||
bio_list_merge(&bio_list_on_stack[0], &same);
|
||||
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
|
||||
} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
|
||||
|
||||
current->bio_list = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
|
||||
{
|
||||
struct bio_list bio_list[2] = { };
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
current->bio_list = bio_list;
|
||||
|
||||
do {
|
||||
struct gendisk *disk = bio->bi_disk;
|
||||
|
||||
if (unlikely(bio_queue_enter(bio) != 0))
|
||||
continue;
|
||||
|
||||
if (!blk_crypto_bio_prep(&bio)) {
|
||||
blk_queue_exit(disk->queue);
|
||||
ret = BLK_QC_T_NONE;
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = blk_mq_submit_bio(bio);
|
||||
} while ((bio = bio_list_pop(&bio_list[0])));
|
||||
|
||||
current->bio_list = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* generic_make_request - re-submit a bio to the block device layer for I/O
|
||||
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
|
||||
* @bio: The bio describing the location in memory and on the device.
|
||||
*
|
||||
* This is a version of submit_bio() that shall only be used for I/O that is
|
||||
@ -1097,115 +1187,27 @@ static blk_qc_t do_make_request(struct bio *bio)
|
||||
* systems and other upper level users of the block layer should use
|
||||
* submit_bio() instead.
|
||||
*/
|
||||
blk_qc_t generic_make_request(struct bio *bio)
|
||||
blk_qc_t submit_bio_noacct(struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* bio_list_on_stack[0] contains bios submitted by the current
|
||||
* make_request_fn.
|
||||
* bio_list_on_stack[1] contains bios that were submitted before
|
||||
* the current make_request_fn, but that haven't been processed
|
||||
* yet.
|
||||
*/
|
||||
struct bio_list bio_list_on_stack[2];
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
if (!generic_make_request_checks(bio))
|
||||
goto out;
|
||||
if (!submit_bio_checks(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
/*
|
||||
* We only want one ->make_request_fn to be active at a time, else
|
||||
* stack usage with stacked devices could be a problem. So use
|
||||
* current->bio_list to keep a list of requests submited by a
|
||||
* make_request_fn function. current->bio_list is also used as a
|
||||
* flag to say if generic_make_request is currently active in this
|
||||
* task or not. If it is NULL, then no make_request is active. If
|
||||
* it is non-NULL, then a make_request is active, and new requests
|
||||
* should be added at the tail
|
||||
* We only want one ->submit_bio to be active at a time, else stack
|
||||
* usage with stacked devices could be a problem. Use current->bio_list
|
||||
* to collect a list of requests submited by a ->submit_bio method while
|
||||
* it is active, and then process them after it returned.
|
||||
*/
|
||||
if (current->bio_list) {
|
||||
bio_list_add(¤t->bio_list[0], bio);
|
||||
goto out;
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
/* following loop may be a bit non-obvious, and so deserves some
|
||||
* explanation.
|
||||
* Before entering the loop, bio->bi_next is NULL (as all callers
|
||||
* ensure that) so we have a list with a single bio.
|
||||
* We pretend that we have just taken it off a longer list, so
|
||||
* we assign bio_list to a pointer to the bio_list_on_stack,
|
||||
* thus initialising the bio_list of new bios to be
|
||||
* added. ->make_request() may indeed add some more bios
|
||||
* through a recursive call to generic_make_request. If it
|
||||
* did, we find a non-NULL value in bio_list and re-enter the loop
|
||||
* from the top. In this case we really did just take the bio
|
||||
* of the top of the list (no pretending) and so remove it from
|
||||
* bio_list, and call into ->make_request() again.
|
||||
*/
|
||||
BUG_ON(bio->bi_next);
|
||||
bio_list_init(&bio_list_on_stack[0]);
|
||||
current->bio_list = bio_list_on_stack;
|
||||
do {
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
|
||||
if (likely(bio_queue_enter(bio) == 0)) {
|
||||
struct bio_list lower, same;
|
||||
|
||||
/* Create a fresh bio_list for all subordinate requests */
|
||||
bio_list_on_stack[1] = bio_list_on_stack[0];
|
||||
bio_list_init(&bio_list_on_stack[0]);
|
||||
ret = do_make_request(bio);
|
||||
|
||||
/* sort new bios into those for a lower level
|
||||
* and those for the same level
|
||||
*/
|
||||
bio_list_init(&lower);
|
||||
bio_list_init(&same);
|
||||
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
|
||||
if (q == bio->bi_disk->queue)
|
||||
bio_list_add(&same, bio);
|
||||
else
|
||||
bio_list_add(&lower, bio);
|
||||
/* now assemble so we handle the lowest level first */
|
||||
bio_list_merge(&bio_list_on_stack[0], &lower);
|
||||
bio_list_merge(&bio_list_on_stack[0], &same);
|
||||
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
|
||||
}
|
||||
bio = bio_list_pop(&bio_list_on_stack[0]);
|
||||
} while (bio);
|
||||
current->bio_list = NULL; /* deactivate */
|
||||
|
||||
out:
|
||||
return ret;
|
||||
if (!bio->bi_disk->fops->submit_bio)
|
||||
return __submit_bio_noacct_mq(bio);
|
||||
return __submit_bio_noacct(bio);
|
||||
}
|
||||
EXPORT_SYMBOL(generic_make_request);
|
||||
|
||||
/**
|
||||
* direct_make_request - hand a buffer directly to its device driver for I/O
|
||||
* @bio: The bio describing the location in memory and on the device.
|
||||
*
|
||||
* This function behaves like generic_make_request(), but does not protect
|
||||
* against recursion. Must only be used if the called driver is known
|
||||
* to be blk-mq based.
|
||||
*/
|
||||
blk_qc_t direct_make_request(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
|
||||
if (WARN_ON_ONCE(q->make_request_fn)) {
|
||||
bio_io_error(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
if (!generic_make_request_checks(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
if (unlikely(bio_queue_enter(bio)))
|
||||
return BLK_QC_T_NONE;
|
||||
if (!blk_crypto_bio_prep(&bio)) {
|
||||
blk_queue_exit(q);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
return blk_mq_make_request(q, bio);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(direct_make_request);
|
||||
EXPORT_SYMBOL(submit_bio_noacct);
|
||||
|
||||
/**
|
||||
* submit_bio - submit a bio to the block device layer for I/O
|
||||
@ -1266,13 +1268,13 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
blk_qc_t ret;
|
||||
|
||||
psi_memstall_enter(&pflags);
|
||||
ret = generic_make_request(bio);
|
||||
ret = submit_bio_noacct(bio);
|
||||
psi_memstall_leave(&pflags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
return generic_make_request(bio);
|
||||
return submit_bio_noacct(bio);
|
||||
}
|
||||
EXPORT_SYMBOL(submit_bio);
|
||||
|
||||
@ -1908,9 +1910,7 @@ int __init blk_dev_init(void)
|
||||
blk_requestq_cachep = kmem_cache_create("request_queue",
|
||||
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
blk_debugfs_root = debugfs_create_dir("block", NULL);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
|
||||
return false;
|
||||
}
|
||||
bio_chain(split_bio, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
*bio_ptr = split_bio;
|
||||
}
|
||||
|
||||
|
@ -239,7 +239,7 @@ void __blk_crypto_free_request(struct request *rq)
|
||||
* kernel crypto API. When the crypto API fallback is used for encryption,
|
||||
* blk-crypto may choose to split the bio into 2 - the first one that will
|
||||
* continue to be processed and the second one that will be resubmitted via
|
||||
* generic_make_request. A bounce bio will be allocated to encrypt the contents
|
||||
* submit_bio_noacct. A bounce bio will be allocated to encrypt the contents
|
||||
* of the aforementioned "first one", and *bio_ptr will be updated to this
|
||||
* bounce bio.
|
||||
*
|
||||
|
@ -219,7 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
struct request *rq, *n;
|
||||
unsigned long flags = 0;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
|
||||
blk_account_io_flush(flush_rq);
|
||||
|
||||
@ -235,13 +234,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
if (fq->rq_status != BLK_STS_OK)
|
||||
error = fq->rq_status;
|
||||
|
||||
hctx = flush_rq->mq_hctx;
|
||||
if (!q->elevator) {
|
||||
blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
|
||||
flush_rq->tag = -1;
|
||||
flush_rq->tag = BLK_MQ_NO_TAG;
|
||||
} else {
|
||||
blk_mq_put_driver_tag(flush_rq);
|
||||
flush_rq->internal_tag = -1;
|
||||
flush_rq->internal_tag = BLK_MQ_NO_TAG;
|
||||
}
|
||||
|
||||
running = &fq->flush_queue[fq->flush_running_idx];
|
||||
@ -286,13 +283,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
|
||||
return;
|
||||
|
||||
/* C2 and C3
|
||||
*
|
||||
* For blk-mq + scheduling, we can risk having all driver tags
|
||||
* assigned to empty flushes, and we deadlock if we are expecting
|
||||
* other requests to make progress. Don't defer for that case.
|
||||
*/
|
||||
if (!list_empty(&fq->flush_data_in_flight) && q->elevator &&
|
||||
/* C2 and C3 */
|
||||
if (!list_empty(&fq->flush_data_in_flight) &&
|
||||
time_before(jiffies,
|
||||
fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
|
||||
return;
|
||||
@ -316,13 +308,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
flush_rq->mq_ctx = first_rq->mq_ctx;
|
||||
flush_rq->mq_hctx = first_rq->mq_hctx;
|
||||
|
||||
if (!q->elevator) {
|
||||
fq->orig_rq = first_rq;
|
||||
if (!q->elevator)
|
||||
flush_rq->tag = first_rq->tag;
|
||||
blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq);
|
||||
} else {
|
||||
else
|
||||
flush_rq->internal_tag = first_rq->internal_tag;
|
||||
}
|
||||
|
||||
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
|
||||
flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
|
||||
|
@ -96,15 +96,7 @@ static void ioc_release_fn(struct work_struct *work)
|
||||
{
|
||||
struct io_context *ioc = container_of(work, struct io_context,
|
||||
release_work);
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Exiting icq may call into put_io_context() through elevator
|
||||
* which will trigger lockdep warning. The ioc's are guaranteed to
|
||||
* be different, use a different locking subclass here. Use
|
||||
* irqsave variant as there's no spin_lock_irq_nested().
|
||||
*/
|
||||
spin_lock_irqsave_nested(&ioc->lock, flags, 1);
|
||||
spin_lock_irq(&ioc->lock);
|
||||
|
||||
while (!hlist_empty(&ioc->icq_list)) {
|
||||
struct io_cq *icq = hlist_entry(ioc->icq_list.first,
|
||||
@ -115,13 +107,27 @@ static void ioc_release_fn(struct work_struct *work)
|
||||
ioc_destroy_icq(icq);
|
||||
spin_unlock(&q->queue_lock);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
cpu_relax();
|
||||
spin_lock_irqsave_nested(&ioc->lock, flags, 1);
|
||||
/* Make sure q and icq cannot be freed. */
|
||||
rcu_read_lock();
|
||||
|
||||
/* Re-acquire the locks in the correct order. */
|
||||
spin_unlock(&ioc->lock);
|
||||
spin_lock(&q->queue_lock);
|
||||
spin_lock(&ioc->lock);
|
||||
|
||||
/*
|
||||
* The icq may have been destroyed when the ioc lock
|
||||
* was released.
|
||||
*/
|
||||
if (!(icq->flags & ICQ_DESTROYED))
|
||||
ioc_destroy_icq(icq);
|
||||
|
||||
spin_unlock(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
|
||||
kmem_cache_free(iocontext_cachep, ioc);
|
||||
}
|
||||
@ -170,7 +176,6 @@ void put_io_context(struct io_context *ioc)
|
||||
*/
|
||||
void put_io_context_active(struct io_context *ioc)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct io_cq *icq;
|
||||
|
||||
if (!atomic_dec_and_test(&ioc->active_ref)) {
|
||||
@ -178,19 +183,14 @@ void put_io_context_active(struct io_context *ioc)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Need ioc lock to walk icq_list and q lock to exit icq. Perform
|
||||
* reverse double locking. Read comment in ioc_release_fn() for
|
||||
* explanation on the nested locking annotation.
|
||||
*/
|
||||
spin_lock_irqsave_nested(&ioc->lock, flags, 1);
|
||||
spin_lock_irq(&ioc->lock);
|
||||
hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
|
||||
if (icq->flags & ICQ_EXITED)
|
||||
continue;
|
||||
|
||||
ioc_exit_icq(icq);
|
||||
}
|
||||
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
|
||||
put_io_context(ioc);
|
||||
}
|
||||
|
@ -1370,7 +1370,7 @@ static void ioc_timer_fn(struct timer_list *timer)
|
||||
* should have woken up in the last period and expire idle iocgs.
|
||||
*/
|
||||
list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
|
||||
if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
|
||||
if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
|
||||
!iocg_is_idle(iocg))
|
||||
continue;
|
||||
|
||||
@ -2045,8 +2045,7 @@ static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q,
|
||||
int levels = blkcg->css.cgroup->level + 1;
|
||||
struct ioc_gq *iocg;
|
||||
|
||||
iocg = kzalloc_node(sizeof(*iocg) + levels * sizeof(iocg->ancestors[0]),
|
||||
gfp, q->node);
|
||||
iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node);
|
||||
if (!iocg)
|
||||
return NULL;
|
||||
|
||||
|
@ -591,7 +591,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
struct rq_wait *rqw;
|
||||
struct iolatency_grp *iolat;
|
||||
u64 window_start;
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now;
|
||||
bool issue_as_root = bio_issue_as_root_blkg(bio);
|
||||
bool enabled = false;
|
||||
int inflight = 0;
|
||||
@ -608,6 +608,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
now = ktime_to_ns(ktime_get());
|
||||
while (blkg && blkg->parent) {
|
||||
iolat = blkg_to_lat(blkg);
|
||||
if (!iolat) {
|
||||
|
@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
struct bio *bio = *biop;
|
||||
unsigned int op;
|
||||
sector_t bs_mask;
|
||||
sector_t bs_mask, part_offset = 0;
|
||||
|
||||
if (!q)
|
||||
return -ENXIO;
|
||||
@ -54,9 +54,34 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
if (!nr_sects)
|
||||
return -EINVAL;
|
||||
|
||||
/* In case the discard request is in a partition */
|
||||
if (bdev->bd_partno)
|
||||
part_offset = bdev->bd_part->start_sect;
|
||||
|
||||
while (nr_sects) {
|
||||
sector_t req_sects = min_t(sector_t, nr_sects,
|
||||
bio_allowed_max_sectors(q));
|
||||
sector_t granularity_aligned_lba, req_sects;
|
||||
sector_t sector_mapped = sector + part_offset;
|
||||
|
||||
granularity_aligned_lba = round_up(sector_mapped,
|
||||
q->limits.discard_granularity >> SECTOR_SHIFT);
|
||||
|
||||
/*
|
||||
* Check whether the discard bio starts at a discard_granularity
|
||||
* aligned LBA,
|
||||
* - If no: set (granularity_aligned_lba - sector_mapped) to
|
||||
* bi_size of the first split bio, then the second bio will
|
||||
* start at a discard_granularity aligned LBA on the device.
|
||||
* - If yes: use bio_aligned_discard_max_sectors() as the max
|
||||
* possible bi_size of the first split bio. Then when this bio
|
||||
* is split in device drive, the split ones are very probably
|
||||
* to be aligned to discard_granularity of the device's queue.
|
||||
*/
|
||||
if (granularity_aligned_lba == sector_mapped)
|
||||
req_sects = min_t(sector_t, nr_sects,
|
||||
bio_aligned_discard_max_sectors(q));
|
||||
else
|
||||
req_sects = min_t(sector_t, nr_sects,
|
||||
granularity_aligned_lba - sector_mapped);
|
||||
|
||||
WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
|
||||
|
||||
|
@ -283,20 +283,20 @@ split:
|
||||
|
||||
/**
|
||||
* __blk_queue_split - split a bio and submit the second half
|
||||
* @q: [in] request queue pointer
|
||||
* @bio: [in, out] bio to be split
|
||||
* @nr_segs: [out] number of segments in the first bio
|
||||
*
|
||||
* Split a bio into two bios, chain the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. If the second bio is still too
|
||||
* big it will be split by a recursive call to this function. Since this
|
||||
* function may allocate a new bio from @q->bio_split, it is the responsibility
|
||||
* of the caller to ensure that @q is only released after processing of the
|
||||
* function may allocate a new bio from @bio->bi_disk->queue->bio_split, it is
|
||||
* the responsibility of the caller to ensure that
|
||||
* @bio->bi_disk->queue->bio_split is only released after processing of the
|
||||
* split bio has finished.
|
||||
*/
|
||||
void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
unsigned int *nr_segs)
|
||||
void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
|
||||
{
|
||||
struct request_queue *q = (*bio)->bi_disk->queue;
|
||||
struct bio *split = NULL;
|
||||
|
||||
switch (bio_op(*bio)) {
|
||||
@ -338,27 +338,26 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
|
||||
bio_chain(split, *bio);
|
||||
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
|
||||
generic_make_request(*bio);
|
||||
submit_bio_noacct(*bio);
|
||||
*bio = split;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_queue_split - split a bio and submit the second half
|
||||
* @q: [in] request queue pointer
|
||||
* @bio: [in, out] bio to be split
|
||||
*
|
||||
* Split a bio into two bios, chains the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. Since this function may allocate
|
||||
* a new bio from @q->bio_split, it is the responsibility of the caller to
|
||||
* ensure that @q is only released after processing of the split bio has
|
||||
* finished.
|
||||
* a new bio from @bio->bi_disk->queue->bio_split, it is the responsibility of
|
||||
* the caller to ensure that @bio->bi_disk->queue->bio_split is only released
|
||||
* after processing of the split bio has finished.
|
||||
*/
|
||||
void blk_queue_split(struct request_queue *q, struct bio **bio)
|
||||
void blk_queue_split(struct bio **bio)
|
||||
{
|
||||
unsigned int nr_segs;
|
||||
|
||||
__blk_queue_split(q, bio, &nr_segs);
|
||||
__blk_queue_split(bio, &nr_segs);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_split);
|
||||
|
||||
@ -793,6 +792,8 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
*/
|
||||
blk_account_io_merge_request(next);
|
||||
|
||||
trace_block_rq_merge(q, next);
|
||||
|
||||
/*
|
||||
* ownership of bio passed from next to req, return 'next' for
|
||||
* the caller to free
|
||||
|
@ -404,8 +404,7 @@ static bool hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
|
||||
const struct show_busy_params *params = data;
|
||||
|
||||
if (rq->mq_hctx == params->hctx)
|
||||
__blk_mq_debugfs_rq_show(params->m,
|
||||
list_entry_rq(&rq->queuelist));
|
||||
__blk_mq_debugfs_rq_show(params->m, rq);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -827,9 +826,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
|
||||
blk_debugfs_root);
|
||||
|
||||
debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
|
||||
|
||||
/*
|
||||
@ -860,9 +856,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
|
||||
|
||||
void blk_mq_debugfs_unregister(struct request_queue *q)
|
||||
{
|
||||
debugfs_remove_recursive(q->debugfs_dir);
|
||||
q->sched_debugfs_dir = NULL;
|
||||
q->debugfs_dir = NULL;
|
||||
}
|
||||
|
||||
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/list_sort.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@ -80,6 +81,35 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
|
||||
blk_mq_run_hw_queue(hctx, true);
|
||||
}
|
||||
|
||||
static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
|
||||
{
|
||||
struct request *rqa = container_of(a, struct request, queuelist);
|
||||
struct request *rqb = container_of(b, struct request, queuelist);
|
||||
|
||||
return rqa->mq_hctx > rqb->mq_hctx;
|
||||
}
|
||||
|
||||
static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx =
|
||||
list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
|
||||
struct request *rq;
|
||||
LIST_HEAD(hctx_list);
|
||||
unsigned int count = 0;
|
||||
|
||||
list_for_each_entry(rq, rq_list, queuelist) {
|
||||
if (rq->mq_hctx != hctx) {
|
||||
list_cut_before(&hctx_list, rq_list, &rq->queuelist);
|
||||
goto dispatch;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
list_splice_tail_init(rq_list, &hctx_list);
|
||||
|
||||
dispatch:
|
||||
return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
|
||||
}
|
||||
|
||||
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
|
||||
|
||||
/*
|
||||
@ -90,12 +120,20 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
|
||||
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
||||
* be run again. This is necessary to avoid starving flushes.
|
||||
*/
|
||||
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
bool multi_hctxs = false, run_queue = false;
|
||||
bool dispatched = false, busy = false;
|
||||
unsigned int max_dispatch;
|
||||
LIST_HEAD(rq_list);
|
||||
int ret = 0;
|
||||
int count = 0;
|
||||
|
||||
if (hctx->dispatch_busy)
|
||||
max_dispatch = 1;
|
||||
else
|
||||
max_dispatch = hctx->queue->nr_requests;
|
||||
|
||||
do {
|
||||
struct request *rq;
|
||||
@ -104,16 +142,16 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
break;
|
||||
|
||||
if (!list_empty_careful(&hctx->dispatch)) {
|
||||
ret = -EAGAIN;
|
||||
busy = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!blk_mq_get_dispatch_budget(hctx))
|
||||
if (!blk_mq_get_dispatch_budget(q))
|
||||
break;
|
||||
|
||||
rq = e->type->ops.dispatch_request(hctx);
|
||||
if (!rq) {
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
blk_mq_put_dispatch_budget(q);
|
||||
/*
|
||||
* We're releasing without dispatching. Holding the
|
||||
* budget could have blocked any "hctx"s with the
|
||||
@ -121,7 +159,7 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
* no guarantee anyone will kick the queue. Kick it
|
||||
* ourselves.
|
||||
*/
|
||||
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
||||
run_queue = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -130,8 +168,42 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
* if this rq won't be queued to driver via .queue_rq()
|
||||
* in blk_mq_dispatch_rq_list().
|
||||
*/
|
||||
list_add(&rq->queuelist, &rq_list);
|
||||
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
|
||||
list_add_tail(&rq->queuelist, &rq_list);
|
||||
if (rq->mq_hctx != hctx)
|
||||
multi_hctxs = true;
|
||||
} while (++count < max_dispatch);
|
||||
|
||||
if (!count) {
|
||||
if (run_queue)
|
||||
blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
|
||||
} else if (multi_hctxs) {
|
||||
/*
|
||||
* Requests from different hctx may be dequeued from some
|
||||
* schedulers, such as bfq and deadline.
|
||||
*
|
||||
* Sort the requests in the list according to their hctx,
|
||||
* dispatch batching requests from same hctx at a time.
|
||||
*/
|
||||
list_sort(NULL, &rq_list, sched_rq_cmp);
|
||||
do {
|
||||
dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
|
||||
} while (!list_empty(&rq_list));
|
||||
} else {
|
||||
dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
|
||||
}
|
||||
|
||||
if (busy)
|
||||
return -EAGAIN;
|
||||
return !!dispatched;
|
||||
}
|
||||
|
||||
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
int ret;
|
||||
|
||||
do {
|
||||
ret = __blk_mq_do_dispatch_sched(hctx);
|
||||
} while (ret == 1);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -153,7 +225,7 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
|
||||
*
|
||||
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
||||
* to be run again. This is necessary to avoid starving flushes.
|
||||
* be run again. This is necessary to avoid starving flushes.
|
||||
*/
|
||||
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
@ -161,10 +233,9 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
LIST_HEAD(rq_list);
|
||||
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
|
||||
int ret = 0;
|
||||
struct request *rq;
|
||||
|
||||
do {
|
||||
struct request *rq;
|
||||
|
||||
if (!list_empty_careful(&hctx->dispatch)) {
|
||||
ret = -EAGAIN;
|
||||
break;
|
||||
@ -173,12 +244,12 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
if (!sbitmap_any_bit_set(&hctx->ctx_map))
|
||||
break;
|
||||
|
||||
if (!blk_mq_get_dispatch_budget(hctx))
|
||||
if (!blk_mq_get_dispatch_budget(q))
|
||||
break;
|
||||
|
||||
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
|
||||
if (!rq) {
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
blk_mq_put_dispatch_budget(q);
|
||||
/*
|
||||
* We're releasing without dispatching. Holding the
|
||||
* budget could have blocked any "hctx"s with the
|
||||
@ -200,7 +271,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
/* round robin for fair dispatch */
|
||||
ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
|
||||
|
||||
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
|
||||
} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
|
||||
|
||||
WRITE_ONCE(hctx->dispatch_from, ctx);
|
||||
return ret;
|
||||
@ -240,7 +311,7 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
*/
|
||||
if (!list_empty(&rq_list)) {
|
||||
blk_mq_sched_mark_restart_hctx(hctx);
|
||||
if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
|
||||
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
|
||||
if (has_sched_dispatch)
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
else
|
||||
@ -253,7 +324,7 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
} else {
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
blk_mq_dispatch_rq_list(q, &rq_list, false);
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -56,43 +56,12 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||
blk_mq_tag_wakeup_all(tags, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* For shared tag users, we track the number of currently active users
|
||||
* and attempt to provide a fair share of the tag depth for each of them.
|
||||
*/
|
||||
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
|
||||
struct sbitmap_queue *bt)
|
||||
{
|
||||
unsigned int depth, users;
|
||||
|
||||
if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
|
||||
return true;
|
||||
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Don't try dividing an ant
|
||||
*/
|
||||
if (bt->sb.depth == 1)
|
||||
return true;
|
||||
|
||||
users = atomic_read(&hctx->tags->active_queues);
|
||||
if (!users)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Allow at least some tags
|
||||
*/
|
||||
depth = max((bt->sb.depth + users - 1) / users, 4U);
|
||||
return atomic_read(&hctx->nr_active) < depth;
|
||||
}
|
||||
|
||||
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
|
||||
struct sbitmap_queue *bt)
|
||||
{
|
||||
if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
|
||||
!hctx_may_queue(data->hctx, bt))
|
||||
if (!data->q->elevator && !hctx_may_queue(data->hctx, bt))
|
||||
return BLK_MQ_NO_TAG;
|
||||
|
||||
if (data->shallow_depth)
|
||||
return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
|
||||
else
|
||||
@ -191,33 +160,6 @@ found_tag:
|
||||
return tag + tag_offset;
|
||||
}
|
||||
|
||||
bool __blk_mq_get_driver_tag(struct request *rq)
|
||||
{
|
||||
struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
|
||||
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
|
||||
bool shared = blk_mq_tag_busy(rq->mq_hctx);
|
||||
int tag;
|
||||
|
||||
if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
|
||||
bt = &rq->mq_hctx->tags->breserved_tags;
|
||||
tag_offset = 0;
|
||||
}
|
||||
|
||||
if (!hctx_may_queue(rq->mq_hctx, bt))
|
||||
return false;
|
||||
tag = __sbitmap_queue_get(bt);
|
||||
if (tag == BLK_MQ_NO_TAG)
|
||||
return false;
|
||||
|
||||
rq->tag = tag + tag_offset;
|
||||
if (shared) {
|
||||
rq->rq_flags |= RQF_MQ_INFLIGHT;
|
||||
atomic_inc(&rq->mq_hctx->nr_active);
|
||||
}
|
||||
rq->mq_hctx->tags->rqs[rq->tag] = rq;
|
||||
return true;
|
||||
}
|
||||
|
||||
void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
|
||||
unsigned int tag)
|
||||
{
|
||||
|
@ -51,14 +51,6 @@ enum {
|
||||
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
|
||||
};
|
||||
|
||||
bool __blk_mq_get_driver_tag(struct request *rq);
|
||||
static inline bool blk_mq_get_driver_tag(struct request *rq)
|
||||
{
|
||||
if (rq->tag != BLK_MQ_NO_TAG)
|
||||
return true;
|
||||
return __blk_mq_get_driver_tag(rq);
|
||||
}
|
||||
|
||||
extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
|
||||
extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
|
||||
|
||||
@ -79,15 +71,34 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper should only be used for flush request to share tag
|
||||
* with the request cloned from, and both the two requests can't be
|
||||
* in flight at the same time. The caller has to make sure the tag
|
||||
* can't be freed.
|
||||
* For shared tag users, we track the number of currently active users
|
||||
* and attempt to provide a fair share of the tag depth for each of them.
|
||||
*/
|
||||
static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int tag, struct request *rq)
|
||||
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
|
||||
struct sbitmap_queue *bt)
|
||||
{
|
||||
hctx->tags->rqs[tag] = rq;
|
||||
unsigned int depth, users;
|
||||
|
||||
if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
|
||||
return true;
|
||||
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Don't try dividing an ant
|
||||
*/
|
||||
if (bt->sb.depth == 1)
|
||||
return true;
|
||||
|
||||
users = atomic_read(&hctx->tags->active_queues);
|
||||
if (!users)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Allow at least some tags
|
||||
*/
|
||||
depth = max((bt->sb.depth + users - 1) / users, 4U);
|
||||
return atomic_read(&hctx->nr_active) < depth;
|
||||
}
|
||||
|
||||
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
|
||||
|
396
block/blk-mq.c
396
block/blk-mq.c
@ -41,6 +41,8 @@
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
|
||||
|
||||
static void blk_mq_poll_stats_start(struct request_queue *q);
|
||||
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
|
||||
|
||||
@ -275,26 +277,20 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
{
|
||||
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
|
||||
struct request *rq = tags->static_rqs[tag];
|
||||
req_flags_t rq_flags = 0;
|
||||
|
||||
if (data->flags & BLK_MQ_REQ_INTERNAL) {
|
||||
if (data->q->elevator) {
|
||||
rq->tag = BLK_MQ_NO_TAG;
|
||||
rq->internal_tag = tag;
|
||||
} else {
|
||||
if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
|
||||
rq_flags = RQF_MQ_INFLIGHT;
|
||||
atomic_inc(&data->hctx->nr_active);
|
||||
}
|
||||
rq->tag = tag;
|
||||
rq->internal_tag = BLK_MQ_NO_TAG;
|
||||
data->hctx->tags->rqs[rq->tag] = rq;
|
||||
}
|
||||
|
||||
/* csd/requeue_work/fifo_time is initialized before use */
|
||||
rq->q = data->q;
|
||||
rq->mq_ctx = data->ctx;
|
||||
rq->mq_hctx = data->hctx;
|
||||
rq->rq_flags = rq_flags;
|
||||
rq->rq_flags = 0;
|
||||
rq->cmd_flags = data->cmd_flags;
|
||||
if (data->flags & BLK_MQ_REQ_PREEMPT)
|
||||
rq->rq_flags |= RQF_PREEMPT;
|
||||
@ -362,8 +358,6 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
|
||||
data->flags |= BLK_MQ_REQ_NOWAIT;
|
||||
|
||||
if (e) {
|
||||
data->flags |= BLK_MQ_REQ_INTERNAL;
|
||||
|
||||
/*
|
||||
* Flush requests are special and go directly to the
|
||||
* dispatch list. Don't include reserved tags in the
|
||||
@ -378,7 +372,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
|
||||
retry:
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
|
||||
if (!(data->flags & BLK_MQ_REQ_INTERNAL))
|
||||
if (!e)
|
||||
blk_mq_tag_busy(data->hctx);
|
||||
|
||||
/*
|
||||
@ -394,7 +388,7 @@ retry:
|
||||
/*
|
||||
* Give up the CPU and sleep for a random short time to ensure
|
||||
* that thread using a realtime scheduling class are migrated
|
||||
* off the the CPU, and thus off the hctx that is going away.
|
||||
* off the CPU, and thus off the hctx that is going away.
|
||||
*/
|
||||
msleep(3);
|
||||
goto retry;
|
||||
@ -474,9 +468,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
|
||||
data.ctx = __blk_mq_get_ctx(q, cpu);
|
||||
|
||||
if (q->elevator)
|
||||
data.flags |= BLK_MQ_REQ_INTERNAL;
|
||||
else
|
||||
if (!q->elevator)
|
||||
blk_mq_tag_busy(data.hctx);
|
||||
|
||||
ret = -EWOULDBLOCK;
|
||||
@ -552,8 +544,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
|
||||
blk_stat_add(rq, now);
|
||||
}
|
||||
|
||||
if (rq->internal_tag != BLK_MQ_NO_TAG)
|
||||
blk_mq_sched_completed_request(rq, now);
|
||||
blk_mq_sched_completed_request(rq, now);
|
||||
|
||||
blk_account_io_done(rq, now);
|
||||
|
||||
@ -574,71 +565,139 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_end_request);
|
||||
|
||||
/*
|
||||
* Softirq action handler - move entries to local list and loop over them
|
||||
* while passing them to the queue registered handler.
|
||||
*/
|
||||
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
|
||||
{
|
||||
struct list_head *cpu_list, local_list;
|
||||
|
||||
local_irq_disable();
|
||||
cpu_list = this_cpu_ptr(&blk_cpu_done);
|
||||
list_replace_init(cpu_list, &local_list);
|
||||
local_irq_enable();
|
||||
|
||||
while (!list_empty(&local_list)) {
|
||||
struct request *rq;
|
||||
|
||||
rq = list_entry(local_list.next, struct request, ipi_list);
|
||||
list_del_init(&rq->ipi_list);
|
||||
rq->q->mq_ops->complete(rq);
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_mq_trigger_softirq(struct request *rq)
|
||||
{
|
||||
struct list_head *list;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
list = this_cpu_ptr(&blk_cpu_done);
|
||||
list_add_tail(&rq->ipi_list, list);
|
||||
|
||||
/*
|
||||
* If the list only contains our just added request, signal a raise of
|
||||
* the softirq. If there are already entries there, someone already
|
||||
* raised the irq but it hasn't run yet.
|
||||
*/
|
||||
if (list->next == &rq->ipi_list)
|
||||
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static int blk_softirq_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
/*
|
||||
* If a CPU goes away, splice its entries to the current CPU
|
||||
* and trigger a run of the softirq
|
||||
*/
|
||||
local_irq_disable();
|
||||
list_splice_init(&per_cpu(blk_cpu_done, cpu),
|
||||
this_cpu_ptr(&blk_cpu_done));
|
||||
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
local_irq_enable();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void __blk_mq_complete_request_remote(void *data)
|
||||
{
|
||||
struct request *rq = data;
|
||||
struct request_queue *q = rq->q;
|
||||
|
||||
q->mq_ops->complete(rq);
|
||||
/*
|
||||
* For most of single queue controllers, there is only one irq vector
|
||||
* for handling I/O completion, and the only irq's affinity is set
|
||||
* to all possible CPUs. On most of ARCHs, this affinity means the irq
|
||||
* is handled on one specific CPU.
|
||||
*
|
||||
* So complete I/O requests in softirq context in case of single queue
|
||||
* devices to avoid degrading I/O performance due to irqsoff latency.
|
||||
*/
|
||||
if (rq->q->nr_hw_queues == 1)
|
||||
blk_mq_trigger_softirq(rq);
|
||||
else
|
||||
rq->q->mq_ops->complete(rq);
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_force_complete_rq() - Force complete the request, bypassing any error
|
||||
* injection that could drop the completion.
|
||||
* @rq: Request to be force completed
|
||||
*
|
||||
* Drivers should use blk_mq_complete_request() to complete requests in their
|
||||
* normal IO path. For timeout error recovery, drivers may call this forced
|
||||
* completion routine after they've reclaimed timed out requests to bypass
|
||||
* potentially subsequent fake timeouts.
|
||||
*/
|
||||
void blk_mq_force_complete_rq(struct request *rq)
|
||||
static inline bool blk_mq_complete_need_ipi(struct request *rq)
|
||||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct request_queue *q = rq->q;
|
||||
bool shared = false;
|
||||
int cpu;
|
||||
int cpu = raw_smp_processor_id();
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SMP) ||
|
||||
!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
|
||||
return false;
|
||||
|
||||
/* same CPU or cache domain? Complete locally */
|
||||
if (cpu == rq->mq_ctx->cpu ||
|
||||
(!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
|
||||
cpus_share_cache(cpu, rq->mq_ctx->cpu)))
|
||||
return false;
|
||||
|
||||
/* don't try to IPI to an offline CPU */
|
||||
return cpu_online(rq->mq_ctx->cpu);
|
||||
}
|
||||
|
||||
bool blk_mq_complete_request_remote(struct request *rq)
|
||||
{
|
||||
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
|
||||
/*
|
||||
* Most of single queue controllers, there is only one irq vector
|
||||
* for handling IO completion, and the only irq's affinity is set
|
||||
* as all possible CPUs. On most of ARCHs, this affinity means the
|
||||
* irq is handled on one specific CPU.
|
||||
*
|
||||
* So complete IO reqeust in softirq context in case of single queue
|
||||
* for not degrading IO performance by irqsoff latency.
|
||||
*/
|
||||
if (q->nr_hw_queues == 1) {
|
||||
__blk_complete_request(rq);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* For a polled request, always complete locallly, it's pointless
|
||||
* to redirect the completion.
|
||||
*/
|
||||
if ((rq->cmd_flags & REQ_HIPRI) ||
|
||||
!test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
|
||||
q->mq_ops->complete(rq);
|
||||
return;
|
||||
}
|
||||
if (rq->cmd_flags & REQ_HIPRI)
|
||||
return false;
|
||||
|
||||
cpu = get_cpu();
|
||||
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
|
||||
shared = cpus_share_cache(cpu, ctx->cpu);
|
||||
|
||||
if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
|
||||
if (blk_mq_complete_need_ipi(rq)) {
|
||||
rq->csd.func = __blk_mq_complete_request_remote;
|
||||
rq->csd.info = rq;
|
||||
rq->csd.flags = 0;
|
||||
smp_call_function_single_async(ctx->cpu, &rq->csd);
|
||||
smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
|
||||
} else {
|
||||
q->mq_ops->complete(rq);
|
||||
if (rq->q->nr_hw_queues > 1)
|
||||
return false;
|
||||
blk_mq_trigger_softirq(rq);
|
||||
}
|
||||
put_cpu();
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
|
||||
EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
|
||||
|
||||
/**
|
||||
* blk_mq_complete_request - end I/O on a request
|
||||
* @rq: the request being processed
|
||||
*
|
||||
* Description:
|
||||
* Complete a request by scheduling the ->complete_rq operation.
|
||||
**/
|
||||
void blk_mq_complete_request(struct request *rq)
|
||||
{
|
||||
if (!blk_mq_complete_request_remote(rq))
|
||||
rq->q->mq_ops->complete(rq);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_complete_request);
|
||||
|
||||
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
|
||||
__releases(hctx->srcu)
|
||||
@ -660,23 +719,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
|
||||
*srcu_idx = srcu_read_lock(hctx->srcu);
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_complete_request - end I/O on a request
|
||||
* @rq: the request being processed
|
||||
*
|
||||
* Description:
|
||||
* Ends all I/O on a request. It does not handle partial completions.
|
||||
* The actual completion happens out-of-order, through a IPI handler.
|
||||
**/
|
||||
bool blk_mq_complete_request(struct request *rq)
|
||||
{
|
||||
if (unlikely(blk_should_fake_timeout(rq->q)))
|
||||
return false;
|
||||
blk_mq_force_complete_rq(rq);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_complete_request);
|
||||
|
||||
/**
|
||||
* blk_mq_start_request - Start processing a request
|
||||
* @rq: Pointer to request to be started
|
||||
@ -1052,6 +1094,45 @@ static inline unsigned int queued_to_index(unsigned int queued)
|
||||
return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
|
||||
}
|
||||
|
||||
static bool __blk_mq_get_driver_tag(struct request *rq)
|
||||
{
|
||||
struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
|
||||
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
|
||||
int tag;
|
||||
|
||||
blk_mq_tag_busy(rq->mq_hctx);
|
||||
|
||||
if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
|
||||
bt = &rq->mq_hctx->tags->breserved_tags;
|
||||
tag_offset = 0;
|
||||
}
|
||||
|
||||
if (!hctx_may_queue(rq->mq_hctx, bt))
|
||||
return false;
|
||||
tag = __sbitmap_queue_get(bt);
|
||||
if (tag == BLK_MQ_NO_TAG)
|
||||
return false;
|
||||
|
||||
rq->tag = tag + tag_offset;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool blk_mq_get_driver_tag(struct request *rq)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
|
||||
return false;
|
||||
|
||||
if ((hctx->flags & BLK_MQ_F_TAG_SHARED) &&
|
||||
!(rq->rq_flags & RQF_MQ_INFLIGHT)) {
|
||||
rq->rq_flags |= RQF_MQ_INFLIGHT;
|
||||
atomic_inc(&hctx->nr_active);
|
||||
}
|
||||
hctx->tags->rqs[rq->tag] = rq;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
|
||||
int flags, void *key)
|
||||
{
|
||||
@ -1204,25 +1285,70 @@ static void blk_mq_handle_zone_resource(struct request *rq,
|
||||
__blk_mq_requeue_request(rq);
|
||||
}
|
||||
|
||||
enum prep_dispatch {
|
||||
PREP_DISPATCH_OK,
|
||||
PREP_DISPATCH_NO_TAG,
|
||||
PREP_DISPATCH_NO_BUDGET,
|
||||
};
|
||||
|
||||
static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
|
||||
bool need_budget)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
|
||||
blk_mq_put_driver_tag(rq);
|
||||
return PREP_DISPATCH_NO_BUDGET;
|
||||
}
|
||||
|
||||
if (!blk_mq_get_driver_tag(rq)) {
|
||||
/*
|
||||
* The initial allocation attempt failed, so we need to
|
||||
* rerun the hardware queue when a tag is freed. The
|
||||
* waitqueue takes care of that. If the queue is run
|
||||
* before we add this entry back on the dispatch list,
|
||||
* we'll re-run it below.
|
||||
*/
|
||||
if (!blk_mq_mark_tag_wait(hctx, rq)) {
|
||||
/*
|
||||
* All budgets not got from this function will be put
|
||||
* together during handling partial dispatch
|
||||
*/
|
||||
if (need_budget)
|
||||
blk_mq_put_dispatch_budget(rq->q);
|
||||
return PREP_DISPATCH_NO_TAG;
|
||||
}
|
||||
}
|
||||
|
||||
return PREP_DISPATCH_OK;
|
||||
}
|
||||
|
||||
/* release all allocated budgets before calling to blk_mq_dispatch_rq_list */
|
||||
static void blk_mq_release_budgets(struct request_queue *q,
|
||||
unsigned int nr_budgets)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_budgets; i++)
|
||||
blk_mq_put_dispatch_budget(q);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if we did some work AND can potentially do more.
|
||||
*/
|
||||
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
bool got_budget)
|
||||
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
|
||||
unsigned int nr_budgets)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
enum prep_dispatch prep;
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct request *rq, *nxt;
|
||||
bool no_tag = false;
|
||||
int errors, queued;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
bool no_budget_avail = false;
|
||||
LIST_HEAD(zone_list);
|
||||
|
||||
if (list_empty(list))
|
||||
return false;
|
||||
|
||||
WARN_ON(!list_is_singular(list) && got_budget);
|
||||
|
||||
/*
|
||||
* Now process all the entries, sending them to the driver.
|
||||
*/
|
||||
@ -1232,32 +1358,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
|
||||
hctx = rq->mq_hctx;
|
||||
if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
|
||||
blk_mq_put_driver_tag(rq);
|
||||
no_budget_avail = true;
|
||||
WARN_ON_ONCE(hctx != rq->mq_hctx);
|
||||
prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
|
||||
if (prep != PREP_DISPATCH_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!blk_mq_get_driver_tag(rq)) {
|
||||
/*
|
||||
* The initial allocation attempt failed, so we need to
|
||||
* rerun the hardware queue when a tag is freed. The
|
||||
* waitqueue takes care of that. If the queue is run
|
||||
* before we add this entry back on the dispatch list,
|
||||
* we'll re-run it below.
|
||||
*/
|
||||
if (!blk_mq_mark_tag_wait(hctx, rq)) {
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
/*
|
||||
* For non-shared tags, the RESTART check
|
||||
* will suffice.
|
||||
*/
|
||||
if (hctx->flags & BLK_MQ_F_TAG_SHARED)
|
||||
no_tag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
list_del_init(&rq->queuelist);
|
||||
|
||||
@ -1274,31 +1378,35 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
bd.last = !blk_mq_get_driver_tag(nxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* once the request is queued to lld, no need to cover the
|
||||
* budget any more
|
||||
*/
|
||||
if (nr_budgets)
|
||||
nr_budgets--;
|
||||
ret = q->mq_ops->queue_rq(hctx, &bd);
|
||||
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
|
||||
blk_mq_handle_dev_resource(rq, list);
|
||||
switch (ret) {
|
||||
case BLK_STS_OK:
|
||||
queued++;
|
||||
break;
|
||||
} else if (ret == BLK_STS_ZONE_RESOURCE) {
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_DEV_RESOURCE:
|
||||
blk_mq_handle_dev_resource(rq, list);
|
||||
goto out;
|
||||
case BLK_STS_ZONE_RESOURCE:
|
||||
/*
|
||||
* Move the request to zone_list and keep going through
|
||||
* the dispatch list to find more requests the drive can
|
||||
* accept.
|
||||
*/
|
||||
blk_mq_handle_zone_resource(rq, &zone_list);
|
||||
if (list_empty(list))
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(ret != BLK_STS_OK)) {
|
||||
break;
|
||||
default:
|
||||
errors++;
|
||||
blk_mq_end_request(rq, BLK_STS_IOERR);
|
||||
continue;
|
||||
}
|
||||
|
||||
queued++;
|
||||
} while (!list_empty(list));
|
||||
|
||||
out:
|
||||
if (!list_empty(&zone_list))
|
||||
list_splice_tail_init(&zone_list, list);
|
||||
|
||||
@ -1310,6 +1418,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
*/
|
||||
if (!list_empty(list)) {
|
||||
bool needs_restart;
|
||||
/* For non-shared tags, the RESTART check will suffice */
|
||||
bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
|
||||
(hctx->flags & BLK_MQ_F_TAG_SHARED);
|
||||
bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
|
||||
|
||||
blk_mq_release_budgets(q, nr_budgets);
|
||||
|
||||
/*
|
||||
* If we didn't flush the entire list, we could have told
|
||||
@ -1361,13 +1475,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
} else
|
||||
blk_mq_update_dispatch_busy(hctx, false);
|
||||
|
||||
/*
|
||||
* If the host/device is unable to accept more work, inform the
|
||||
* caller of that.
|
||||
*/
|
||||
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
|
||||
return false;
|
||||
|
||||
return (queued + errors) != 0;
|
||||
}
|
||||
|
||||
@ -1896,11 +2003,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
|
||||
if (q->elevator && !bypass_insert)
|
||||
goto insert;
|
||||
|
||||
if (!blk_mq_get_dispatch_budget(hctx))
|
||||
if (!blk_mq_get_dispatch_budget(q))
|
||||
goto insert;
|
||||
|
||||
if (!blk_mq_get_driver_tag(rq)) {
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
blk_mq_put_dispatch_budget(q);
|
||||
goto insert;
|
||||
}
|
||||
|
||||
@ -2005,8 +2112,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_make_request - Create and send a request to block device.
|
||||
* @q: Request queue pointer.
|
||||
* blk_mq_submit_bio - Create and send a request to block device.
|
||||
* @bio: Bio pointer.
|
||||
*
|
||||
* Builds up a request structure from @q and @bio and send to the device. The
|
||||
@ -2020,8 +2126,9 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
*
|
||||
* Returns: Request queue cookie.
|
||||
*/
|
||||
blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_qc_t blk_mq_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
const int is_sync = op_is_sync(bio->bi_opf);
|
||||
const int is_flush_fua = op_is_flush(bio->bi_opf);
|
||||
struct blk_mq_alloc_data data = {
|
||||
@ -2035,7 +2142,7 @@ blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_status_t ret;
|
||||
|
||||
blk_queue_bounce(q, &bio);
|
||||
__blk_queue_split(q, &bio, &nr_segs);
|
||||
__blk_queue_split(&bio, &nr_segs);
|
||||
|
||||
if (!bio_integrity_prep(bio))
|
||||
goto queue_exit;
|
||||
@ -2146,7 +2253,7 @@ queue_exit:
|
||||
blk_queue_exit(q);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
|
||||
EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */
|
||||
|
||||
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
||||
unsigned int hctx_idx)
|
||||
@ -2792,7 +2899,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
|
||||
mutex_lock(&set->tag_list_lock);
|
||||
list_del_rcu(&q->tag_set_list);
|
||||
list_del(&q->tag_set_list);
|
||||
if (list_is_singular(&set->tag_list)) {
|
||||
/* just transitioned to unshared */
|
||||
set->flags &= ~BLK_MQ_F_TAG_SHARED;
|
||||
@ -2819,7 +2926,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
|
||||
}
|
||||
if (set->flags & BLK_MQ_F_TAG_SHARED)
|
||||
queue_set_hctx_shared(q, true);
|
||||
list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
|
||||
list_add_tail(&q->tag_set_list, &set->tag_list);
|
||||
|
||||
mutex_unlock(&set->tag_list_lock);
|
||||
}
|
||||
@ -2886,7 +2993,7 @@ struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
||||
{
|
||||
struct request_queue *uninit_q, *q;
|
||||
|
||||
uninit_q = __blk_alloc_queue(set->numa_node);
|
||||
uninit_q = blk_alloc_queue(set->numa_node);
|
||||
if (!uninit_q)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
uninit_q->queuedata = queuedata;
|
||||
@ -3760,6 +3867,15 @@ EXPORT_SYMBOL(blk_mq_rq_cpu);
|
||||
|
||||
static int __init blk_mq_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
|
||||
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
|
||||
|
||||
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
|
||||
"block/softirq:dead", NULL,
|
||||
blk_softirq_cpu_dead);
|
||||
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
|
||||
blk_mq_hctx_notify_dead);
|
||||
cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
|
||||
|
@ -40,7 +40,8 @@ struct blk_mq_ctx {
|
||||
void blk_mq_exit_queue(struct request_queue *q);
|
||||
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
|
||||
void blk_mq_wake_waiters(struct request_queue *q);
|
||||
bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
|
||||
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
|
||||
unsigned int);
|
||||
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
|
||||
bool kick_requeue_list);
|
||||
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
|
||||
@ -159,7 +160,7 @@ struct blk_mq_alloc_data {
|
||||
|
||||
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
|
||||
{
|
||||
if (data->flags & BLK_MQ_REQ_INTERNAL)
|
||||
if (data->q->elevator)
|
||||
return data->hctx->sched_tags;
|
||||
|
||||
return data->hctx->tags;
|
||||
@ -179,20 +180,16 @@ unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part);
|
||||
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
|
||||
unsigned int inflight[2]);
|
||||
|
||||
static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
|
||||
static inline void blk_mq_put_dispatch_budget(struct request_queue *q)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
if (q->mq_ops->put_budget)
|
||||
q->mq_ops->put_budget(hctx);
|
||||
q->mq_ops->put_budget(q);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
|
||||
static inline bool blk_mq_get_dispatch_budget(struct request_queue *q)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
if (q->mq_ops->get_budget)
|
||||
return q->mq_ops->get_budget(hctx);
|
||||
return q->mq_ops->get_budget(q);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,156 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Functions related to softirq rq completions
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/topology.h>
|
||||
|
||||
#include "blk.h"
|
||||
|
||||
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
|
||||
|
||||
/*
|
||||
* Softirq action handler - move entries to local list and loop over them
|
||||
* while passing them to the queue registered handler.
|
||||
*/
|
||||
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
|
||||
{
|
||||
struct list_head *cpu_list, local_list;
|
||||
|
||||
local_irq_disable();
|
||||
cpu_list = this_cpu_ptr(&blk_cpu_done);
|
||||
list_replace_init(cpu_list, &local_list);
|
||||
local_irq_enable();
|
||||
|
||||
while (!list_empty(&local_list)) {
|
||||
struct request *rq;
|
||||
|
||||
rq = list_entry(local_list.next, struct request, ipi_list);
|
||||
list_del_init(&rq->ipi_list);
|
||||
rq->q->mq_ops->complete(rq);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void trigger_softirq(void *data)
|
||||
{
|
||||
struct request *rq = data;
|
||||
struct list_head *list;
|
||||
|
||||
list = this_cpu_ptr(&blk_cpu_done);
|
||||
list_add_tail(&rq->ipi_list, list);
|
||||
|
||||
if (list->next == &rq->ipi_list)
|
||||
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup and invoke a run of 'trigger_softirq' on the given cpu.
|
||||
*/
|
||||
static int raise_blk_irq(int cpu, struct request *rq)
|
||||
{
|
||||
if (cpu_online(cpu)) {
|
||||
call_single_data_t *data = &rq->csd;
|
||||
|
||||
data->func = trigger_softirq;
|
||||
data->info = rq;
|
||||
data->flags = 0;
|
||||
|
||||
smp_call_function_single_async(cpu, data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
#else /* CONFIG_SMP */
|
||||
static int raise_blk_irq(int cpu, struct request *rq)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int blk_softirq_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
/*
|
||||
* If a CPU goes away, splice its entries to the current CPU
|
||||
* and trigger a run of the softirq
|
||||
*/
|
||||
local_irq_disable();
|
||||
list_splice_init(&per_cpu(blk_cpu_done, cpu),
|
||||
this_cpu_ptr(&blk_cpu_done));
|
||||
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
local_irq_enable();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __blk_complete_request(struct request *req)
|
||||
{
|
||||
struct request_queue *q = req->q;
|
||||
int cpu, ccpu = req->mq_ctx->cpu;
|
||||
unsigned long flags;
|
||||
bool shared = false;
|
||||
|
||||
BUG_ON(!q->mq_ops->complete);
|
||||
|
||||
local_irq_save(flags);
|
||||
cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* Select completion CPU
|
||||
*/
|
||||
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
|
||||
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
|
||||
shared = cpus_share_cache(cpu, ccpu);
|
||||
} else
|
||||
ccpu = cpu;
|
||||
|
||||
/*
|
||||
* If current CPU and requested CPU share a cache, run the softirq on
|
||||
* the current CPU. One might concern this is just like
|
||||
* QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
|
||||
* running in interrupt handler, and currently I/O controller doesn't
|
||||
* support multiple interrupts, so current CPU is unique actually. This
|
||||
* avoids IPI sending from current CPU to the first CPU of a group.
|
||||
*/
|
||||
if (ccpu == cpu || shared) {
|
||||
struct list_head *list;
|
||||
do_local:
|
||||
list = this_cpu_ptr(&blk_cpu_done);
|
||||
list_add_tail(&req->ipi_list, list);
|
||||
|
||||
/*
|
||||
* if the list only contains our just added request,
|
||||
* signal a raise of the softirq. If there are already
|
||||
* entries there, someone already raised the irq but it
|
||||
* hasn't run yet.
|
||||
*/
|
||||
if (list->next == &req->ipi_list)
|
||||
raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
} else if (raise_blk_irq(ccpu, req))
|
||||
goto do_local;
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static __init int blk_softirq_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
|
||||
|
||||
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
|
||||
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
|
||||
"block/softirq:dead", NULL,
|
||||
blk_softirq_cpu_dead);
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(blk_softirq_init);
|
@ -11,6 +11,7 @@
|
||||
#include <linux/blktrace_api.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
@ -873,22 +874,32 @@ static void blk_exit_queue(struct request_queue *q)
|
||||
bdi_put(q->backing_dev_info);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* __blk_release_queue - release a request queue
|
||||
* @work: pointer to the release_work member of the request queue to be released
|
||||
* blk_release_queue - releases all allocated resources of the request_queue
|
||||
* @kobj: pointer to a kobject, whose container is a request_queue
|
||||
*
|
||||
* Description:
|
||||
* This function is called when a block device is being unregistered. The
|
||||
* process of releasing a request queue starts with blk_cleanup_queue, which
|
||||
* set the appropriate flags and then calls blk_put_queue, that decrements
|
||||
* the reference counter of the request queue. Once the reference counter
|
||||
* of the request queue reaches zero, blk_release_queue is called to release
|
||||
* all allocated resources of the request queue.
|
||||
* This function releases all allocated resources of the request queue.
|
||||
*
|
||||
* The struct request_queue refcount is incremented with blk_get_queue() and
|
||||
* decremented with blk_put_queue(). Once the refcount reaches 0 this function
|
||||
* is called.
|
||||
*
|
||||
* For drivers that have a request_queue on a gendisk and added with
|
||||
* __device_add_disk() the refcount to request_queue will reach 0 with
|
||||
* the last put_disk() called by the driver. For drivers which don't use
|
||||
* __device_add_disk() this happens with blk_cleanup_queue().
|
||||
*
|
||||
* Drivers exist which depend on the release of the request_queue to be
|
||||
* synchronous, it should not be deferred.
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
static void __blk_release_queue(struct work_struct *work)
|
||||
static void blk_release_queue(struct kobject *kobj)
|
||||
{
|
||||
struct request_queue *q = container_of(work, typeof(*q), release_work);
|
||||
struct request_queue *q =
|
||||
container_of(kobj, struct request_queue, kobj);
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
|
||||
blk_stat_remove_callback(q, q->poll_cb);
|
||||
@ -907,6 +918,9 @@ static void __blk_release_queue(struct work_struct *work)
|
||||
blk_mq_release(q);
|
||||
|
||||
blk_trace_shutdown(q);
|
||||
mutex_lock(&q->debugfs_mutex);
|
||||
debugfs_remove_recursive(q->debugfs_dir);
|
||||
mutex_unlock(&q->debugfs_mutex);
|
||||
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_debugfs_unregister(q);
|
||||
@ -917,15 +931,6 @@ static void __blk_release_queue(struct work_struct *work)
|
||||
call_rcu(&q->rcu_head, blk_free_queue_rcu);
|
||||
}
|
||||
|
||||
static void blk_release_queue(struct kobject *kobj)
|
||||
{
|
||||
struct request_queue *q =
|
||||
container_of(kobj, struct request_queue, kobj);
|
||||
|
||||
INIT_WORK(&q->release_work, __blk_release_queue);
|
||||
schedule_work(&q->release_work);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops queue_sysfs_ops = {
|
||||
.show = queue_attr_show,
|
||||
.store = queue_attr_store,
|
||||
@ -988,6 +993,11 @@ int blk_register_queue(struct gendisk *disk)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
mutex_lock(&q->debugfs_mutex);
|
||||
q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
|
||||
blk_debugfs_root);
|
||||
mutex_unlock(&q->debugfs_mutex);
|
||||
|
||||
if (queue_is_mq(q)) {
|
||||
__blk_mq_register_dev(dev, q);
|
||||
blk_mq_debugfs_register(q);
|
||||
|
@ -1339,8 +1339,8 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
|
||||
|
||||
if (!bio_list_empty(&bio_list_on_stack)) {
|
||||
blk_start_plug(&plug);
|
||||
while((bio = bio_list_pop(&bio_list_on_stack)))
|
||||
generic_make_request(bio);
|
||||
while ((bio = bio_list_pop(&bio_list_on_stack)))
|
||||
submit_bio_noacct(bio);
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
}
|
||||
@ -2158,17 +2158,18 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
|
||||
}
|
||||
#endif
|
||||
|
||||
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
||||
struct bio *bio)
|
||||
bool blk_throtl_bio(struct bio *bio)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
struct throtl_qnode *qn = NULL;
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg);
|
||||
struct throtl_service_queue *sq;
|
||||
bool rw = bio_data_dir(bio);
|
||||
bool throttled = false;
|
||||
struct throtl_data *td = tg->td;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
rcu_read_lock();
|
||||
|
||||
/* see throtl_charge_bio() */
|
||||
if (bio_flagged(bio, BIO_THROTTLED))
|
||||
@ -2273,6 +2274,7 @@ out:
|
||||
if (throttled || !td->track_bio_latency)
|
||||
bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
|
||||
#endif
|
||||
rcu_read_unlock();
|
||||
return throttled;
|
||||
}
|
||||
|
||||
|
@ -20,13 +20,11 @@ static int __init setup_fail_io_timeout(char *str)
|
||||
}
|
||||
__setup("fail_io_timeout=", setup_fail_io_timeout);
|
||||
|
||||
int blk_should_fake_timeout(struct request_queue *q)
|
||||
bool __blk_should_fake_timeout(struct request_queue *q)
|
||||
{
|
||||
if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
|
||||
return 0;
|
||||
|
||||
return should_fail(&fail_io_timeout, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blk_should_fake_timeout);
|
||||
|
||||
static int __init fail_io_timeout_debugfs(void)
|
||||
{
|
||||
@ -70,7 +68,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
|
||||
#endif /* CONFIG_FAIL_IO_TIMEOUT */
|
||||
|
||||
/**
|
||||
* blk_abort_request -- Request request recovery for the specified command
|
||||
* blk_abort_request - Request recovery for the specified command
|
||||
* @req: pointer to the request of interest
|
||||
*
|
||||
* This function requests that the block layer start recovery for the
|
||||
@ -90,11 +88,29 @@ void blk_abort_request(struct request *req)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_abort_request);
|
||||
|
||||
static unsigned long blk_timeout_mask __read_mostly;
|
||||
|
||||
static int __init blk_timeout_init(void)
|
||||
{
|
||||
blk_timeout_mask = roundup_pow_of_two(HZ) - 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(blk_timeout_init);
|
||||
|
||||
/*
|
||||
* Just a rough estimate, we don't care about specific values for timeouts.
|
||||
*/
|
||||
static inline unsigned long blk_round_jiffies(unsigned long j)
|
||||
{
|
||||
return (j + blk_timeout_mask) + 1;
|
||||
}
|
||||
|
||||
unsigned long blk_rq_timeout(unsigned long timeout)
|
||||
{
|
||||
unsigned long maxt;
|
||||
|
||||
maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
|
||||
maxt = blk_round_jiffies(jiffies + BLK_MAX_TIMEOUT);
|
||||
if (time_after(timeout, maxt))
|
||||
timeout = maxt;
|
||||
|
||||
@ -131,7 +147,7 @@ void blk_add_timer(struct request *req)
|
||||
* than an existing one, modify the timer. Round up to next nearest
|
||||
* second.
|
||||
*/
|
||||
expiry = blk_rq_timeout(round_jiffies_up(expiry));
|
||||
expiry = blk_rq_timeout(blk_round_jiffies(expiry));
|
||||
|
||||
if (!timer_pending(&q->timeout) ||
|
||||
time_before(expiry, q->timeout.expires)) {
|
||||
|
37
block/blk.h
37
block/blk.h
@ -14,9 +14,7 @@
|
||||
/* Max future timer expiry for timeouts */
|
||||
#define BLK_MAX_TIMEOUT (5 * HZ)
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
extern struct dentry *blk_debugfs_root;
|
||||
#endif
|
||||
|
||||
struct blk_flush_queue {
|
||||
unsigned int flush_pending_idx:1;
|
||||
@ -27,11 +25,6 @@ struct blk_flush_queue {
|
||||
struct list_head flush_data_in_flight;
|
||||
struct request *flush_rq;
|
||||
|
||||
/*
|
||||
* flush_rq shares tag with this rq, both can't be active
|
||||
* at the same time
|
||||
*/
|
||||
struct request *orig_rq;
|
||||
struct lock_class_key key;
|
||||
spinlock_t mq_flush_lock;
|
||||
};
|
||||
@ -223,21 +216,11 @@ ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf);
|
||||
ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count);
|
||||
|
||||
#ifdef CONFIG_FAIL_IO_TIMEOUT
|
||||
int blk_should_fake_timeout(struct request_queue *);
|
||||
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
|
||||
ssize_t part_timeout_store(struct device *, struct device_attribute *,
|
||||
const char *, size_t);
|
||||
#else
|
||||
static inline int blk_should_fake_timeout(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
unsigned int *nr_segs);
|
||||
void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
|
||||
int ll_back_merge_fn(struct request *req, struct bio *bio,
|
||||
unsigned int nr_segs);
|
||||
int ll_front_merge_fn(struct request *req, struct bio *bio,
|
||||
@ -281,6 +264,20 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
|
||||
return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
|
||||
}
|
||||
|
||||
/*
|
||||
* The max bio size which is aligned to q->limits.discard_granularity. This
|
||||
* is a hint to split large discard bio in generic block layer, then if device
|
||||
* driver needs to split the discard bio into smaller ones, their bi_size can
|
||||
* be very probably and easily aligned to discard_granularity of the device's
|
||||
* queue.
|
||||
*/
|
||||
static inline unsigned int bio_aligned_discard_max_sectors(
|
||||
struct request_queue *q)
|
||||
{
|
||||
return round_down(UINT_MAX, q->limits.discard_granularity) >>
|
||||
SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal io_context interface
|
||||
*/
|
||||
@ -299,10 +296,12 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
|
||||
extern int blk_throtl_init(struct request_queue *q);
|
||||
extern void blk_throtl_exit(struct request_queue *q);
|
||||
extern void blk_throtl_register_queue(struct request_queue *q);
|
||||
bool blk_throtl_bio(struct bio *bio);
|
||||
#else /* CONFIG_BLK_DEV_THROTTLING */
|
||||
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
|
||||
static inline void blk_throtl_exit(struct request_queue *q) { }
|
||||
static inline void blk_throtl_register_queue(struct request_queue *q) { }
|
||||
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
|
||||
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
|
||||
@ -434,8 +433,6 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
|
||||
#endif
|
||||
}
|
||||
|
||||
struct request_queue *__blk_alloc_queue(int node_id);
|
||||
|
||||
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
unsigned int max_sectors, bool *same_page);
|
||||
|
@ -309,7 +309,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
|
||||
if (!passthrough && sectors < bio_sectors(*bio_orig)) {
|
||||
bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
|
||||
bio_chain(bio, *bio_orig);
|
||||
generic_make_request(*bio_orig);
|
||||
submit_bio_noacct(*bio_orig);
|
||||
*bio_orig = bio;
|
||||
}
|
||||
bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
|
||||
|
@ -181,9 +181,12 @@ EXPORT_SYMBOL_GPL(bsg_job_get);
|
||||
void bsg_job_done(struct bsg_job *job, int result,
|
||||
unsigned int reply_payload_rcv_len)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(job);
|
||||
|
||||
job->result = result;
|
||||
job->reply_payload_rcv_len = reply_payload_rcv_len;
|
||||
blk_mq_complete_request(blk_mq_rq_from_pdu(job));
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bsg_job_done);
|
||||
|
||||
|
@ -95,8 +95,8 @@ static inline bool elv_support_features(unsigned int elv_features,
|
||||
* @name: Elevator name to test
|
||||
* @required_features: Features that the elevator must provide
|
||||
*
|
||||
* Return true is the elevator @e name matches @name and if @e provides all the
|
||||
* the feratures spcified by @required_features.
|
||||
* Return true if the elevator @e name matches @name and if @e provides all
|
||||
* the features specified by @required_features.
|
||||
*/
|
||||
static bool elevator_match(const struct elevator_type *e, const char *name,
|
||||
unsigned int required_features)
|
||||
|
@ -38,8 +38,6 @@ static struct kobject *block_depr;
|
||||
static DEFINE_SPINLOCK(ext_devt_lock);
|
||||
static DEFINE_IDR(ext_devt_idr);
|
||||
|
||||
static const struct device_type disk_type;
|
||||
|
||||
static void disk_check_events(struct disk_events *ev,
|
||||
unsigned int *clearing_ptr);
|
||||
static void disk_alloc_events(struct gendisk *disk);
|
||||
@ -876,11 +874,32 @@ static void invalidate_partition(struct gendisk *disk, int partno)
|
||||
bdput(bdev);
|
||||
}
|
||||
|
||||
/**
|
||||
* del_gendisk - remove the gendisk
|
||||
* @disk: the struct gendisk to remove
|
||||
*
|
||||
* Removes the gendisk and all its associated resources. This deletes the
|
||||
* partitions associated with the gendisk, and unregisters the associated
|
||||
* request_queue.
|
||||
*
|
||||
* This is the counter to the respective __device_add_disk() call.
|
||||
*
|
||||
* The final removal of the struct gendisk happens when its refcount reaches 0
|
||||
* with put_disk(), which should be called after del_gendisk(), if
|
||||
* __device_add_disk() was used.
|
||||
*
|
||||
* Drivers exist which depend on the release of the gendisk to be synchronous,
|
||||
* it should not be deferred.
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
void del_gendisk(struct gendisk *disk)
|
||||
{
|
||||
struct disk_part_iter piter;
|
||||
struct hd_struct *part;
|
||||
|
||||
might_sleep();
|
||||
|
||||
blk_integrity_del(disk);
|
||||
disk_del_events(disk);
|
||||
|
||||
@ -971,11 +990,15 @@ static ssize_t disk_badblocks_store(struct device *dev,
|
||||
*
|
||||
* This function gets the structure containing partitioning
|
||||
* information for the given device @devt.
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
struct gendisk *get_gendisk(dev_t devt, int *partno)
|
||||
{
|
||||
struct gendisk *disk = NULL;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
|
||||
struct kobject *kobj;
|
||||
|
||||
@ -1514,10 +1537,31 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_release - releases all allocated resources of the gendisk
|
||||
* @dev: the device representing this disk
|
||||
*
|
||||
* This function releases all allocated resources of the gendisk.
|
||||
*
|
||||
* The struct gendisk refcount is incremented with get_gendisk() or
|
||||
* get_disk_and_module(), and its refcount is decremented with
|
||||
* put_disk_and_module() or put_disk(). Once the refcount reaches 0 this
|
||||
* function is called.
|
||||
*
|
||||
* Drivers which used __device_add_disk() have a gendisk with a request_queue
|
||||
* assigned. Since the request_queue sits on top of the gendisk for these
|
||||
* drivers we also call blk_put_queue() for them, and we expect the
|
||||
* request_queue refcount to reach 0 at this point, and so the request_queue
|
||||
* will also be freed prior to the disk.
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
static void disk_release(struct device *dev)
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
might_sleep();
|
||||
|
||||
blk_free_devt(dev->devt);
|
||||
disk_release_events(disk);
|
||||
kfree(disk->random);
|
||||
@ -1541,7 +1585,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const struct device_type disk_type = {
|
||||
const struct device_type disk_type = {
|
||||
.name = "disk",
|
||||
.groups = disk_attr_groups,
|
||||
.release = disk_release,
|
||||
@ -1727,6 +1771,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
}
|
||||
EXPORT_SYMBOL(__alloc_disk_node);
|
||||
|
||||
/**
|
||||
* get_disk_and_module - increments the gendisk and gendisk fops module refcount
|
||||
* @disk: the struct gendisk to increment the refcount for
|
||||
*
|
||||
* This increments the refcount for the struct gendisk, and the gendisk's
|
||||
* fops module owner.
|
||||
*
|
||||
* Context: Any context.
|
||||
*/
|
||||
struct kobject *get_disk_and_module(struct gendisk *disk)
|
||||
{
|
||||
struct module *owner;
|
||||
@ -1747,6 +1800,16 @@ struct kobject *get_disk_and_module(struct gendisk *disk)
|
||||
}
|
||||
EXPORT_SYMBOL(get_disk_and_module);
|
||||
|
||||
/**
|
||||
* put_disk - decrements the gendisk refcount
|
||||
* @disk: the struct gendisk to decrement the refcount for
|
||||
*
|
||||
* This decrements the refcount for the struct gendisk. When this reaches 0
|
||||
* we'll have disk_release() called.
|
||||
*
|
||||
* Context: Any context, but the last reference must not be dropped from
|
||||
* atomic context.
|
||||
*/
|
||||
void put_disk(struct gendisk *disk)
|
||||
{
|
||||
if (disk)
|
||||
@ -1754,9 +1817,15 @@ void put_disk(struct gendisk *disk)
|
||||
}
|
||||
EXPORT_SYMBOL(put_disk);
|
||||
|
||||
/*
|
||||
/**
|
||||
* put_disk_and_module - decrements the module and gendisk refcount
|
||||
* @disk: the struct gendisk to decrement the refcount for
|
||||
*
|
||||
* This is a counterpart of get_disk_and_module() and thus also of
|
||||
* get_gendisk().
|
||||
*
|
||||
* Context: Any context, but the last reference must not be dropped from
|
||||
* atomic context.
|
||||
*/
|
||||
void put_disk_and_module(struct gendisk *disk)
|
||||
{
|
||||
@ -1985,18 +2054,12 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
|
||||
*/
|
||||
unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
|
||||
{
|
||||
const struct block_device_operations *bdops = disk->fops;
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned int pending;
|
||||
unsigned int clearing = mask;
|
||||
|
||||
if (!ev) {
|
||||
/* for drivers still using the old ->media_changed method */
|
||||
if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
|
||||
bdops->media_changed && bdops->media_changed(disk))
|
||||
return DISK_EVENT_MEDIA_CHANGE;
|
||||
if (!ev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
disk_block_events(disk);
|
||||
|
||||
|
@ -619,8 +619,6 @@ int blk_drop_partitions(struct block_device *bdev)
|
||||
struct disk_part_iter piter;
|
||||
struct hd_struct *part;
|
||||
|
||||
if (!disk_part_scan_enabled(bdev->bd_disk))
|
||||
return 0;
|
||||
if (bdev->bd_part_count)
|
||||
return -EBUSY;
|
||||
|
||||
|
@ -282,7 +282,7 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t brd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct brd_device *brd = bio->bi_disk->private_data;
|
||||
struct bio_vec bvec;
|
||||
@ -330,6 +330,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
|
||||
|
||||
static const struct block_device_operations brd_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = brd_submit_bio,
|
||||
.rw_page = brd_rw_page,
|
||||
};
|
||||
|
||||
@ -381,7 +382,7 @@ static struct brd_device *brd_alloc(int i)
|
||||
spin_lock_init(&brd->brd_lock);
|
||||
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
|
||||
|
||||
brd->brd_queue = blk_alloc_queue(brd_make_request, NUMA_NO_NODE);
|
||||
brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!brd->brd_queue)
|
||||
goto out_free_dev;
|
||||
|
||||
|
@ -1451,7 +1451,7 @@ extern void conn_free_crypto(struct drbd_connection *connection);
|
||||
/* drbd_req */
|
||||
extern void do_submit(struct work_struct *ws);
|
||||
extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
|
||||
extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio);
|
||||
extern blk_qc_t drbd_submit_bio(struct bio *bio);
|
||||
extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
|
||||
extern int is_valid_ar_handle(struct drbd_request *, sector_t);
|
||||
|
||||
@ -1576,12 +1576,12 @@ void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
|
||||
/*
|
||||
* used to submit our private bio
|
||||
*/
|
||||
static inline void drbd_generic_make_request(struct drbd_device *device,
|
||||
static inline void drbd_submit_bio_noacct(struct drbd_device *device,
|
||||
int fault_type, struct bio *bio)
|
||||
{
|
||||
__release(local);
|
||||
if (!bio->bi_disk) {
|
||||
drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n");
|
||||
drbd_err(device, "drbd_submit_bio_noacct: bio->bi_disk == NULL\n");
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
bio_endio(bio);
|
||||
return;
|
||||
@ -1590,7 +1590,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
|
||||
if (drbd_insert_fault(device, fault_type))
|
||||
bio_io_error(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
|
||||
|
@ -132,9 +132,10 @@ wait_queue_head_t drbd_pp_wait;
|
||||
DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
|
||||
|
||||
static const struct block_device_operations drbd_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = drbd_open,
|
||||
.release = drbd_release,
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = drbd_submit_bio,
|
||||
.open = drbd_open,
|
||||
.release = drbd_release,
|
||||
};
|
||||
|
||||
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
|
||||
@ -2324,7 +2325,7 @@ static void do_retry(struct work_struct *ws)
|
||||
* workqueues instead.
|
||||
*/
|
||||
|
||||
/* We are not just doing generic_make_request(),
|
||||
/* We are not just doing submit_bio_noacct(),
|
||||
* as we want to keep the start_time information. */
|
||||
inc_ap_bio(device);
|
||||
__drbd_make_request(device, bio, start_jif);
|
||||
@ -2414,62 +2415,6 @@ static void drbd_cleanup(void)
|
||||
pr_info("module cleanup done.\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_congested() - Callback for the flusher thread
|
||||
* @congested_data: User data
|
||||
* @bdi_bits: Bits the BDI flusher thread is currently interested in
|
||||
*
|
||||
* Returns 1<<WB_async_congested and/or 1<<WB_sync_congested if we are congested.
|
||||
*/
|
||||
static int drbd_congested(void *congested_data, int bdi_bits)
|
||||
{
|
||||
struct drbd_device *device = congested_data;
|
||||
struct request_queue *q;
|
||||
char reason = '-';
|
||||
int r = 0;
|
||||
|
||||
if (!may_inc_ap_bio(device)) {
|
||||
/* DRBD has frozen IO */
|
||||
r = bdi_bits;
|
||||
reason = 'd';
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) {
|
||||
r |= (1 << WB_async_congested);
|
||||
/* Without good local data, we would need to read from remote,
|
||||
* and that would need the worker thread as well, which is
|
||||
* currently blocked waiting for that usermode helper to
|
||||
* finish.
|
||||
*/
|
||||
if (!get_ldev_if_state(device, D_UP_TO_DATE))
|
||||
r |= (1 << WB_sync_congested);
|
||||
else
|
||||
put_ldev(device);
|
||||
r &= bdi_bits;
|
||||
reason = 'c';
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (get_ldev(device)) {
|
||||
q = bdev_get_queue(device->ldev->backing_bdev);
|
||||
r = bdi_congested(q->backing_dev_info, bdi_bits);
|
||||
put_ldev(device);
|
||||
if (r)
|
||||
reason = 'b';
|
||||
}
|
||||
|
||||
if (bdi_bits & (1 << WB_async_congested) &&
|
||||
test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) {
|
||||
r |= (1 << WB_async_congested);
|
||||
reason = reason == 'b' ? 'a' : 'n';
|
||||
}
|
||||
|
||||
out:
|
||||
device->congestion_reason = reason;
|
||||
return r;
|
||||
}
|
||||
|
||||
static void drbd_init_workqueue(struct drbd_work_queue* wq)
|
||||
{
|
||||
spin_lock_init(&wq->q_lock);
|
||||
@ -2801,11 +2746,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
|
||||
drbd_init_set_defaults(device);
|
||||
|
||||
q = blk_alloc_queue(drbd_make_request, NUMA_NO_NODE);
|
||||
q = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!q)
|
||||
goto out_no_q;
|
||||
device->rq_queue = q;
|
||||
q->queuedata = device;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
@ -2825,9 +2769,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
/* we have no partitions. we contain only ourselves. */
|
||||
device->this_bdev->bd_contains = device->this_bdev;
|
||||
|
||||
q->backing_dev_info->congested_fn = drbd_congested;
|
||||
q->backing_dev_info->congested_data = device;
|
||||
|
||||
blk_queue_write_cache(q, true, true);
|
||||
/* Setting the max_hw_sectors to an odd value of 8kibyte here
|
||||
This triggers a max_bio_size message upon first attach or connect */
|
||||
|
@ -265,7 +265,6 @@ int drbd_seq_show(struct seq_file *seq, void *v)
|
||||
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
|
||||
} else {
|
||||
/* reset device->congestion_reason */
|
||||
bdi_rw_congested(device->rq_queue->backing_dev_info);
|
||||
|
||||
nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
|
||||
wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
|
||||
|
@ -1723,7 +1723,7 @@ next_bio:
|
||||
bios = bios->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
|
||||
drbd_generic_make_request(device, fault_type, bio);
|
||||
drbd_submit_bio_noacct(device, fault_type, bio);
|
||||
} while (bios);
|
||||
return 0;
|
||||
|
||||
|
@ -1164,7 +1164,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
|
||||
else if (bio_op(bio) == REQ_OP_DISCARD)
|
||||
drbd_process_discard_or_zeroes_req(req, EE_TRIM);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
put_ldev(device);
|
||||
} else
|
||||
bio_io_error(bio);
|
||||
@ -1593,12 +1593,12 @@ void do_submit(struct work_struct *ws)
|
||||
}
|
||||
}
|
||||
|
||||
blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_qc_t drbd_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct drbd_device *device = (struct drbd_device *) q->queuedata;
|
||||
struct drbd_device *device = bio->bi_disk->private_data;
|
||||
unsigned long start_jif;
|
||||
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
start_jif = jiffies;
|
||||
|
||||
|
@ -1525,7 +1525,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
|
||||
|
||||
drbd_req_make_private_bio(req, req->master_bio);
|
||||
bio_set_dev(req->private_bio, device->ldev->backing_bdev);
|
||||
generic_make_request(req->private_bio);
|
||||
submit_bio_noacct(req->private_bio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -4205,7 +4205,6 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
|
||||
struct bio_vec bio_vec;
|
||||
struct page *page;
|
||||
struct rb0_cbdata cbdata;
|
||||
size_t size;
|
||||
|
||||
page = alloc_page(GFP_NOIO);
|
||||
if (!page) {
|
||||
@ -4213,15 +4212,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
size = bdev->bd_block_size;
|
||||
if (!size)
|
||||
size = 1024;
|
||||
|
||||
cbdata.drive = drive;
|
||||
|
||||
bio_init(&bio, &bio_vec, 1);
|
||||
bio_set_dev(&bio, bdev);
|
||||
bio_add_page(&bio, page, size, 0);
|
||||
bio_add_page(&bio, page, block_size(bdev), 0);
|
||||
|
||||
bio.bi_iter.bi_sector = 0;
|
||||
bio.bi_flags |= (1 << BIO_QUIET);
|
||||
|
@ -509,7 +509,8 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
|
||||
return;
|
||||
kfree(cmd->bvec);
|
||||
cmd->bvec = NULL;
|
||||
blk_mq_complete_request(rq);
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
|
||||
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
|
||||
@ -1089,11 +1090,10 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
* here to avoid changing device under exclusive owner.
|
||||
*/
|
||||
if (!(mode & FMODE_EXCL)) {
|
||||
claimed_bdev = bd_start_claiming(bdev, loop_configure);
|
||||
if (IS_ERR(claimed_bdev)) {
|
||||
error = PTR_ERR(claimed_bdev);
|
||||
claimed_bdev = bdev->bd_contains;
|
||||
error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure);
|
||||
if (error)
|
||||
goto out_putf;
|
||||
}
|
||||
}
|
||||
|
||||
error = mutex_lock_killable(&loop_ctl_mutex);
|
||||
@ -2048,7 +2048,8 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
|
||||
cmd->ret = ret;
|
||||
else
|
||||
cmd->ret = ret ? -EIO : 0;
|
||||
blk_mq_complete_request(rq);
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2402,6 +2403,8 @@ static void __exit loop_exit(void)
|
||||
|
||||
range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
|
||||
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
|
||||
idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
|
||||
idr_destroy(&loop_index_idr);
|
||||
|
||||
@ -2409,6 +2412,8 @@ static void __exit loop_exit(void)
|
||||
unregister_blkdev(LOOP_MAJOR, "loop");
|
||||
|
||||
misc_deregister(&loop_misc);
|
||||
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
}
|
||||
|
||||
module_init(loop_init);
|
||||
|
@ -492,7 +492,8 @@ static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
|
||||
struct request *req = blk_mq_rq_from_pdu(cmd);
|
||||
|
||||
cmd->status = status;
|
||||
blk_mq_complete_request(req);
|
||||
if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -784,6 +784,7 @@ static void recv_work(struct work_struct *work)
|
||||
struct nbd_device *nbd = args->nbd;
|
||||
struct nbd_config *config = nbd->config;
|
||||
struct nbd_cmd *cmd;
|
||||
struct request *rq;
|
||||
|
||||
while (1) {
|
||||
cmd = nbd_read_stat(nbd, args->index);
|
||||
@ -796,7 +797,9 @@ static void recv_work(struct work_struct *work)
|
||||
break;
|
||||
}
|
||||
|
||||
blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
|
||||
rq = blk_mq_rq_from_pdu(cmd);
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
atomic_dec(&config->recv_threads);
|
||||
wake_up(&config->recv_wq);
|
||||
|
@ -1283,7 +1283,8 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
|
||||
case NULL_IRQ_SOFTIRQ:
|
||||
switch (cmd->nq->dev->queue_mode) {
|
||||
case NULL_Q_MQ:
|
||||
blk_mq_complete_request(cmd->rq);
|
||||
if (likely(!blk_should_fake_timeout(cmd->rq->q)))
|
||||
blk_mq_complete_request(cmd->rq);
|
||||
break;
|
||||
case NULL_Q_BIO:
|
||||
/*
|
||||
@ -1387,11 +1388,11 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
|
||||
return &nullb->queues[index];
|
||||
}
|
||||
|
||||
static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t null_submit_bio(struct bio *bio)
|
||||
{
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
sector_t nr_sectors = bio_sectors(bio);
|
||||
struct nullb *nullb = q->queuedata;
|
||||
struct nullb *nullb = bio->bi_disk->private_data;
|
||||
struct nullb_queue *nq = nullb_to_queue(nullb);
|
||||
struct nullb_cmd *cmd;
|
||||
|
||||
@ -1423,7 +1424,7 @@ static bool should_requeue_request(struct request *rq)
|
||||
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
|
||||
{
|
||||
pr_info("rq %p timed out\n", rq);
|
||||
blk_mq_force_complete_rq(rq);
|
||||
blk_mq_complete_request(rq);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
@ -1574,7 +1575,13 @@ static void null_config_discard(struct nullb *nullb)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
|
||||
}
|
||||
|
||||
static const struct block_device_operations null_ops = {
|
||||
static const struct block_device_operations null_bio_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = null_submit_bio,
|
||||
.report_zones = null_report_zones,
|
||||
};
|
||||
|
||||
static const struct block_device_operations null_rq_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.report_zones = null_report_zones,
|
||||
};
|
||||
@ -1646,7 +1653,10 @@ static int null_gendisk_register(struct nullb *nullb)
|
||||
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
|
||||
disk->major = null_major;
|
||||
disk->first_minor = nullb->index;
|
||||
disk->fops = &null_ops;
|
||||
if (queue_is_mq(nullb->q))
|
||||
disk->fops = &null_rq_ops;
|
||||
else
|
||||
disk->fops = &null_bio_ops;
|
||||
disk->private_data = nullb;
|
||||
disk->queue = nullb->q;
|
||||
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
|
||||
@ -1791,7 +1801,7 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
} else if (dev->queue_mode == NULL_Q_BIO) {
|
||||
nullb->q = blk_alloc_queue(null_queue_bio, dev->home_node);
|
||||
nullb->q = blk_alloc_queue(dev->home_node);
|
||||
if (!nullb->q) {
|
||||
rv = -ENOMEM;
|
||||
goto out_cleanup_queues;
|
||||
|
@ -36,7 +36,7 @@
|
||||
* block device, assembling the pieces to full packets and queuing them to the
|
||||
* packet I/O scheduler.
|
||||
*
|
||||
* At the top layer there is a custom make_request_fn function that forwards
|
||||
* At the top layer there is a custom ->submit_bio function that forwards
|
||||
* read requests directly to the iosched queue and puts write requests in the
|
||||
* unaligned write queue. A kernel thread performs the necessary read
|
||||
* gathering to convert the unaligned writes to aligned writes and then feeds
|
||||
@ -913,7 +913,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
|
||||
}
|
||||
|
||||
atomic_inc(&pd->cdrw.pending_bios);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2428,15 +2428,15 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t pkt_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct pktcdvd_device *pd;
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct bio *split;
|
||||
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
pd = q->queuedata;
|
||||
pd = bio->bi_disk->queue->queuedata;
|
||||
if (!pd) {
|
||||
pr_err("%s incorrect request queue\n", bio_devname(bio, b));
|
||||
goto end_io;
|
||||
@ -2480,7 +2480,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
|
||||
split = bio;
|
||||
}
|
||||
|
||||
pkt_make_request_write(q, split);
|
||||
pkt_make_request_write(bio->bi_disk->queue, split);
|
||||
} while (split != bio);
|
||||
|
||||
return BLK_QC_T_NONE;
|
||||
@ -2685,6 +2685,7 @@ static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
|
||||
|
||||
static const struct block_device_operations pktcdvd_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = pkt_submit_bio,
|
||||
.open = pkt_open,
|
||||
.release = pkt_close,
|
||||
.ioctl = pkt_ioctl,
|
||||
@ -2749,7 +2750,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
disk->flags = GENHD_FL_REMOVABLE;
|
||||
strcpy(disk->disk_name, pd->name);
|
||||
disk->private_data = pd;
|
||||
disk->queue = blk_alloc_queue(pkt_make_request, NUMA_NO_NODE);
|
||||
disk->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!disk->queue)
|
||||
goto out_mem2;
|
||||
|
||||
|
@ -90,12 +90,6 @@ struct ps3vram_priv {
|
||||
|
||||
static int ps3vram_major;
|
||||
|
||||
|
||||
static const struct block_device_operations ps3vram_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
|
||||
#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
|
||||
#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
|
||||
#define DMA_NOTIFIER_SIZE 0x40
|
||||
@ -585,15 +579,15 @@ out:
|
||||
return next;
|
||||
}
|
||||
|
||||
static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t ps3vram_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct ps3_system_bus_device *dev = q->queuedata;
|
||||
struct ps3_system_bus_device *dev = bio->bi_disk->private_data;
|
||||
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
|
||||
int busy;
|
||||
|
||||
dev_dbg(&dev->core, "%s\n", __func__);
|
||||
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
spin_lock_irq(&priv->lock);
|
||||
busy = !bio_list_empty(&priv->list);
|
||||
@ -610,6 +604,11 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static const struct block_device_operations ps3vram_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = ps3vram_submit_bio,
|
||||
};
|
||||
|
||||
static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
{
|
||||
struct ps3vram_priv *priv;
|
||||
@ -737,7 +736,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
|
||||
ps3vram_proc_init(dev);
|
||||
|
||||
queue = blk_alloc_queue(ps3vram_make_request, NUMA_NO_NODE);
|
||||
queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!queue) {
|
||||
dev_err(&dev->core, "blk_alloc_queue failed\n");
|
||||
error = -ENOMEM;
|
||||
@ -745,7 +744,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
}
|
||||
|
||||
priv->queue = queue;
|
||||
queue->queuedata = dev;
|
||||
blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
|
||||
blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
|
||||
blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
|
||||
|
@ -50,6 +50,8 @@ struct rsxx_bio_meta {
|
||||
|
||||
static struct kmem_cache *bio_meta_pool;
|
||||
|
||||
static blk_qc_t rsxx_submit_bio(struct bio *bio);
|
||||
|
||||
/*----------------- Block Device Operations -----------------*/
|
||||
static int rsxx_blkdev_ioctl(struct block_device *bdev,
|
||||
fmode_t mode,
|
||||
@ -92,6 +94,7 @@ static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
|
||||
static const struct block_device_operations rsxx_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = rsxx_submit_bio,
|
||||
.getgeo = rsxx_getgeo,
|
||||
.ioctl = rsxx_blkdev_ioctl,
|
||||
};
|
||||
@ -117,13 +120,13 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
|
||||
}
|
||||
}
|
||||
|
||||
static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t rsxx_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct rsxx_cardinfo *card = q->queuedata;
|
||||
struct rsxx_cardinfo *card = bio->bi_disk->private_data;
|
||||
struct rsxx_bio_meta *bio_meta;
|
||||
blk_status_t st = BLK_STS_IOERR;
|
||||
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
might_sleep();
|
||||
|
||||
@ -233,7 +236,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
card->queue = blk_alloc_queue(rsxx_make_request, NUMA_NO_NODE);
|
||||
card->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!card->queue) {
|
||||
dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
|
||||
unregister_blkdev(card->major, DRIVER_NAME);
|
||||
@ -267,8 +270,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
|
||||
card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE;
|
||||
}
|
||||
|
||||
card->queue->queuedata = card;
|
||||
|
||||
snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
|
||||
"rsxx%d", card->disk_id);
|
||||
card->gendisk->major = card->major;
|
||||
@ -289,7 +290,6 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
|
||||
card->gendisk = NULL;
|
||||
|
||||
blk_cleanup_queue(card->queue);
|
||||
card->queue->queuedata = NULL;
|
||||
unregister_blkdev(card->major, DRIVER_NAME);
|
||||
}
|
||||
|
||||
|
@ -1417,7 +1417,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
|
||||
case SKD_CHECK_STATUS_REPORT_GOOD:
|
||||
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
|
||||
skreq->status = BLK_STS_OK;
|
||||
blk_mq_complete_request(req);
|
||||
if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
break;
|
||||
|
||||
case SKD_CHECK_STATUS_BUSY_IMMINENT:
|
||||
@ -1440,7 +1441,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
|
||||
case SKD_CHECK_STATUS_REPORT_ERROR:
|
||||
default:
|
||||
skreq->status = BLK_STS_IOERR;
|
||||
blk_mq_complete_request(req);
|
||||
if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1560,7 +1562,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
|
||||
*/
|
||||
if (likely(cmp_status == SAM_STAT_GOOD)) {
|
||||
skreq->status = BLK_STS_OK;
|
||||
blk_mq_complete_request(rq);
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
} else {
|
||||
skd_resolve_req_exception(skdev, skreq, rq);
|
||||
}
|
||||
|
@ -519,14 +519,15 @@ static int mm_check_plugged(struct cardinfo *card)
|
||||
return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
|
||||
}
|
||||
|
||||
static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t mm_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct cardinfo *card = q->queuedata;
|
||||
struct cardinfo *card = bio->bi_disk->private_data;
|
||||
|
||||
pr_debug("mm_make_request %llu %u\n",
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
bio->bi_iter.bi_size);
|
||||
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
spin_lock_irq(&card->lock);
|
||||
*card->biotail = bio;
|
||||
@ -778,6 +779,7 @@ static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
|
||||
static const struct block_device_operations mm_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = mm_submit_bio,
|
||||
.getgeo = mm_getgeo,
|
||||
.revalidate_disk = mm_revalidate,
|
||||
};
|
||||
@ -885,10 +887,9 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
|
||||
card->biotail = &card->bio;
|
||||
spin_lock_init(&card->lock);
|
||||
|
||||
card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE);
|
||||
card->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!card->queue)
|
||||
goto failed_alloc;
|
||||
card->queue->queuedata = card;
|
||||
|
||||
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
|
||||
|
||||
|
@ -171,7 +171,8 @@ static void virtblk_done(struct virtqueue *vq)
|
||||
while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
|
||||
struct request *req = blk_mq_rq_from_pdu(vbr);
|
||||
|
||||
blk_mq_complete_request(req);
|
||||
if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
req_done = true;
|
||||
}
|
||||
if (unlikely(virtqueue_is_broken(vq)))
|
||||
|
@ -1655,7 +1655,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
BUG();
|
||||
}
|
||||
|
||||
blk_mq_complete_request(req);
|
||||
if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
}
|
||||
|
||||
rinfo->ring.rsp_cons = i;
|
||||
|
@ -793,9 +793,9 @@ static void zram_sync_read(struct work_struct *work)
|
||||
}
|
||||
|
||||
/*
|
||||
* Block layer want one ->make_request_fn to be active at a time
|
||||
* so if we use chained IO with parent IO in same context,
|
||||
* it's a deadlock. To avoid, it, it uses worker thread context.
|
||||
* Block layer want one ->submit_bio to be active at a time, so if we use
|
||||
* chained IO with parent IO in same context, it's a deadlock. To avoid that,
|
||||
* use a worker thread context.
|
||||
*/
|
||||
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
|
||||
unsigned long entry, struct bio *bio)
|
||||
@ -1584,9 +1584,9 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
|
||||
/*
|
||||
* Handler function for all zram I/O requests.
|
||||
*/
|
||||
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
|
||||
static blk_qc_t zram_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct zram *zram = queue->queuedata;
|
||||
struct zram *zram = bio->bi_disk->private_data;
|
||||
|
||||
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
|
||||
bio->bi_iter.bi_size)) {
|
||||
@ -1813,6 +1813,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
|
||||
|
||||
static const struct block_device_operations zram_devops = {
|
||||
.open = zram_open,
|
||||
.submit_bio = zram_submit_bio,
|
||||
.swap_slot_free_notify = zram_slot_free_notify,
|
||||
.rw_page = zram_rw_page,
|
||||
.owner = THIS_MODULE
|
||||
@ -1891,7 +1892,7 @@ static int zram_add(void)
|
||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
spin_lock_init(&zram->wb_limit_lock);
|
||||
#endif
|
||||
queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE);
|
||||
queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!queue) {
|
||||
pr_err("Error allocating disk queue for device %d\n",
|
||||
device_id);
|
||||
@ -1912,7 +1913,6 @@ static int zram_add(void)
|
||||
zram->disk->first_minor = device_id;
|
||||
zram->disk->fops = &zram_devops;
|
||||
zram->disk->queue = queue;
|
||||
zram->disk->queue->queuedata = zram;
|
||||
zram->disk->private_data = zram;
|
||||
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
|
||||
|
||||
|
@ -605,7 +605,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
|
||||
disk->cdi = cdi;
|
||||
|
||||
ENSURE(cdo, drive_status, CDC_DRIVE_STATUS);
|
||||
if (cdo->check_events == NULL && cdo->media_changed == NULL)
|
||||
if (cdo->check_events == NULL)
|
||||
WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC));
|
||||
ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY);
|
||||
ENSURE(cdo, lock_door, CDC_LOCK);
|
||||
@ -1419,8 +1419,6 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
|
||||
|
||||
if (cdi->ops->check_events)
|
||||
cdi->ops->check_events(cdi, 0, slot);
|
||||
else
|
||||
cdi->ops->media_changed(cdi, slot);
|
||||
|
||||
if (slot == CDSL_NONE) {
|
||||
/* set media changed bits, on both queues */
|
||||
@ -1517,13 +1515,10 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
|
||||
return ret;
|
||||
|
||||
/* changed since last call? */
|
||||
if (cdi->ops->check_events) {
|
||||
BUG_ON(!queue); /* shouldn't be called from VFS path */
|
||||
cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
|
||||
changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
|
||||
cdi->ioctl_events = 0;
|
||||
} else
|
||||
changed = cdi->ops->media_changed(cdi, CDSL_CURRENT);
|
||||
BUG_ON(!queue); /* shouldn't be called from VFS path */
|
||||
cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
|
||||
changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
|
||||
cdi->ioctl_events = 0;
|
||||
|
||||
if (changed) {
|
||||
cdi->mc_flags = 0x3; /* set bit on both queues */
|
||||
@ -1535,18 +1530,6 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int cdrom_media_changed(struct cdrom_device_info *cdi)
|
||||
{
|
||||
/* This talks to the VFS, which doesn't like errors - just 1 or 0.
|
||||
* Returning "0" is always safe (media hasn't been changed). Do that
|
||||
* if the low-level cdrom driver dosn't support media changed. */
|
||||
if (cdi == NULL || cdi->ops->media_changed == NULL)
|
||||
return 0;
|
||||
if (!CDROM_CAN(CDC_MEDIA_CHANGED))
|
||||
return 0;
|
||||
return media_changed(cdi, 0);
|
||||
}
|
||||
|
||||
/* Requests to the low-level drivers will /always/ be done in the
|
||||
following format convention:
|
||||
|
||||
@ -3464,7 +3447,6 @@ EXPORT_SYMBOL(unregister_cdrom);
|
||||
EXPORT_SYMBOL(cdrom_open);
|
||||
EXPORT_SYMBOL(cdrom_release);
|
||||
EXPORT_SYMBOL(cdrom_ioctl);
|
||||
EXPORT_SYMBOL(cdrom_media_changed);
|
||||
EXPORT_SYMBOL(cdrom_number_of_slots);
|
||||
EXPORT_SYMBOL(cdrom_mode_select);
|
||||
EXPORT_SYMBOL(cdrom_mode_sense);
|
||||
|
@ -59,7 +59,7 @@ EXPORT_SYMBOL(bdev_dax_pgoff);
|
||||
#if IS_ENABLED(CONFIG_FS_DAX)
|
||||
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
|
||||
{
|
||||
if (!blk_queue_dax(bdev->bd_queue))
|
||||
if (!blk_queue_dax(bdev->bd_disk->queue))
|
||||
return NULL;
|
||||
return dax_get_by_host(bdev->bd_disk->disk_name);
|
||||
}
|
||||
|
@ -236,10 +236,6 @@ err_dev:
|
||||
return tgt_dev;
|
||||
}
|
||||
|
||||
static const struct block_device_operations nvm_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
static struct nvm_tgt_type *__nvm_find_target_type(const char *name)
|
||||
{
|
||||
struct nvm_tgt_type *tt;
|
||||
@ -380,7 +376,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
||||
goto err_dev;
|
||||
}
|
||||
|
||||
tqueue = blk_alloc_queue(tt->make_rq, dev->q->node);
|
||||
tqueue = blk_alloc_queue(dev->q->node);
|
||||
if (!tqueue) {
|
||||
ret = -ENOMEM;
|
||||
goto err_disk;
|
||||
@ -390,7 +386,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
||||
tdisk->flags = GENHD_FL_EXT_DEVT;
|
||||
tdisk->major = 0;
|
||||
tdisk->first_minor = 0;
|
||||
tdisk->fops = &nvm_fops;
|
||||
tdisk->fops = tt->bops;
|
||||
tdisk->queue = tqueue;
|
||||
|
||||
targetdata = tt->init(tgt_dev, tdisk, create->flags);
|
||||
|
@ -47,9 +47,9 @@ static struct pblk_global_caches pblk_caches = {
|
||||
|
||||
struct bio_set pblk_bio_set;
|
||||
|
||||
static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t pblk_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct pblk *pblk = q->queuedata;
|
||||
struct pblk *pblk = bio->bi_disk->queue->queuedata;
|
||||
|
||||
if (bio_op(bio) == REQ_OP_DISCARD) {
|
||||
pblk_discard(pblk, bio);
|
||||
@ -63,7 +63,7 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
|
||||
* constraint. Writes can be of arbitrary size.
|
||||
*/
|
||||
if (bio_data_dir(bio) == READ) {
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
pblk_submit_read(pblk, bio);
|
||||
} else {
|
||||
/* Prevent deadlock in the case of a modest LUN configuration
|
||||
@ -71,7 +71,7 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
|
||||
* leaves at least 256KB available for user I/O.
|
||||
*/
|
||||
if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
|
||||
}
|
||||
@ -79,6 +79,12 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static const struct block_device_operations pblk_bops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = pblk_submit_bio,
|
||||
};
|
||||
|
||||
|
||||
static size_t pblk_trans_map_size(struct pblk *pblk)
|
||||
{
|
||||
int entry_size = 8;
|
||||
@ -1280,7 +1286,7 @@ static struct nvm_tgt_type tt_pblk = {
|
||||
.name = "pblk",
|
||||
.version = {1, 0, 0},
|
||||
|
||||
.make_rq = pblk_make_rq,
|
||||
.bops = &pblk_bops,
|
||||
.capacity = pblk_capacity,
|
||||
|
||||
.init = pblk_init,
|
||||
|
@ -320,7 +320,7 @@ split_retry:
|
||||
split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
|
||||
&pblk_bio_set);
|
||||
bio_chain(split_bio, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
/* New bio contains first N sectors of the previous one, so
|
||||
* we can continue to use existing rqd, but we need to shrink
|
||||
|
@ -929,7 +929,7 @@ static inline void closure_bio_submit(struct cache_set *c,
|
||||
bio_endio(bio);
|
||||
return;
|
||||
}
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -959,7 +959,7 @@ err:
|
||||
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
|
||||
* in from disk if necessary.
|
||||
*
|
||||
* If IO is necessary and running under generic_make_request, returns -EAGAIN.
|
||||
* If IO is necessary and running under submit_bio_noacct, returns -EAGAIN.
|
||||
*
|
||||
* The btree node will have either a read or a write lock held, depending on
|
||||
* level and op->lock.
|
||||
|
@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
|
||||
!blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
bio->bi_end_io(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
static void quit_max_writeback_rate(struct cache_set *c,
|
||||
@ -1158,7 +1158,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
|
||||
|
||||
/* Cached devices - read & write stuff */
|
||||
|
||||
blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_qc_t cached_dev_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct search *s;
|
||||
struct bcache_device *d = bio->bi_disk->private_data;
|
||||
@ -1197,7 +1197,7 @@ blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
|
||||
if (!bio->bi_iter.bi_size) {
|
||||
/*
|
||||
* can't call bch_journal_meta from under
|
||||
* generic_make_request
|
||||
* submit_bio_noacct
|
||||
*/
|
||||
continue_at_nobarrier(&s->cl,
|
||||
cached_dev_nodata,
|
||||
@ -1228,36 +1228,8 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
|
||||
return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
|
||||
}
|
||||
|
||||
static int cached_dev_congested(void *data, int bits)
|
||||
{
|
||||
struct bcache_device *d = data;
|
||||
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
|
||||
struct request_queue *q = bdev_get_queue(dc->bdev);
|
||||
int ret = 0;
|
||||
|
||||
if (bdi_congested(q->backing_dev_info, bits))
|
||||
return 1;
|
||||
|
||||
if (cached_dev_get(dc)) {
|
||||
unsigned int i;
|
||||
struct cache *ca;
|
||||
|
||||
for_each_cache(ca, d->c, i) {
|
||||
q = bdev_get_queue(ca->bdev);
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
}
|
||||
|
||||
cached_dev_put(dc);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch_cached_dev_request_init(struct cached_dev *dc)
|
||||
{
|
||||
struct gendisk *g = dc->disk.disk;
|
||||
|
||||
g->queue->backing_dev_info->congested_fn = cached_dev_congested;
|
||||
dc->disk.cache_miss = cached_dev_cache_miss;
|
||||
dc->disk.ioctl = cached_dev_ioctl;
|
||||
}
|
||||
@ -1291,7 +1263,7 @@ static void flash_dev_nodata(struct closure *cl)
|
||||
continue_at(cl, search_free, NULL);
|
||||
}
|
||||
|
||||
blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
|
||||
blk_qc_t flash_dev_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct search *s;
|
||||
struct closure *cl;
|
||||
@ -1311,8 +1283,7 @@ blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
|
||||
|
||||
if (!bio->bi_iter.bi_size) {
|
||||
/*
|
||||
* can't call bch_journal_meta from under
|
||||
* generic_make_request
|
||||
* can't call bch_journal_meta from under submit_bio_noacct
|
||||
*/
|
||||
continue_at_nobarrier(&s->cl,
|
||||
flash_dev_nodata,
|
||||
@ -1342,27 +1313,8 @@ static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
static int flash_dev_congested(void *data, int bits)
|
||||
{
|
||||
struct bcache_device *d = data;
|
||||
struct request_queue *q;
|
||||
struct cache *ca;
|
||||
unsigned int i;
|
||||
int ret = 0;
|
||||
|
||||
for_each_cache(ca, d->c, i) {
|
||||
q = bdev_get_queue(ca->bdev);
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch_flash_dev_request_init(struct bcache_device *d)
|
||||
{
|
||||
struct gendisk *g = d->disk;
|
||||
|
||||
g->queue->backing_dev_info->congested_fn = flash_dev_congested;
|
||||
d->cache_miss = flash_dev_cache_miss;
|
||||
d->ioctl = flash_dev_ioctl;
|
||||
}
|
||||
|
@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
|
||||
void bch_data_insert(struct closure *cl);
|
||||
|
||||
void bch_cached_dev_request_init(struct cached_dev *dc);
|
||||
blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio);
|
||||
blk_qc_t cached_dev_submit_bio(struct bio *bio);
|
||||
|
||||
void bch_flash_dev_request_init(struct bcache_device *d);
|
||||
blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio);
|
||||
blk_qc_t flash_dev_submit_bio(struct bio *bio);
|
||||
|
||||
extern struct kmem_cache *bch_search_cache;
|
||||
|
||||
|
@ -680,7 +680,16 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
|
||||
return d->ioctl(d, mode, cmd, arg);
|
||||
}
|
||||
|
||||
static const struct block_device_operations bcache_ops = {
|
||||
static const struct block_device_operations bcache_cached_ops = {
|
||||
.submit_bio = cached_dev_submit_bio,
|
||||
.open = open_dev,
|
||||
.release = release_dev,
|
||||
.ioctl = ioctl_dev,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
static const struct block_device_operations bcache_flash_ops = {
|
||||
.submit_bio = flash_dev_submit_bio,
|
||||
.open = open_dev,
|
||||
.release = release_dev,
|
||||
.ioctl = ioctl_dev,
|
||||
@ -820,8 +829,8 @@ static void bcache_device_free(struct bcache_device *d)
|
||||
}
|
||||
|
||||
static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
sector_t sectors, make_request_fn make_request_fn,
|
||||
struct block_device *cached_bdev)
|
||||
sector_t sectors, struct block_device *cached_bdev,
|
||||
const struct block_device_operations *ops)
|
||||
{
|
||||
struct request_queue *q;
|
||||
const size_t max_stripes = min_t(size_t, INT_MAX,
|
||||
@ -868,16 +877,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
|
||||
d->disk->major = bcache_major;
|
||||
d->disk->first_minor = idx_to_first_minor(idx);
|
||||
d->disk->fops = &bcache_ops;
|
||||
d->disk->fops = ops;
|
||||
d->disk->private_data = d;
|
||||
|
||||
q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE);
|
||||
q = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
|
||||
d->disk->queue = q;
|
||||
q->queuedata = d;
|
||||
q->backing_dev_info->congested_data = d;
|
||||
q->limits.max_hw_sectors = UINT_MAX;
|
||||
q->limits.max_sectors = UINT_MAX;
|
||||
q->limits.max_segment_size = UINT_MAX;
|
||||
@ -1356,7 +1363,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
|
||||
|
||||
ret = bcache_device_init(&dc->disk, block_size,
|
||||
dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
|
||||
cached_dev_make_request, dc->bdev);
|
||||
dc->bdev, &bcache_cached_ops);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1469,7 +1476,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
|
||||
kobject_init(&d->kobj, &bch_flash_dev_ktype);
|
||||
|
||||
if (bcache_device_init(d, block_bytes(c), u->sectors,
|
||||
flash_dev_make_request, NULL))
|
||||
NULL, &bcache_flash_ops))
|
||||
goto err;
|
||||
|
||||
bcache_device_attach(d, c, u - c->uuids);
|
||||
|
@ -421,8 +421,6 @@ struct cache {
|
||||
|
||||
struct rw_semaphore quiesce_lock;
|
||||
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
/*
|
||||
* origin_blocks entries, discarded if set.
|
||||
*/
|
||||
@ -886,7 +884,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
|
||||
static void accounted_request(struct cache *cache, struct bio *bio)
|
||||
{
|
||||
accounted_begin(cache, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
static void issue_op(struct bio *bio, void *context)
|
||||
@ -1792,7 +1790,7 @@ static bool process_bio(struct cache *cache, struct bio *bio)
|
||||
bool commit_needed;
|
||||
|
||||
if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
return commit_needed;
|
||||
}
|
||||
@ -1858,7 +1856,7 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio)
|
||||
|
||||
if (cache->features.discard_passdown) {
|
||||
remap_to_origin(cache, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
} else
|
||||
bio_endio(bio);
|
||||
|
||||
@ -2423,20 +2421,6 @@ static void set_cache_size(struct cache *cache, dm_cblock_t size)
|
||||
cache->cache_size = size;
|
||||
}
|
||||
|
||||
static int is_congested(struct dm_dev *dev, int bdi_bits)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
return bdi_congested(q->backing_dev_info, bdi_bits);
|
||||
}
|
||||
|
||||
static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
|
||||
{
|
||||
struct cache *cache = container_of(cb, struct cache, callbacks);
|
||||
|
||||
return is_congested(cache->origin_dev, bdi_bits) ||
|
||||
is_congested(cache->cache_dev, bdi_bits);
|
||||
}
|
||||
|
||||
#define DEFAULT_MIGRATION_THRESHOLD 2048
|
||||
|
||||
static int cache_create(struct cache_args *ca, struct cache **result)
|
||||
@ -2471,9 +2455,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
|
||||
goto bad;
|
||||
}
|
||||
|
||||
cache->callbacks.congested_fn = cache_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &cache->callbacks);
|
||||
|
||||
cache->metadata_dev = ca->metadata_dev;
|
||||
cache->origin_dev = ca->origin_dev;
|
||||
cache->cache_dev = ca->cache_dev;
|
||||
|
@ -68,7 +68,6 @@ struct hash_table_bucket;
|
||||
|
||||
struct clone {
|
||||
struct dm_target *ti;
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
struct dm_dev *metadata_dev;
|
||||
struct dm_dev *dest_dev;
|
||||
@ -330,7 +329,7 @@ static void submit_bios(struct bio_list *bios)
|
||||
blk_start_plug(&plug);
|
||||
|
||||
while ((bio = bio_list_pop(bios)))
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
@ -346,7 +345,7 @@ static void submit_bios(struct bio_list *bios)
|
||||
static void issue_bio(struct clone *clone, struct bio *bio)
|
||||
{
|
||||
if (!bio_triggers_commit(clone, bio)) {
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -473,7 +472,7 @@ static void complete_discard_bio(struct clone *clone, struct bio *bio, bool succ
|
||||
bio_region_range(clone, bio, &rs, &nr_regions);
|
||||
trim_bio(bio, region_to_sector(clone, rs),
|
||||
nr_regions << clone->region_shift);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
} else
|
||||
bio_endio(bio);
|
||||
}
|
||||
@ -865,7 +864,7 @@ static void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio
|
||||
bio->bi_private = hd;
|
||||
|
||||
atomic_inc(&hd->clone->hydrations_in_flight);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1281,7 +1280,7 @@ static void process_deferred_flush_bios(struct clone *clone)
|
||||
*/
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1518,18 +1517,6 @@ error:
|
||||
DMEMIT("Error");
|
||||
}
|
||||
|
||||
static int clone_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
|
||||
{
|
||||
struct request_queue *dest_q, *source_q;
|
||||
struct clone *clone = container_of(cb, struct clone, callbacks);
|
||||
|
||||
source_q = bdev_get_queue(clone->source_dev->bdev);
|
||||
dest_q = bdev_get_queue(clone->dest_dev->bdev);
|
||||
|
||||
return (bdi_congested(dest_q->backing_dev_info, bdi_bits) |
|
||||
bdi_congested(source_q->backing_dev_info, bdi_bits));
|
||||
}
|
||||
|
||||
static sector_t get_dev_size(struct dm_dev *dev)
|
||||
{
|
||||
return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
|
||||
@ -1930,8 +1917,6 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
goto out_with_mempool;
|
||||
|
||||
mutex_init(&clone->commit_lock);
|
||||
clone->callbacks.congested_fn = clone_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &clone->callbacks);
|
||||
|
||||
/* Enable flushes */
|
||||
ti->num_flush_bios = 1;
|
||||
|
@ -1789,7 +1789,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
|
||||
return 1;
|
||||
}
|
||||
|
||||
generic_make_request(clone);
|
||||
submit_bio_noacct(clone);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1815,7 +1815,7 @@ static void kcryptd_io_write(struct dm_crypt_io *io)
|
||||
{
|
||||
struct bio *clone = io->ctx.bio_out;
|
||||
|
||||
generic_make_request(clone);
|
||||
submit_bio_noacct(clone);
|
||||
}
|
||||
|
||||
#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
|
||||
@ -1893,7 +1893,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
|
||||
clone->bi_iter.bi_sector = cc->start + io->sector;
|
||||
|
||||
if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
|
||||
generic_make_request(clone);
|
||||
submit_bio_noacct(clone);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,7 @@ static void flush_bios(struct bio *bio)
|
||||
while (bio) {
|
||||
n = bio->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = n;
|
||||
}
|
||||
}
|
||||
|
@ -1137,7 +1137,6 @@ static int metadata_get_stats(struct era_metadata *md, void *ptr)
|
||||
|
||||
struct era {
|
||||
struct dm_target *ti;
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
struct dm_dev *metadata_dev;
|
||||
struct dm_dev *origin_dev;
|
||||
@ -1265,7 +1264,7 @@ static void process_deferred_bios(struct era *era)
|
||||
bio_io_error(bio);
|
||||
else
|
||||
while ((bio = bio_list_pop(&marked_bios)))
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
static void process_rpc_calls(struct era *era)
|
||||
@ -1375,18 +1374,6 @@ static void stop_worker(struct era *era)
|
||||
/*----------------------------------------------------------------
|
||||
* Target methods
|
||||
*--------------------------------------------------------------*/
|
||||
static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
return bdi_congested(q->backing_dev_info, bdi_bits);
|
||||
}
|
||||
|
||||
static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
|
||||
{
|
||||
struct era *era = container_of(cb, struct era, callbacks);
|
||||
return dev_is_congested(era->origin_dev, bdi_bits);
|
||||
}
|
||||
|
||||
static void era_destroy(struct era *era)
|
||||
{
|
||||
if (era->md)
|
||||
@ -1514,8 +1501,6 @@ static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||
ti->flush_supported = true;
|
||||
|
||||
ti->num_discard_bios = 1;
|
||||
era->callbacks.congested_fn = era_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &era->callbacks);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2115,12 +2115,12 @@ offload_to_thread:
|
||||
dio->in_flight = (atomic_t)ATOMIC_INIT(1);
|
||||
dio->completion = NULL;
|
||||
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
if (need_sync_io) {
|
||||
wait_for_completion_io(&read_comp);
|
||||
|
@ -677,7 +677,7 @@ static void process_queued_bios(struct work_struct *work)
|
||||
bio_endio(bio);
|
||||
break;
|
||||
case DM_MAPIO_REMAPPED:
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
break;
|
||||
case DM_MAPIO_SUBMITTED:
|
||||
break;
|
||||
|
@ -242,7 +242,6 @@ struct raid_set {
|
||||
|
||||
struct mddev md;
|
||||
struct raid_type *raid_type;
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
sector_t array_sectors;
|
||||
sector_t dev_sectors;
|
||||
@ -1705,13 +1704,6 @@ static void do_table_event(struct work_struct *ws)
|
||||
dm_table_event(rs->ti->table);
|
||||
}
|
||||
|
||||
static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
|
||||
{
|
||||
struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
|
||||
|
||||
return mddev_congested(&rs->md, bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure a valid takover (level switch) is being requested on @rs
|
||||
*
|
||||
@ -3248,9 +3240,6 @@ size_check:
|
||||
goto bad_md_start;
|
||||
}
|
||||
|
||||
rs->callbacks.congested_fn = raid_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
|
||||
|
||||
/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
||||
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
|
||||
@ -3310,7 +3299,6 @@ static void raid_dtr(struct dm_target *ti)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
|
||||
list_del_init(&rs->callbacks.list);
|
||||
md_stop(&rs->md);
|
||||
raid_set_free(rs);
|
||||
}
|
||||
|
@ -779,7 +779,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
|
||||
wakeup_mirrord(ms);
|
||||
} else {
|
||||
map_bio(get_default_mirror(ms), bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -284,7 +284,8 @@ static void dm_complete_request(struct request *rq, blk_status_t error)
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
|
||||
tio->error = error;
|
||||
blk_mq_complete_request(rq);
|
||||
if (likely(!blk_should_fake_timeout(rq->q)))
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -252,7 +252,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
|
||||
|
||||
/*
|
||||
* Issue the synchronous I/O from a different thread
|
||||
* to avoid generic_make_request recursion.
|
||||
* to avoid submit_bio_noacct recursion.
|
||||
*/
|
||||
INIT_WORK_ONSTACK(&req.work, do_metadata);
|
||||
queue_work(ps->metadata_wq, &req.work);
|
||||
|
@ -1568,7 +1568,7 @@ static void flush_bios(struct bio *bio)
|
||||
while (bio) {
|
||||
n = bio->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = n;
|
||||
}
|
||||
}
|
||||
@ -1588,7 +1588,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
|
||||
bio->bi_next = NULL;
|
||||
r = do_origin(s->origin, bio, false);
|
||||
if (r == DM_MAPIO_REMAPPED)
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = n;
|
||||
}
|
||||
}
|
||||
@ -1829,7 +1829,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
|
||||
bio->bi_end_io = full_bio_end_io;
|
||||
bio->bi_private = callback_data;
|
||||
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
static struct dm_snap_pending_exception *
|
||||
|
@ -64,8 +64,6 @@ struct dm_table {
|
||||
void *event_context;
|
||||
|
||||
struct dm_md_mempools *mempools;
|
||||
|
||||
struct list_head target_callbacks;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -190,7 +188,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&t->devices);
|
||||
INIT_LIST_HEAD(&t->target_callbacks);
|
||||
|
||||
if (!num_targets)
|
||||
num_targets = KEYS_PER_NODE;
|
||||
@ -361,7 +358,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
|
||||
* This upgrades the mode on an already open dm_dev, being
|
||||
* careful to leave things as they were if we fail to reopen the
|
||||
* device and not to touch the existing bdev field in case
|
||||
* it is accessed concurrently inside dm_table_any_congested().
|
||||
* it is accessed concurrently.
|
||||
*/
|
||||
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
|
||||
struct mapped_device *md)
|
||||
@ -2052,38 +2049,6 @@ int dm_table_resume_targets(struct dm_table *t)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb)
|
||||
{
|
||||
list_add(&cb->list, &t->target_callbacks);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks);
|
||||
|
||||
int dm_table_any_congested(struct dm_table *t, int bdi_bits)
|
||||
{
|
||||
struct dm_dev_internal *dd;
|
||||
struct list_head *devices = dm_table_get_devices(t);
|
||||
struct dm_target_callbacks *cb;
|
||||
int r = 0;
|
||||
|
||||
list_for_each_entry(dd, devices, list) {
|
||||
struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
if (likely(q))
|
||||
r |= bdi_congested(q->backing_dev_info, bdi_bits);
|
||||
else
|
||||
DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
|
||||
dm_device_name(t->md),
|
||||
bdevname(dd->dm_dev->bdev, b));
|
||||
}
|
||||
|
||||
list_for_each_entry(cb, &t->target_callbacks, list)
|
||||
if (cb->congested_fn)
|
||||
r |= cb->congested_fn(cb, bdi_bits);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
struct mapped_device *dm_table_get_md(struct dm_table *t)
|
||||
{
|
||||
return t->md;
|
||||
|
@ -326,7 +326,6 @@ struct pool_c {
|
||||
struct pool *pool;
|
||||
struct dm_dev *data_dev;
|
||||
struct dm_dev *metadata_dev;
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
dm_block_t low_water_blocks;
|
||||
struct pool_features requested_pf; /* Features requested during table load */
|
||||
@ -758,7 +757,7 @@ static void issue(struct thin_c *tc, struct bio *bio)
|
||||
struct pool *pool = tc->pool;
|
||||
|
||||
if (!bio_triggers_commit(tc, bio)) {
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2394,7 +2393,7 @@ static void process_deferred_bios(struct pool *pool)
|
||||
if (bio->bi_opf & REQ_PREFLUSH)
|
||||
bio_endio(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2796,18 +2795,6 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
|
||||
{
|
||||
struct pool_c *pt = container_of(cb, struct pool_c, callbacks);
|
||||
struct request_queue *q;
|
||||
|
||||
if (get_pool_mode(pt->pool) == PM_OUT_OF_DATA_SPACE)
|
||||
return 1;
|
||||
|
||||
q = bdev_get_queue(pt->data_dev->bdev);
|
||||
return bdi_congested(q->backing_dev_info, bdi_bits);
|
||||
}
|
||||
|
||||
static void requeue_bios(struct pool *pool)
|
||||
{
|
||||
struct thin_c *tc;
|
||||
@ -3420,9 +3407,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||
dm_pool_register_pre_commit_callback(pool->pmd,
|
||||
metadata_pre_commit_callback, pool);
|
||||
|
||||
pt->callbacks.congested_fn = pool_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &pt->callbacks);
|
||||
|
||||
mutex_unlock(&dm_thin_pool_table.mutex);
|
||||
|
||||
return 0;
|
||||
|
@ -681,7 +681,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
|
||||
|
||||
verity_submit_prefetch(v, io);
|
||||
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
return DM_MAPIO_SUBMITTED;
|
||||
}
|
||||
|
@ -1244,7 +1244,7 @@ static int writecache_flush_thread(void *data)
|
||||
bio_end_sector(bio));
|
||||
wc_unlock(wc);
|
||||
bio_set_dev(bio, wc->dev->bdev);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
} else {
|
||||
writecache_flush(wc);
|
||||
wc_unlock(wc);
|
||||
|
@ -140,7 +140,7 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
|
||||
bio_advance(bio, clone->bi_iter.bi_size);
|
||||
|
||||
refcount_inc(&bioctx->ref);
|
||||
generic_make_request(clone);
|
||||
submit_bio_noacct(clone);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
|
||||
zone->wp_block += nr_blocks;
|
||||
|
@ -1273,7 +1273,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
|
||||
sector_t sector;
|
||||
struct bio *clone = &tio->clone;
|
||||
struct dm_io *io = tio->io;
|
||||
struct mapped_device *md = io->md;
|
||||
struct dm_target *ti = tio->ti;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
|
||||
@ -1295,10 +1294,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
|
||||
/* the bio has been remapped so dispatch it */
|
||||
trace_block_bio_remap(clone->bi_disk->queue, clone,
|
||||
bio_dev(io->orig_bio), sector);
|
||||
if (md->type == DM_TYPE_NVME_BIO_BASED)
|
||||
ret = direct_make_request(clone);
|
||||
else
|
||||
ret = generic_make_request(clone);
|
||||
ret = submit_bio_noacct(clone);
|
||||
break;
|
||||
case DM_MAPIO_KILL:
|
||||
free_tio(tio);
|
||||
@ -1645,7 +1641,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
|
||||
error = __split_and_process_non_flush(&ci);
|
||||
if (current->bio_list && ci.sector_count && !error) {
|
||||
/*
|
||||
* Remainder must be passed to generic_make_request()
|
||||
* Remainder must be passed to submit_bio_noacct()
|
||||
* so that it gets handled *after* bios already submitted
|
||||
* have been completely processed.
|
||||
* We take a clone of the original to store in
|
||||
@ -1670,7 +1666,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
|
||||
|
||||
bio_chain(b, bio);
|
||||
trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
|
||||
ret = generic_make_request(bio);
|
||||
ret = submit_bio_noacct(bio);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1738,7 +1734,7 @@ static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struc
|
||||
|
||||
bio_chain(split, *bio);
|
||||
trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
|
||||
generic_make_request(*bio);
|
||||
submit_bio_noacct(*bio);
|
||||
*bio = split;
|
||||
}
|
||||
}
|
||||
@ -1763,13 +1759,13 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
|
||||
}
|
||||
|
||||
/*
|
||||
* If in ->make_request_fn we need to use blk_queue_split(), otherwise
|
||||
* If in ->queue_bio we need to use blk_queue_split(), otherwise
|
||||
* queue_limits for abnormal requests (e.g. discard, writesame, etc)
|
||||
* won't be imposed.
|
||||
*/
|
||||
if (current->bio_list) {
|
||||
if (is_abnormal_io(bio))
|
||||
blk_queue_split(md->queue, &bio);
|
||||
blk_queue_split(&bio);
|
||||
else
|
||||
dm_queue_split(md, ti, &bio);
|
||||
}
|
||||
@ -1780,9 +1776,9 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
|
||||
return __split_and_process_bio(md, map, bio);
|
||||
}
|
||||
|
||||
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t dm_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct mapped_device *md = q->queuedata;
|
||||
struct mapped_device *md = bio->bi_disk->private_data;
|
||||
blk_qc_t ret = BLK_QC_T_NONE;
|
||||
int srcu_idx;
|
||||
struct dm_table *map;
|
||||
@ -1791,12 +1787,12 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
|
||||
/*
|
||||
* We are called with a live reference on q_usage_counter, but
|
||||
* that one will be released as soon as we return. Grab an
|
||||
* extra one as blk_mq_make_request expects to be able to
|
||||
* consume a reference (which lives until the request is freed
|
||||
* in case a request is allocated).
|
||||
* extra one as blk_mq_submit_bio expects to be able to consume
|
||||
* a reference (which lives until the request is freed in case a
|
||||
* request is allocated).
|
||||
*/
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
return blk_mq_make_request(q, bio);
|
||||
percpu_ref_get(&bio->bi_disk->queue->q_usage_counter);
|
||||
return blk_mq_submit_bio(bio);
|
||||
}
|
||||
|
||||
map = dm_get_live_table(md, &srcu_idx);
|
||||
@ -1818,31 +1814,6 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dm_any_congested(void *congested_data, int bdi_bits)
|
||||
{
|
||||
int r = bdi_bits;
|
||||
struct mapped_device *md = congested_data;
|
||||
struct dm_table *map;
|
||||
|
||||
if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
|
||||
if (dm_request_based(md)) {
|
||||
/*
|
||||
* With request-based DM we only need to check the
|
||||
* top-level queue for congestion.
|
||||
*/
|
||||
struct backing_dev_info *bdi = md->queue->backing_dev_info;
|
||||
r = bdi->wb.congested->state & bdi_bits;
|
||||
} else {
|
||||
map = dm_get_live_table_fast(md);
|
||||
if (map)
|
||||
r = dm_table_any_congested(map, bdi_bits);
|
||||
dm_put_live_table_fast(md);
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* An IDR is used to keep track of allocated minor numbers.
|
||||
*---------------------------------------------------------------*/
|
||||
@ -1981,14 +1952,13 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
spin_lock_init(&md->uevent_lock);
|
||||
|
||||
/*
|
||||
* default to bio-based required ->make_request_fn until DM
|
||||
* table is loaded and md->type established. If request-based
|
||||
* table is loaded: blk-mq will override accordingly.
|
||||
* default to bio-based until DM table is loaded and md->type
|
||||
* established. If request-based table is loaded: blk-mq will
|
||||
* override accordingly.
|
||||
*/
|
||||
md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
|
||||
md->queue = blk_alloc_queue(numa_node_id);
|
||||
if (!md->queue)
|
||||
goto bad;
|
||||
md->queue->queuedata = md;
|
||||
|
||||
md->disk = alloc_disk_node(1, md->numa_node_id);
|
||||
if (!md->disk)
|
||||
@ -2282,12 +2252,6 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
|
||||
|
||||
static void dm_init_congested_fn(struct mapped_device *md)
|
||||
{
|
||||
md->queue->backing_dev_info->congested_data = md;
|
||||
md->queue->backing_dev_info->congested_fn = dm_any_congested;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup the DM device's queue based on md's type
|
||||
*/
|
||||
@ -2304,12 +2268,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
|
||||
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
|
||||
return r;
|
||||
}
|
||||
dm_init_congested_fn(md);
|
||||
break;
|
||||
case DM_TYPE_BIO_BASED:
|
||||
case DM_TYPE_DAX_BIO_BASED:
|
||||
case DM_TYPE_NVME_BIO_BASED:
|
||||
dm_init_congested_fn(md);
|
||||
break;
|
||||
case DM_TYPE_NONE:
|
||||
WARN_ON_ONCE(true);
|
||||
@ -2531,7 +2493,7 @@ static void dm_wq_work(struct work_struct *work)
|
||||
break;
|
||||
|
||||
if (dm_request_based(md))
|
||||
(void) generic_make_request(c);
|
||||
(void) submit_bio_noacct(c);
|
||||
else
|
||||
(void) dm_process_bio(md, map, c);
|
||||
}
|
||||
@ -3286,6 +3248,7 @@ static const struct pr_ops dm_pr_ops = {
|
||||
};
|
||||
|
||||
static const struct block_device_operations dm_blk_dops = {
|
||||
.submit_bio = dm_submit_bio,
|
||||
.open = dm_blk_open,
|
||||
.release = dm_blk_close,
|
||||
.ioctl = dm_blk_ioctl,
|
||||
|
@ -63,7 +63,6 @@ void dm_table_presuspend_targets(struct dm_table *t);
|
||||
void dm_table_presuspend_undo_targets(struct dm_table *t);
|
||||
void dm_table_postsuspend_targets(struct dm_table *t);
|
||||
int dm_table_resume_targets(struct dm_table *t);
|
||||
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
|
||||
enum dm_queue_mode dm_table_get_type(struct dm_table *t);
|
||||
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
|
||||
struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
|
||||
|
@ -169,7 +169,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
|
||||
if (bio_data_dir(bio) == WRITE) {
|
||||
/* write request */
|
||||
if (atomic_read(&conf->counters[WriteAll])) {
|
||||
/* special case - don't decrement, don't generic_make_request,
|
||||
/* special case - don't decrement, don't submit_bio_noacct,
|
||||
* just fail immediately
|
||||
*/
|
||||
bio_io_error(bio);
|
||||
@ -214,7 +214,7 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
|
||||
} else
|
||||
bio_set_dev(bio, conf->rdev->bdev);
|
||||
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -46,29 +46,6 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
|
||||
return conf->disks + lo;
|
||||
}
|
||||
|
||||
/*
|
||||
* In linear_congested() conf->raid_disks is used as a copy of
|
||||
* mddev->raid_disks to iterate conf->disks[], because conf->raid_disks
|
||||
* and conf->disks[] are created in linear_conf(), they are always
|
||||
* consitent with each other, but mddev->raid_disks does not.
|
||||
*/
|
||||
static int linear_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct linear_conf *conf;
|
||||
int i, ret = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
conf = rcu_dereference(mddev->private);
|
||||
|
||||
for (i = 0; i < conf->raid_disks && !ret ; i++) {
|
||||
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
|
||||
{
|
||||
struct linear_conf *conf;
|
||||
@ -267,7 +244,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
struct bio *split = bio_split(bio, end_sector - bio_sector,
|
||||
GFP_NOIO, &mddev->bio_set);
|
||||
bio_chain(split, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
}
|
||||
|
||||
@ -286,7 +263,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
|
||||
bio_sector);
|
||||
mddev_check_writesame(mddev, bio);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -322,7 +299,6 @@ static struct md_personality linear_personality =
|
||||
.hot_add_disk = linear_add,
|
||||
.size = linear_size,
|
||||
.quiesce = linear_quiesce,
|
||||
.congested = linear_congested,
|
||||
};
|
||||
|
||||
static int __init linear_init (void)
|
||||
|
@ -131,7 +131,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
|
||||
mp_bh->bio.bi_private = mp_bh;
|
||||
mddev_check_writesame(mddev, &mp_bh->bio);
|
||||
mddev_check_write_zeroes(mddev, &mp_bh->bio);
|
||||
generic_make_request(&mp_bh->bio);
|
||||
submit_bio_noacct(&mp_bh->bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -151,28 +151,6 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
|
||||
seq_putc(seq, ']');
|
||||
}
|
||||
|
||||
static int multipath_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct mpconf *conf = mddev->private;
|
||||
int i, ret = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < mddev->raid_disks ; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags)) {
|
||||
struct request_queue *q = bdev_get_queue(rdev->bdev);
|
||||
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
/* Just like multipath_map, we just check the
|
||||
* first available device
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Careful, this can execute in IRQ contexts as well!
|
||||
*/
|
||||
@ -348,7 +326,7 @@ static void multipathd(struct md_thread *thread)
|
||||
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
|
||||
bio->bi_end_io = multipath_end_request;
|
||||
bio->bi_private = mp_bh;
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
@ -478,7 +456,6 @@ static struct md_personality multipath_personality =
|
||||
.hot_add_disk = multipath_add_disk,
|
||||
.hot_remove_disk= multipath_remove_disk,
|
||||
.size = multipath_size,
|
||||
.congested = multipath_congested,
|
||||
};
|
||||
|
||||
static int __init multipath_init (void)
|
||||
|
@ -199,7 +199,7 @@ static int rdevs_init_serial(struct mddev *mddev)
|
||||
static int rdev_need_serial(struct md_rdev *rdev)
|
||||
{
|
||||
return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
|
||||
rdev->bdev->bd_queue->nr_hw_queues != 1 &&
|
||||
rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
|
||||
test_bit(WriteMostly, &rdev->flags));
|
||||
}
|
||||
|
||||
@ -463,7 +463,7 @@ check_suspended:
|
||||
}
|
||||
EXPORT_SYMBOL(md_handle_request);
|
||||
|
||||
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t md_submit_bio(struct bio *bio)
|
||||
{
|
||||
const int rw = bio_data_dir(bio);
|
||||
const int sgrp = op_stat_group(bio_op(bio));
|
||||
@ -475,7 +475,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
blk_queue_split(q, &bio);
|
||||
blk_queue_split(&bio);
|
||||
|
||||
if (mddev == NULL || mddev->pers == NULL) {
|
||||
bio_io_error(bio);
|
||||
@ -549,26 +549,6 @@ void mddev_resume(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_resume);
|
||||
|
||||
int mddev_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct md_personality *pers = mddev->pers;
|
||||
int ret = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
if (mddev->suspended)
|
||||
ret = 1;
|
||||
else if (pers && pers->congested)
|
||||
ret = pers->congested(mddev, bits);
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_congested);
|
||||
static int md_congested(void *data, int bits)
|
||||
{
|
||||
struct mddev *mddev = data;
|
||||
return mddev_congested(mddev, bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generic flush handling for md
|
||||
*/
|
||||
@ -5641,7 +5621,7 @@ static int md_alloc(dev_t dev, char *name)
|
||||
mddev->hold_active = UNTIL_STOP;
|
||||
|
||||
error = -ENOMEM;
|
||||
mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
|
||||
mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!mddev->queue)
|
||||
goto abort;
|
||||
|
||||
@ -5670,6 +5650,7 @@ static int md_alloc(dev_t dev, char *name)
|
||||
* remove it now.
|
||||
*/
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
disk->events |= DISK_EVENT_MEDIA_CHANGE;
|
||||
mddev->gendisk = disk;
|
||||
/* As soon as we call add_disk(), another thread could get
|
||||
* through to md_open, so make sure it doesn't get too far
|
||||
@ -5964,8 +5945,6 @@ int md_run(struct mddev *mddev)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
mddev->queue->backing_dev_info->congested_data = mddev;
|
||||
mddev->queue->backing_dev_info->congested_fn = md_congested;
|
||||
}
|
||||
if (pers->sync_request) {
|
||||
if (mddev->kobj.sd &&
|
||||
@ -6350,7 +6329,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
|
||||
|
||||
__md_stop_writes(mddev);
|
||||
__md_stop(mddev);
|
||||
mddev->queue->backing_dev_info->congested_fn = NULL;
|
||||
|
||||
/* tell userspace to handle 'inactive' */
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
@ -7806,23 +7784,21 @@ static void md_release(struct gendisk *disk, fmode_t mode)
|
||||
mddev_put(mddev);
|
||||
}
|
||||
|
||||
static int md_media_changed(struct gendisk *disk)
|
||||
{
|
||||
struct mddev *mddev = disk->private_data;
|
||||
|
||||
return mddev->changed;
|
||||
}
|
||||
|
||||
static int md_revalidate(struct gendisk *disk)
|
||||
static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
|
||||
{
|
||||
struct mddev *mddev = disk->private_data;
|
||||
unsigned int ret = 0;
|
||||
|
||||
if (mddev->changed)
|
||||
ret = DISK_EVENT_MEDIA_CHANGE;
|
||||
mddev->changed = 0;
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct block_device_operations md_fops =
|
||||
{
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = md_submit_bio,
|
||||
.open = md_open,
|
||||
.release = md_release,
|
||||
.ioctl = md_ioctl,
|
||||
@ -7830,8 +7806,7 @@ static const struct block_device_operations md_fops =
|
||||
.compat_ioctl = md_compat_ioctl,
|
||||
#endif
|
||||
.getgeo = md_getgeo,
|
||||
.media_changed = md_media_changed,
|
||||
.revalidate_disk= md_revalidate,
|
||||
.check_events = md_check_events,
|
||||
};
|
||||
|
||||
static int md_thread(void *arg)
|
||||
|
@ -597,9 +597,6 @@ struct md_personality
|
||||
* array.
|
||||
*/
|
||||
void *(*takeover) (struct mddev *mddev);
|
||||
/* congested implements bdi.congested_fn().
|
||||
* Will not be called while array is 'suspended' */
|
||||
int (*congested)(struct mddev *mddev, int bits);
|
||||
/* Changes the consistency policy of an active array. */
|
||||
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
|
||||
};
|
||||
@ -710,7 +707,6 @@ extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
|
||||
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
|
||||
extern void md_finish_reshape(struct mddev *mddev);
|
||||
|
||||
extern int mddev_congested(struct mddev *mddev, int bits);
|
||||
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
|
||||
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
|
||||
sector_t sector, int size, struct page *page);
|
||||
|
@ -29,21 +29,6 @@ module_param(default_layout, int, 0644);
|
||||
(1L << MD_HAS_PPL) | \
|
||||
(1L << MD_HAS_MULTIPLE_PPLS))
|
||||
|
||||
static int raid0_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct r0conf *conf = mddev->private;
|
||||
struct md_rdev **devlist = conf->devlist;
|
||||
int raid_disks = conf->strip_zone[0].nb_dev;
|
||||
int i, ret = 0;
|
||||
|
||||
for (i = 0; i < raid_disks && !ret ; i++) {
|
||||
struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
|
||||
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* inform the user of the raid configuration
|
||||
*/
|
||||
@ -495,7 +480,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
bio_chain(split, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
end = zone->zone_end;
|
||||
} else
|
||||
@ -559,7 +544,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
|
||||
discard_bio, disk_devt(mddev->gendisk),
|
||||
bio->bi_iter.bi_sector);
|
||||
generic_make_request(discard_bio);
|
||||
submit_bio_noacct(discard_bio);
|
||||
}
|
||||
bio_endio(bio);
|
||||
}
|
||||
@ -600,7 +585,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
struct bio *split = bio_split(bio, sectors, GFP_NOIO,
|
||||
&mddev->bio_set);
|
||||
bio_chain(split, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
}
|
||||
|
||||
@ -633,7 +618,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||
disk_devt(mddev->gendisk), bio_sector);
|
||||
mddev_check_writesame(mddev, bio);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -818,7 +803,6 @@ static struct md_personality raid0_personality=
|
||||
.size = raid0_size,
|
||||
.takeover = raid0_takeover,
|
||||
.quiesce = raid0_quiesce,
|
||||
.congested = raid0_congested,
|
||||
};
|
||||
|
||||
static int __init raid0_init (void)
|
||||
|
@ -786,36 +786,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||
return best_disk;
|
||||
}
|
||||
|
||||
static int raid1_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
int i, ret = 0;
|
||||
|
||||
if ((bits & (1 << WB_async_congested)) &&
|
||||
conf->pending_count >= max_queued_requests)
|
||||
return 1;
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags)) {
|
||||
struct request_queue *q = bdev_get_queue(rdev->bdev);
|
||||
|
||||
BUG_ON(!q);
|
||||
|
||||
/* Note the '|| 1' - when read_balance prefers
|
||||
* non-congested targets, it can be removed
|
||||
*/
|
||||
if ((bits & (1 << WB_async_congested)) || 1)
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
else
|
||||
ret &= bdi_congested(q->backing_dev_info, bits);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
{
|
||||
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
|
||||
@ -834,7 +804,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = next;
|
||||
cond_resched();
|
||||
}
|
||||
@ -1312,7 +1282,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
struct bio *split = bio_split(bio, max_sectors,
|
||||
gfp, &conf->bio_split);
|
||||
bio_chain(split, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
r1_bio->master_bio = bio;
|
||||
r1_bio->sectors = max_sectors;
|
||||
@ -1338,7 +1308,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
trace_block_bio_remap(read_bio->bi_disk->queue, read_bio,
|
||||
disk_devt(mddev->gendisk), r1_bio->sector);
|
||||
|
||||
generic_make_request(read_bio);
|
||||
submit_bio_noacct(read_bio);
|
||||
}
|
||||
|
||||
static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
@ -1483,7 +1453,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
struct bio *split = bio_split(bio, max_sectors,
|
||||
GFP_NOIO, &conf->bio_split);
|
||||
bio_chain(split, bio);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = split;
|
||||
r1_bio->master_bio = bio;
|
||||
r1_bio->sectors = max_sectors;
|
||||
@ -2240,7 +2210,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
||||
atomic_inc(&r1_bio->remaining);
|
||||
md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
|
||||
|
||||
generic_make_request(wbio);
|
||||
submit_bio_noacct(wbio);
|
||||
}
|
||||
|
||||
put_sync_write_buf(r1_bio, 1);
|
||||
@ -2926,7 +2896,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
md_sync_acct_bio(bio, nr_sectors);
|
||||
if (read_targets == 1)
|
||||
bio->bi_opf &= ~MD_FAILFAST;
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -2935,7 +2905,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
md_sync_acct_bio(bio, nr_sectors);
|
||||
if (read_targets == 1)
|
||||
bio->bi_opf &= ~MD_FAILFAST;
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
return nr_sectors;
|
||||
}
|
||||
@ -3396,7 +3366,6 @@ static struct md_personality raid1_personality =
|
||||
.check_reshape = raid1_reshape,
|
||||
.quiesce = raid1_quiesce,
|
||||
.takeover = raid1_takeover,
|
||||
.congested = raid1_congested,
|
||||
};
|
||||
|
||||
static int __init raid_init(void)
|
||||
|
@ -848,31 +848,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
|
||||
return rdev;
|
||||
}
|
||||
|
||||
static int raid10_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
int i, ret = 0;
|
||||
|
||||
if ((bits & (1 << WB_async_congested)) &&
|
||||
conf->pending_count >= max_queued_requests)
|
||||
return 1;
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0;
|
||||
(i < conf->geo.raid_disks || i < conf->prev.raid_disks)
|
||||
&& ret == 0;
|
||||
i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags)) {
|
||||
struct request_queue *q = bdev_get_queue(rdev->bdev);
|
||||
|
||||
ret |= bdi_congested(q->backing_dev_info, bits);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void flush_pending_writes(struct r10conf *conf)
|
||||
{
|
||||
/* Any writes that have been queued but are awaiting
|
||||
@ -917,7 +892,7 @@ static void flush_pending_writes(struct r10conf *conf)
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = next;
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
@ -1102,7 +1077,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
/* Just ignore it */
|
||||
bio_endio(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
bio = next;
|
||||
}
|
||||
kfree(plug);
|
||||
@ -1194,7 +1169,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
gfp, &conf->bio_split);
|
||||
bio_chain(split, bio);
|
||||
allow_barrier(conf);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
wait_barrier(conf);
|
||||
bio = split;
|
||||
r10_bio->master_bio = bio;
|
||||
@ -1221,7 +1196,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
trace_block_bio_remap(read_bio->bi_disk->queue,
|
||||
read_bio, disk_devt(mddev->gendisk),
|
||||
r10_bio->sector);
|
||||
generic_make_request(read_bio);
|
||||
submit_bio_noacct(read_bio);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1479,7 +1454,7 @@ retry_write:
|
||||
GFP_NOIO, &conf->bio_split);
|
||||
bio_chain(split, bio);
|
||||
allow_barrier(conf);
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
wait_barrier(conf);
|
||||
bio = split;
|
||||
r10_bio->master_bio = bio;
|
||||
@ -2099,7 +2074,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
tbio->bi_opf |= MD_FAILFAST;
|
||||
tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
|
||||
bio_set_dev(tbio, conf->mirrors[d].rdev->bdev);
|
||||
generic_make_request(tbio);
|
||||
submit_bio_noacct(tbio);
|
||||
}
|
||||
|
||||
/* Now write out to any replacement devices
|
||||
@ -2118,7 +2093,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
md_sync_acct(conf->mirrors[d].replacement->bdev,
|
||||
bio_sectors(tbio));
|
||||
generic_make_request(tbio);
|
||||
submit_bio_noacct(tbio);
|
||||
}
|
||||
|
||||
done:
|
||||
@ -2241,7 +2216,7 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
wbio = r10_bio->devs[1].bio;
|
||||
wbio2 = r10_bio->devs[1].repl_bio;
|
||||
/* Need to test wbio2->bi_end_io before we call
|
||||
* generic_make_request as if the former is NULL,
|
||||
* submit_bio_noacct as if the former is NULL,
|
||||
* the latter is free to free wbio2.
|
||||
*/
|
||||
if (wbio2 && !wbio2->bi_end_io)
|
||||
@ -2249,13 +2224,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
if (wbio->bi_end_io) {
|
||||
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
|
||||
md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
|
||||
generic_make_request(wbio);
|
||||
submit_bio_noacct(wbio);
|
||||
}
|
||||
if (wbio2) {
|
||||
atomic_inc(&conf->mirrors[d].replacement->nr_pending);
|
||||
md_sync_acct(conf->mirrors[d].replacement->bdev,
|
||||
bio_sectors(wbio2));
|
||||
generic_make_request(wbio2);
|
||||
submit_bio_noacct(wbio2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2889,7 +2864,7 @@ static void raid10_set_cluster_sync_high(struct r10conf *conf)
|
||||
* a number of r10_bio structures, one for each out-of-sync device.
|
||||
* As we setup these structures, we collect all bio's together into a list
|
||||
* which we then process collectively to add pages, and then process again
|
||||
* to pass to generic_make_request.
|
||||
* to pass to submit_bio_noacct.
|
||||
*
|
||||
* The r10_bio structures are linked using a borrowed master_bio pointer.
|
||||
* This link is counted in ->remaining. When the r10_bio that points to NULL
|
||||
@ -3496,7 +3471,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
if (bio->bi_end_io == end_sync_read) {
|
||||
md_sync_acct_bio(bio, nr_sectors);
|
||||
bio->bi_status = 0;
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4654,7 +4629,7 @@ read_more:
|
||||
md_sync_acct_bio(read_bio, r10_bio->sectors);
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
read_bio->bi_next = NULL;
|
||||
generic_make_request(read_bio);
|
||||
submit_bio_noacct(read_bio);
|
||||
sectors_done += nr_sectors;
|
||||
if (sector_nr <= last)
|
||||
goto read_more;
|
||||
@ -4717,7 +4692,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
md_sync_acct_bio(b, r10_bio->sectors);
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
b->bi_next = NULL;
|
||||
generic_make_request(b);
|
||||
submit_bio_noacct(b);
|
||||
}
|
||||
end_reshape_request(r10_bio);
|
||||
}
|
||||
@ -4929,7 +4904,6 @@ static struct md_personality raid10_personality =
|
||||
.start_reshape = raid10_start_reshape,
|
||||
.finish_reshape = raid10_finish_reshape,
|
||||
.update_reshape_pos = raid10_update_reshape_pos,
|
||||
.congested = raid10_congested,
|
||||
};
|
||||
|
||||
static int __init raid_init(void)
|
||||
|
@ -873,7 +873,7 @@ static void dispatch_bio_list(struct bio_list *tmp)
|
||||
struct bio *bio;
|
||||
|
||||
while ((bio = bio_list_pop(tmp)))
|
||||
generic_make_request(bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
static int cmp_stripe(void *priv, struct list_head *a, struct list_head *b)
|
||||
@ -1151,7 +1151,7 @@ again:
|
||||
if (should_defer && op_is_write(op))
|
||||
bio_list_add(&pending_bios, bi);
|
||||
else
|
||||
generic_make_request(bi);
|
||||
submit_bio_noacct(bi);
|
||||
}
|
||||
if (rrdev) {
|
||||
if (s->syncing || s->expanding || s->expanded
|
||||
@ -1201,7 +1201,7 @@ again:
|
||||
if (should_defer && op_is_write(op))
|
||||
bio_list_add(&pending_bios, rbi);
|
||||
else
|
||||
generic_make_request(rbi);
|
||||
submit_bio_noacct(rbi);
|
||||
}
|
||||
if (!rdev && !rrdev) {
|
||||
if (op_is_write(op))
|
||||
@ -5099,28 +5099,6 @@ static void activate_bit_delay(struct r5conf *conf,
|
||||
}
|
||||
}
|
||||
|
||||
static int raid5_congested(struct mddev *mddev, int bits)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
|
||||
/* No difference between reads and writes. Just check
|
||||
* how busy the stripe_cache is
|
||||
*/
|
||||
|
||||
if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
|
||||
return 1;
|
||||
|
||||
/* Also checks whether there is pressure on r5cache log space */
|
||||
if (test_bit(R5C_LOG_TIGHT, &conf->cache_state))
|
||||
return 1;
|
||||
if (conf->quiesce)
|
||||
return 1;
|
||||
if (atomic_read(&conf->empty_inactive_list_nr))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
@ -5289,7 +5267,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
trace_block_bio_remap(align_bi->bi_disk->queue,
|
||||
align_bi, disk_devt(mddev->gendisk),
|
||||
raid_bio->bi_iter.bi_sector);
|
||||
generic_make_request(align_bi);
|
||||
submit_bio_noacct(align_bi);
|
||||
return 1;
|
||||
} else {
|
||||
rcu_read_unlock();
|
||||
@ -5309,7 +5287,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
||||
struct r5conf *conf = mddev->private;
|
||||
split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
|
||||
bio_chain(split, raid_bio);
|
||||
generic_make_request(raid_bio);
|
||||
submit_bio_noacct(raid_bio);
|
||||
raid_bio = split;
|
||||
}
|
||||
|
||||
@ -8427,7 +8405,6 @@ static struct md_personality raid6_personality =
|
||||
.finish_reshape = raid5_finish_reshape,
|
||||
.quiesce = raid5_quiesce,
|
||||
.takeover = raid6_takeover,
|
||||
.congested = raid5_congested,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
};
|
||||
static struct md_personality raid5_personality =
|
||||
@ -8452,7 +8429,6 @@ static struct md_personality raid5_personality =
|
||||
.finish_reshape = raid5_finish_reshape,
|
||||
.quiesce = raid5_quiesce,
|
||||
.takeover = raid5_takeover,
|
||||
.congested = raid5_congested,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
};
|
||||
|
||||
@ -8478,7 +8454,6 @@ static struct md_personality raid4_personality =
|
||||
.finish_reshape = raid5_finish_reshape,
|
||||
.quiesce = raid5_quiesce,
|
||||
.takeover = raid4_takeover,
|
||||
.congested = raid5_congested,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
};
|
||||
|
||||
|
@ -312,10 +312,7 @@ static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
|
||||
|
||||
mutex_lock(&block_mutex);
|
||||
if (md) {
|
||||
if (md->usage == 2)
|
||||
check_disk_change(bdev);
|
||||
ret = 0;
|
||||
|
||||
if ((mode & FMODE_WRITE) && md->read_only) {
|
||||
mmc_blk_put(md);
|
||||
ret = -EROFS;
|
||||
@ -1446,7 +1443,7 @@ static void mmc_blk_cqe_req_done(struct mmc_request *mrq)
|
||||
*/
|
||||
if (mq->in_recovery)
|
||||
mmc_blk_cqe_complete_rq(mq, req);
|
||||
else
|
||||
else if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
}
|
||||
|
||||
@ -1926,7 +1923,7 @@ static void mmc_blk_hsq_req_done(struct mmc_request *mrq)
|
||||
*/
|
||||
if (mq->in_recovery)
|
||||
mmc_blk_cqe_complete_rq(mq, req);
|
||||
else
|
||||
else if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
}
|
||||
|
||||
@ -1936,7 +1933,7 @@ void mmc_blk_mq_complete(struct request *req)
|
||||
|
||||
if (mq->use_cqe)
|
||||
mmc_blk_cqe_complete_rq(mq, req);
|
||||
else
|
||||
else if (likely(!blk_should_fake_timeout(req->q)))
|
||||
mmc_blk_mq_complete_rq(mq, req);
|
||||
}
|
||||
|
||||
@ -1988,7 +1985,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
|
||||
*/
|
||||
if (mq->in_recovery)
|
||||
mmc_blk_mq_complete_rq(mq, req);
|
||||
else
|
||||
else if (likely(!blk_should_fake_timeout(req->q)))
|
||||
blk_mq_complete_request(req);
|
||||
|
||||
mmc_blk_mq_dec_in_flight(mq, req);
|
||||
|
@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
|
||||
return err;
|
||||
}
|
||||
|
||||
static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t nd_blk_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
struct nd_namespace_blk *nsblk = bio->bi_disk->private_data;
|
||||
@ -225,6 +225,7 @@ static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
|
||||
|
||||
static const struct block_device_operations nd_blk_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = nd_blk_submit_bio,
|
||||
.revalidate_disk = nvdimm_revalidate_disk,
|
||||
};
|
||||
|
||||
@ -250,7 +251,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
|
||||
internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
|
||||
available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
|
||||
|
||||
q = blk_alloc_queue(nd_blk_make_request, NUMA_NO_NODE);
|
||||
q = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
|
||||
|
@ -1439,7 +1439,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t btt_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct btt *btt = bio->bi_disk->private_data;
|
||||
@ -1512,6 +1512,7 @@ static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
|
||||
|
||||
static const struct block_device_operations btt_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = btt_submit_bio,
|
||||
.rw_page = btt_rw_page,
|
||||
.getgeo = btt_getgeo,
|
||||
.revalidate_disk = nvdimm_revalidate_disk,
|
||||
@ -1523,7 +1524,7 @@ static int btt_blk_init(struct btt *btt)
|
||||
struct nd_namespace_common *ndns = nd_btt->ndns;
|
||||
|
||||
/* create a new disk and request queue for btt */
|
||||
btt->btt_queue = blk_alloc_queue(btt_make_request, NUMA_NO_NODE);
|
||||
btt->btt_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!btt->btt_queue)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -189,7 +189,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
|
||||
static blk_qc_t pmem_submit_bio(struct bio *bio)
|
||||
{
|
||||
int ret = 0;
|
||||
blk_status_t rc = 0;
|
||||
@ -281,6 +281,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
|
||||
|
||||
static const struct block_device_operations pmem_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = pmem_submit_bio,
|
||||
.rw_page = pmem_rw_page,
|
||||
.revalidate_disk = nvdimm_revalidate_disk,
|
||||
};
|
||||
@ -423,7 +424,7 @@ static int pmem_attach_disk(struct device *dev,
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
q = blk_alloc_queue(pmem_make_request, dev_to_node(dev));
|
||||
q = blk_alloc_queue(dev_to_node(dev));
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user