block: rework bio splitting

The current setup with bio_may_exceed_limits and __bio_split_to_limits
is a bit of a mess.

Change it so that __bio_split_to_limits does all the work and is just
a variant of bio_split_to_limits that returns nr_segs.  This is done
by inlining it and instead having the various bio_split_* helpers directly
submit the potentially split bios.

To support btrfs, the rw version has a lower level helper split out
that just returns the offset to split.  This turns out to nicely clean
up the btrfs flow as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Link: https://lore.kernel.org/r/20240826173820.1690925-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2024-08-26 19:37:54 +02:00 committed by Jens Axboe
parent f6f84be089
commit b35243a447
5 changed files with 128 additions and 132 deletions

View File

@ -105,9 +105,33 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}
static struct bio *bio_split_discard(struct bio *bio,
const struct queue_limits *lim,
unsigned *nsegs, struct bio_set *bs)
/*
 * Act on a split decision that has already been made for @bio.
 *
 * @split_sectors < 0:  treated as an errno. @bio is completed with the
 *                      matching block status and NULL is returned.
 * @split_sectors == 0: nothing to split, @bio is returned unchanged.
 * @split_sectors > 0:  a new bio is split off @bio at that sector count
 *                      (allocated from the disk's bio_split set), chained
 *                      to @bio, and returned. @bio itself is resubmitted.
 */
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
	struct bio *front;

	if (unlikely(split_sectors < 0)) {
		bio->bi_status = errno_to_blk_status(split_sectors);
		bio_endio(bio);
		return NULL;
	}

	if (!split_sectors)
		return bio;

	front = bio_split(bio, split_sectors, GFP_NOIO,
			  &bio->bi_bdev->bd_disk->bio_split);
	/* flag the split-off bio as not mergeable */
	front->bi_opf |= REQ_NOMERGE;
	blkcg_bio_issue_init(front);
	bio_chain(front, bio);
	trace_block_split(front, bio->bi_iter.bi_sector);
	WARN_ON_ONCE(bio_zone_write_plugging(bio));
	/* send the original bio back through submission */
	submit_bio_noacct(bio);
	return front;
}
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
unsigned *nsegs)
{
unsigned int max_discard_sectors, granularity;
sector_t tmp;
@ -121,10 +145,10 @@ static struct bio *bio_split_discard(struct bio *bio,
min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
return NULL;
return bio;
if (bio_sectors(bio) <= max_discard_sectors)
return NULL;
return bio;
split_sectors = max_discard_sectors;
@ -139,19 +163,18 @@ static struct bio *bio_split_discard(struct bio *bio,
if (split_sectors > tmp)
split_sectors -= tmp;
return bio_split(bio, split_sectors, GFP_NOIO, bs);
return bio_submit_split(bio, split_sectors);
}
static struct bio *bio_split_write_zeroes(struct bio *bio,
const struct queue_limits *lim,
unsigned *nsegs, struct bio_set *bs)
struct bio *bio_split_write_zeroes(struct bio *bio,
const struct queue_limits *lim, unsigned *nsegs)
{
*nsegs = 0;
if (!lim->max_write_zeroes_sectors)
return NULL;
return bio;
if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
return NULL;
return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
return bio;
return bio_submit_split(bio, lim->max_write_zeroes_sectors);
}
static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
@ -274,27 +297,19 @@ static bool bvec_split_segs(const struct queue_limits *lim,
}
/**
* bio_split_rw - split a bio in two bios
* bio_split_rw_at - check if and where to split a read/write bio
* @bio: [in] bio to be split
* @lim: [in] queue limits to split based on
* @segs: [out] number of segments in the bio with the first half of the sectors
* @bs: [in] bio set to allocate the clone from
* @max_bytes: [in] maximum number of bytes per bio
*
* Clone @bio, update the bi_iter of the clone to represent the first sectors
* of @bio and update @bio->bi_iter to represent the remaining sectors. The
* following is guaranteed for the cloned bio:
* - That it has at most @max_bytes worth of data
* - That it has at most queue_max_segments(@q) segments.
*
* Except for discard requests the cloned bio will point at the bi_io_vec of
* the original bio. It is the responsibility of the caller to ensure that the
* original bio is not freed before the cloned bio. The caller is also
* responsible for ensuring that @bs is only destroyed after processing of the
* split bio has finished.
* Find out if @bio needs to be split to fit the queue limits in @lim and a
* maximum size of @max_bytes. Returns a negative error number if @bio can't be
* split, 0 if the bio doesn't have to be split, or a positive sector offset if
* @bio needs to be split.
*/
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, struct bio_set *bs, unsigned max_bytes)
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, unsigned max_bytes)
{
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
@ -324,22 +339,17 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
}
*segs = nsegs;
return NULL;
return 0;
split:
if (bio->bi_opf & REQ_ATOMIC) {
bio->bi_status = BLK_STS_INVAL;
bio_endio(bio);
return ERR_PTR(-EINVAL);
}
if (bio->bi_opf & REQ_ATOMIC)
return -EINVAL;
/*
* We can't sanely support splitting for a REQ_NOWAIT bio. End it
* with EAGAIN if splitting is required and return an error pointer.
*/
if (bio->bi_opf & REQ_NOWAIT) {
bio->bi_status = BLK_STS_AGAIN;
bio_endio(bio);
return ERR_PTR(-EAGAIN);
}
if (bio->bi_opf & REQ_NOWAIT)
return -EAGAIN;
*segs = nsegs;
@ -356,58 +366,16 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
* big IO can be trivial, disable iopoll when split needed.
*/
bio_clear_polled(bio);
return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_rw);
EXPORT_SYMBOL_GPL(bio_split_rw_at);
/**
* __bio_split_to_limits - split a bio to fit the queue limits
* @bio: bio to be split
* @lim: queue limits to split based on
* @nr_segs: returns the number of segments in the returned bio
*
* Check if @bio needs splitting based on the queue limits, and if so split off
* a bio fitting the limits from the beginning of @bio and return it. @bio is
* shortened to the remainder and re-submitted.
*
* The split bio is allocated from @q->bio_split, which is provided by the
* block layer.
*/
struct bio *__bio_split_to_limits(struct bio *bio,
const struct queue_limits *lim,
unsigned int *nr_segs)
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *nr_segs)
{
struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
struct bio *split;
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
split = bio_split_discard(bio, lim, nr_segs, bs);
break;
case REQ_OP_WRITE_ZEROES:
split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
break;
default:
split = bio_split_rw(bio, lim, nr_segs, bs,
get_max_io_size(bio, lim) << SECTOR_SHIFT);
if (IS_ERR(split))
return NULL;
break;
}
if (split) {
/* there isn't chance to merge the split bio */
split->bi_opf |= REQ_NOMERGE;
blkcg_bio_issue_init(split);
bio_chain(split, bio);
trace_block_split(split, bio->bi_iter.bi_sector);
WARN_ON_ONCE(bio_zone_write_plugging(bio));
submit_bio_noacct(bio);
return split;
}
return bio;
return bio_submit_split(bio,
bio_split_rw_at(bio, lim, nr_segs,
get_max_io_size(bio, lim) << SECTOR_SHIFT));
}
/**
@ -426,9 +394,7 @@ struct bio *bio_split_to_limits(struct bio *bio)
const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
unsigned int nr_segs;
if (bio_may_exceed_limits(bio, lim))
return __bio_split_to_limits(bio, lim, &nr_segs);
return bio;
return __bio_split_to_limits(bio, lim, &nr_segs);
}
EXPORT_SYMBOL(bio_split_to_limits);

View File

@ -2939,7 +2939,7 @@ void blk_mq_submit_bio(struct bio *bio)
struct blk_plug *plug = current->plug;
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
unsigned int nr_segs = 1;
unsigned int nr_segs;
struct request *rq;
blk_status_t ret;
@ -2981,11 +2981,10 @@ void blk_mq_submit_bio(struct bio *bio)
goto queue_exit;
}
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
goto queue_exit;
}
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
goto queue_exit;
if (!bio_integrity_prep(bio))
goto queue_exit;

View File

@ -331,33 +331,58 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
static inline bool bio_may_exceed_limits(struct bio *bio,
const struct queue_limits *lim)
{
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
return true; /* non-trivial splitting decisions */
default:
break;
}
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
unsigned *nsegs);
struct bio *bio_split_write_zeroes(struct bio *bio,
const struct queue_limits *lim, unsigned *nsegs);
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *nr_segs);
/*
* All drivers must accept single-segments bios that are <= PAGE_SIZE.
* This is a quick and dirty check that relies on the fact that
* bi_io_vec[0] is always valid if a bio has data. The check might
* lead to occasional false negatives when bios are cloned, but compared
* to the performance impact of cloned bios themselves the loop below
* doesn't matter anyway.
*/
/*
 * Cheap pre-check for whether a read/write bio could need splitting.
 *
 * All drivers must accept single-segment bios that fit within PAGE_SIZE, so
 * a bio is reported as "no split needed" only when the queue has no
 * chunk_sectors limit, the bio has exactly one vector, and that vector's
 * offset + length does not exceed PAGE_SIZE.
 *
 * This is a quick and dirty check that relies on the fact that bi_io_vec[0]
 * is always valid if a bio has data.  It might report occasional false
 * positives when bios are cloned, but compared to the performance impact of
 * cloned bios themselves that extra work doesn't matter.
 */
static inline bool bio_may_need_split(struct bio *bio,
		const struct queue_limits *lim)
{
	if (lim->chunk_sectors)
		return true;
	if (bio->bi_vcnt != 1)
		return true;
	return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}
struct bio *__bio_split_to_limits(struct bio *bio,
const struct queue_limits *lim,
unsigned int *nr_segs);
/**
 * __bio_split_to_limits - split a bio to fit the queue limits
 * @bio:     bio to be split
 * @lim:     queue limits to split based on
 * @nr_segs: returns the number of segments in the returned bio
 *
 * Dispatch on the operation of @bio: discard and secure erase go to
 * bio_split_discard(), write zeroes goes to bio_split_write_zeroes(), and
 * everything else goes to bio_split_rw() unless bio_may_need_split() reports
 * that no split can be needed, in which case @bio is returned as is with
 * *@nr_segs set to 1.
 *
 * When a split happens, a bio fitting the limits is split off from the
 * beginning of @bio and returned, while @bio is shortened to the remainder
 * and re-submitted.
 */
static inline struct bio *__bio_split_to_limits(struct bio *bio,
		const struct queue_limits *lim, unsigned int *nr_segs)
{
	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
		return bio_split_write_zeroes(bio, lim, nr_segs);
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		return bio_split_discard(bio, lim, nr_segs);
	default:
		break;
	}

	/* read/write path: skip the full check for trivially small bios */
	if (!bio_may_need_split(bio, lim)) {
		*nr_segs = 1;
		return bio;
	}
	return bio_split_rw(bio, lim, nr_segs);
}
int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,

View File

@ -73,20 +73,13 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *orig_bbio,
u64 map_length, bool use_append)
u64 map_length)
{
struct btrfs_bio *bbio;
struct bio *bio;
if (use_append) {
unsigned int nr_segs;
bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs,
&btrfs_clone_bioset, map_length);
} else {
bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT,
GFP_NOFS, &btrfs_clone_bioset);
}
bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, GFP_NOFS,
&btrfs_clone_bioset);
bbio = btrfs_bio(bio);
btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
bbio->inode = orig_bbio->inode;
@ -664,6 +657,19 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
return true;
}
/*
 * Compute how many bytes of @bbio can go out in one piece.
 *
 * @map_length is first capped at fs_info->max_zone_append_size, then
 * bio_split_rw_at() is asked whether the bio has to be split earlier to fit
 * fs_info->limits.  A non-zero answer is a sector offset and is converted to
 * bytes; zero means no split is needed and the capped @map_length is
 * returned.
 */
static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
{
	unsigned int nr_segs;
	int sector_offset;

	map_length = min(map_length, bbio->fs_info->max_zone_append_size);
	sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
					&nr_segs, map_length);
	/*
	 * Widen before shifting so the sector-to-byte conversion is done in
	 * u64 rather than in int, where a large offset could overflow.
	 *
	 * NOTE(review): a negative (error) return is not distinguished from a
	 * split offset here and ends up as a very large byte count - confirm
	 * that is the intended outcome for the caller.
	 */
	if (sector_offset)
		return (u64)sector_offset << SECTOR_SHIFT;
	return map_length;
}
static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
struct btrfs_inode *inode = bbio->inode;
@ -691,10 +697,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
map_length = min(map_length, length);
if (use_append)
map_length = min(map_length, fs_info->max_zone_append_size);
map_length = btrfs_append_map_length(bbio, map_length);
if (map_length < length) {
bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append);
bbio = btrfs_split_bio(fs_info, bbio, map_length);
bio = &bbio->bio;
}

View File

@ -324,8 +324,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, struct bio_set *bs, unsigned max_bytes);
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, unsigned max_bytes);
/**
* bio_next_split - get next @sectors from a bio, splitting if necessary