mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-29 09:13:38 +00:00
block-6.13-20241213
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmdckwAQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgppJmD/9uUPcPm51b5E7fzD5Hqvlb22uZMYbXs1vR 1NZWdPJhMoMPBXyQ0GN7wHThvwQ8VuTcZNs8pzSJWpZMMhgsdleViDL8hedPeblt TSDc6g2gEt7TtIGIhNqq7bQNW61a+KxZz55B/qKqlJOUsW7ALPuM4m34vSMTNKw8 c/RK3PtTxSvE5nmqLzeynw2Zo7IZ0PL2NSYZ0oID9ZcGtj4ItezhshXTPLuLNuRM ppvc9u3JGyAzVJI/I0GNNW2Xo2maFWtvcWznaegowoBzjQO4Qfo9WDtn3uJFl8Di N6M7H4GASo80l+Hd1eAal3YrM53Z1RW1Mj4xaA2+vtL+p7k5tfV3WAr00hNsK9TW 401KbBGqgvrVS7Y/Y0ADVqqoCePPhblZmWbJu36Jrz0/nDGi3lPOSigYSANlxGsC t0aeeD7lRTd5qJ5+pQnVQL9uaNTXZcUPizgGGG0y9/RcNPzot54ap0/cIEM3fXEQ nd2Tv7O1lSyE5O2JCaoBY/P5ytI7LgZHDH2/ZaUZyDxKFgXSj+2NufTUGj/YmZhr ZKU6U25cFhXEkMVDV4AUUh9Dq723dYkXE4xyl00eNiz6C0J9PQ8uvrlRgC9mPJeQ g4xvyIZxkqeRmzAvUIipKQSZ5Iia1/Jtwqs1RBcda8/7w3sOmQOA9xJGjevLI8G6 he9AQP63JQ== =trxL -----END PGP SIGNATURE----- Merge tag 'block-6.13-20241213' of git://git.kernel.dk/linux Pull block fixes from Jens Axboe: - Series from Damien fixing issues with the zoned write plugging - Fix for a potential UAF in block cgroups - Fix deadlock around queue freezing and the sysfs lock - Various little cleanups and fixes * tag 'block-6.13-20241213' of git://git.kernel.dk/linux: block: Fix potential deadlock while freezing queue and acquiring sysfs_lock block: Fix queue_iostats_passthrough_show() blk-mq: Clean up blk_mq_requeue_work() mq-deadline: Remove a local variable blk-iocost: Avoid using clamp() on inuse in __propagate_weights() block: Make bio_iov_bvec_set() accept pointer to const iov_iter block: get wp_offset by bdev_offset_from_zone_start blk-cgroup: Fix UAF in blkcg_unpin_online() MAINTAINERS: update Coly Li's email address block: Prevent potential deadlocks in zone write plug error recovery dm: Fix dm-zoned-reclaim zone write pointer alignment block: Ignore REQ_NOWAIT for zone reset and zone finish operations block: Use a zone write plug BIO work for REQ_NOWAIT BIOs
This commit is contained in:
commit
c30c65f3fe
@ -3893,7 +3893,7 @@ W: http://www.baycom.org/~tom/ham/ham.html
|
||||
F: drivers/net/hamradio/baycom*
|
||||
|
||||
BCACHE (BLOCK LAYER CACHE)
|
||||
M: Coly Li <colyli@suse.de>
|
||||
M: Coly Li <colyli@kernel.org>
|
||||
M: Kent Overstreet <kent.overstreet@linux.dev>
|
||||
L: linux-bcache@vger.kernel.org
|
||||
S: Maintained
|
||||
|
@ -1171,7 +1171,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__bio_release_pages);
|
||||
|
||||
void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
|
||||
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter)
|
||||
{
|
||||
WARN_ON_ONCE(bio->bi_max_vecs);
|
||||
|
||||
|
@ -1324,10 +1324,14 @@ void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
|
||||
struct blkcg *blkcg = css_to_blkcg(blkcg_css);
|
||||
|
||||
do {
|
||||
struct blkcg *parent;
|
||||
|
||||
if (!refcount_dec_and_test(&blkcg->online_pin))
|
||||
break;
|
||||
|
||||
parent = blkcg_parent(blkcg);
|
||||
blkcg_destroy_blkgs(blkcg);
|
||||
blkcg = blkcg_parent(blkcg);
|
||||
blkcg = parent;
|
||||
} while (blkcg);
|
||||
}
|
||||
|
||||
|
@ -1098,7 +1098,14 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
|
||||
inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
|
||||
iocg->child_active_sum);
|
||||
} else {
|
||||
inuse = clamp_t(u32, inuse, 1, active);
|
||||
/*
|
||||
* It may be tempting to turn this into a clamp expression with
|
||||
* a lower limit of 1 but active may be 0, which cannot be used
|
||||
* as an upper limit in that situation. This expression allows
|
||||
* active to clamp inuse unless it is 0, in which case inuse
|
||||
* becomes 1.
|
||||
*/
|
||||
inuse = min(inuse, active) ?: 1;
|
||||
}
|
||||
|
||||
iocg->last_inuse = iocg->inuse;
|
||||
|
@ -574,7 +574,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
|
||||
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
|
||||
if (!bio)
|
||||
return -ENOMEM;
|
||||
bio_iov_bvec_set(bio, (struct iov_iter *)iter);
|
||||
bio_iov_bvec_set(bio, iter);
|
||||
|
||||
/* check that the data layout matches the hardware restrictions */
|
||||
ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
|
||||
|
@ -275,15 +275,13 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q)
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned long i;
|
||||
|
||||
mutex_lock(&q->sysfs_dir_lock);
|
||||
lockdep_assert_held(&q->sysfs_dir_lock);
|
||||
|
||||
if (!q->mq_sysfs_init_done)
|
||||
goto unlock;
|
||||
return;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i)
|
||||
blk_mq_unregister_hctx(hctx);
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
}
|
||||
|
||||
int blk_mq_sysfs_register_hctxs(struct request_queue *q)
|
||||
@ -292,9 +290,10 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
|
||||
unsigned long i;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&q->sysfs_dir_lock);
|
||||
lockdep_assert_held(&q->sysfs_dir_lock);
|
||||
|
||||
if (!q->mq_sysfs_init_done)
|
||||
goto unlock;
|
||||
return ret;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
ret = blk_mq_register_hctx(hctx);
|
||||
@ -302,8 +301,5 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q)
|
||||
break;
|
||||
}
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1544,19 +1544,17 @@ static void blk_mq_requeue_work(struct work_struct *work)
|
||||
|
||||
while (!list_empty(&rq_list)) {
|
||||
rq = list_entry(rq_list.next, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
/*
|
||||
* If RQF_DONTPREP ist set, the request has been started by the
|
||||
* If RQF_DONTPREP is set, the request has been started by the
|
||||
* driver already and might have driver-specific data allocated
|
||||
* already. Insert it into the hctx dispatch list to avoid
|
||||
* block layer merges for the request.
|
||||
*/
|
||||
if (rq->rq_flags & RQF_DONTPREP) {
|
||||
list_del_init(&rq->queuelist);
|
||||
if (rq->rq_flags & RQF_DONTPREP)
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
} else {
|
||||
list_del_init(&rq->queuelist);
|
||||
else
|
||||
blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
|
||||
}
|
||||
}
|
||||
|
||||
while (!list_empty(&flush_list)) {
|
||||
@ -4455,7 +4453,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
||||
unsigned long i, j;
|
||||
|
||||
/* protect against switching io scheduler */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
|
||||
for (i = 0; i < set->nr_hw_queues; i++) {
|
||||
int old_node;
|
||||
int node = blk_mq_get_hctx_node(set, i);
|
||||
@ -4488,7 +4487,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
||||
|
||||
xa_for_each_start(&q->hctx_table, j, hctx, j)
|
||||
blk_mq_exit_hctx(q, set, hctx, j);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
/* unregister cpuhp callbacks for exited hctxs */
|
||||
blk_mq_remove_hw_queues_cpuhp(q);
|
||||
@ -4520,10 +4518,14 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
|
||||
xa_init(&q->hctx_table);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
|
||||
blk_mq_realloc_hw_ctxs(set, q);
|
||||
if (!q->nr_hw_queues)
|
||||
goto err_hctxs;
|
||||
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
|
||||
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
|
||||
|
||||
@ -4542,6 +4544,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
return 0;
|
||||
|
||||
err_hctxs:
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
blk_mq_release(q);
|
||||
err_exit:
|
||||
q->mq_ops = NULL;
|
||||
@ -4922,12 +4925,12 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
|
||||
return false;
|
||||
|
||||
/* q->elevator needs protection from ->sysfs_lock */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
|
||||
/* the check has to be done with holding sysfs_lock */
|
||||
if (!q->elevator) {
|
||||
kfree(qe);
|
||||
goto unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&qe->node);
|
||||
@ -4937,9 +4940,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
|
||||
__elevator_get(qe->type);
|
||||
list_add(&qe->node, head);
|
||||
elevator_disable(q);
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
out:
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -4968,11 +4969,9 @@ static void blk_mq_elv_switch_back(struct list_head *head,
|
||||
list_del(&qe->node);
|
||||
kfree(qe);
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
elevator_switch(q, t);
|
||||
/* drop the reference acquired in blk_mq_elv_switch_none */
|
||||
elevator_put(t);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
}
|
||||
|
||||
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
@ -4992,8 +4991,11 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
|
||||
return;
|
||||
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||
mutex_lock(&q->sysfs_dir_lock);
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
blk_mq_freeze_queue(q);
|
||||
}
|
||||
/*
|
||||
* Switch IO scheduler to 'none', cleaning up the data associated
|
||||
* with the previous scheduler. We will switch back once we are done
|
||||
@ -5049,8 +5051,11 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
blk_mq_elv_switch_back(&head, q);
|
||||
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list)
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||
blk_mq_unfreeze_queue(q);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
}
|
||||
|
||||
/* Free the excess tags when nr_hw_queues shrink. */
|
||||
for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
|
||||
|
@ -263,7 +263,7 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
|
||||
|
||||
static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(blk_queue_passthrough_stat(disk->queue), page);
|
||||
return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_iostats_passthrough_store(struct gendisk *disk,
|
||||
@ -706,11 +706,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
if (entry->load_module)
|
||||
entry->load_module(disk, page, length);
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
blk_mq_freeze_queue(q);
|
||||
res = entry->store(disk, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,6 @@ static const char *const zone_cond_name[] = {
|
||||
/*
|
||||
* Per-zone write plug.
|
||||
* @node: hlist_node structure for managing the plug using a hash table.
|
||||
* @link: To list the plug in the zone write plug error list of the disk.
|
||||
* @ref: Zone write plug reference counter. A zone write plug reference is
|
||||
* always at least 1 when the plug is hashed in the disk plug hash table.
|
||||
* The reference is incremented whenever a new BIO needing plugging is
|
||||
@ -63,7 +62,6 @@ static const char *const zone_cond_name[] = {
|
||||
*/
|
||||
struct blk_zone_wplug {
|
||||
struct hlist_node node;
|
||||
struct list_head link;
|
||||
refcount_t ref;
|
||||
spinlock_t lock;
|
||||
unsigned int flags;
|
||||
@ -80,8 +78,8 @@ struct blk_zone_wplug {
|
||||
* - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
|
||||
* that is, that write BIOs are being throttled due to a write BIO already
|
||||
* being executed or the zone write plug bio list is not empty.
|
||||
* - BLK_ZONE_WPLUG_ERROR: Indicates that a write error happened which will be
|
||||
* recovered with a report zone to update the zone write pointer offset.
|
||||
* - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
|
||||
* write pointer offset and need to update it.
|
||||
* - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
|
||||
* from the disk hash table and that the initial reference to the zone
|
||||
* write plug set when the plug was first added to the hash table has been
|
||||
@ -91,11 +89,9 @@ struct blk_zone_wplug {
|
||||
* freed once all remaining references from BIOs or functions are dropped.
|
||||
*/
|
||||
#define BLK_ZONE_WPLUG_PLUGGED (1U << 0)
|
||||
#define BLK_ZONE_WPLUG_ERROR (1U << 1)
|
||||
#define BLK_ZONE_WPLUG_NEED_WP_UPDATE (1U << 1)
|
||||
#define BLK_ZONE_WPLUG_UNHASHED (1U << 2)
|
||||
|
||||
#define BLK_ZONE_WPLUG_BUSY (BLK_ZONE_WPLUG_PLUGGED | BLK_ZONE_WPLUG_ERROR)
|
||||
|
||||
/**
|
||||
* blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
|
||||
* @zone_cond: BLK_ZONE_COND_XXX.
|
||||
@ -115,6 +111,30 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
|
||||
|
||||
struct disk_report_zones_cb_args {
|
||||
struct gendisk *disk;
|
||||
report_zones_cb user_cb;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk,
|
||||
struct blk_zone *zone);
|
||||
|
||||
static int disk_report_zones_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
struct disk_report_zones_cb_args *args = data;
|
||||
struct gendisk *disk = args->disk;
|
||||
|
||||
if (disk->zone_wplugs_hash)
|
||||
disk_zone_wplug_sync_wp_offset(disk, zone);
|
||||
|
||||
if (!args->user_cb)
|
||||
return 0;
|
||||
|
||||
return args->user_cb(zone, idx, args->user_data);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkdev_report_zones - Get zones information
|
||||
* @bdev: Target block device
|
||||
@ -139,6 +159,11 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
|
||||
{
|
||||
struct gendisk *disk = bdev->bd_disk;
|
||||
sector_t capacity = get_capacity(disk);
|
||||
struct disk_report_zones_cb_args args = {
|
||||
.disk = disk,
|
||||
.user_cb = cb,
|
||||
.user_data = data,
|
||||
};
|
||||
|
||||
if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
|
||||
return -EOPNOTSUPP;
|
||||
@ -146,7 +171,8 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
|
||||
if (!nr_zones || sector >= capacity)
|
||||
return 0;
|
||||
|
||||
return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
|
||||
return disk->fops->report_zones(disk, sector, nr_zones,
|
||||
disk_report_zones_cb, &args);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkdev_report_zones);
|
||||
|
||||
@ -427,7 +453,7 @@ static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
if (refcount_dec_and_test(&zwplug->ref)) {
|
||||
WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
|
||||
WARN_ON_ONCE(!list_empty(&zwplug->link));
|
||||
WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));
|
||||
|
||||
call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
|
||||
@ -441,8 +467,8 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
|
||||
return false;
|
||||
|
||||
/* If the zone write plug is still busy, it cannot be removed. */
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
|
||||
/* If the zone write plug is still plugged, it cannot be removed. */
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -525,12 +551,11 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
|
||||
return NULL;
|
||||
|
||||
INIT_HLIST_NODE(&zwplug->node);
|
||||
INIT_LIST_HEAD(&zwplug->link);
|
||||
refcount_set(&zwplug->ref, 2);
|
||||
spin_lock_init(&zwplug->lock);
|
||||
zwplug->flags = 0;
|
||||
zwplug->zone_no = zno;
|
||||
zwplug->wp_offset = sector & (disk->queue->limits.chunk_sectors - 1);
|
||||
zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
|
||||
bio_list_init(&zwplug->bio_list);
|
||||
INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
|
||||
zwplug->disk = disk;
|
||||
@ -574,115 +599,22 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
|
||||
}
|
||||
|
||||
/*
|
||||
* Abort (fail) all plugged BIOs of a zone write plug that are not aligned
|
||||
* with the assumed write pointer location of the zone when the BIO will
|
||||
* be unplugged.
|
||||
*/
|
||||
static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
unsigned int wp_offset = zwplug->wp_offset;
|
||||
struct bio_list bl = BIO_EMPTY_LIST;
|
||||
struct bio *bio;
|
||||
|
||||
while ((bio = bio_list_pop(&zwplug->bio_list))) {
|
||||
if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
|
||||
(bio_op(bio) != REQ_OP_ZONE_APPEND &&
|
||||
bio_offset_from_zone_start(bio) != wp_offset)) {
|
||||
blk_zone_wplug_bio_io_error(zwplug, bio);
|
||||
continue;
|
||||
}
|
||||
|
||||
wp_offset += bio_sectors(bio);
|
||||
bio_list_add(&bl, bio);
|
||||
}
|
||||
|
||||
bio_list_merge(&zwplug->bio_list, &bl);
|
||||
}
|
||||
|
||||
static inline void disk_zone_wplug_set_error(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_ERROR)
|
||||
return;
|
||||
|
||||
/*
|
||||
* At this point, we already have a reference on the zone write plug.
|
||||
* However, since we are going to add the plug to the disk zone write
|
||||
* plugs work list, increase its reference count. This reference will
|
||||
* be dropped in disk_zone_wplugs_work() once the error state is
|
||||
* handled, or in disk_zone_wplug_clear_error() if the zone is reset or
|
||||
* finished.
|
||||
*/
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
|
||||
refcount_inc(&zwplug->ref);
|
||||
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
}
|
||||
|
||||
static inline void disk_zone_wplug_clear_error(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We are racing with the error handling work which drops the reference
|
||||
* on the zone write plug after handling the error state. So remove the
|
||||
* plug from the error list and drop its reference count only if the
|
||||
* error handling has not yet started, that is, if the zone write plug
|
||||
* is still listed.
|
||||
*/
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
if (!list_empty(&zwplug->link)) {
|
||||
list_del_init(&zwplug->link);
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a zone write plug write pointer offset to either 0 (zone reset case)
|
||||
* or to the zone size (zone finish case). This aborts all plugged BIOs, which
|
||||
* is fine to do as doing a zone reset or zone finish while writes are in-flight
|
||||
* is a mistake from the user which will most likely cause all plugged BIOs to
|
||||
* fail anyway.
|
||||
* Set a zone write plug write pointer offset to the specified value.
|
||||
* This aborts all plugged BIOs, which is fine as this function is called for
|
||||
* a zone reset operation, a zone finish operation or if the zone needs a wp
|
||||
* update from a report zone after a write error.
|
||||
*/
|
||||
static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug,
|
||||
unsigned int wp_offset)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
|
||||
/*
|
||||
* Make sure that a BIO completion or another zone reset or finish
|
||||
* operation has not already removed the plug from the hash table.
|
||||
*/
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
return;
|
||||
}
|
||||
lockdep_assert_held(&zwplug->lock);
|
||||
|
||||
/* Update the zone write pointer and abort all plugged BIOs. */
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE;
|
||||
zwplug->wp_offset = wp_offset;
|
||||
disk_zone_wplug_abort(zwplug);
|
||||
|
||||
/*
|
||||
* Updating the write pointer offset puts back the zone
|
||||
* in a good state. So clear the error flag and decrement the
|
||||
* error count if we were in error state.
|
||||
*/
|
||||
disk_zone_wplug_clear_error(disk, zwplug);
|
||||
|
||||
/*
|
||||
* The zone write plug now has no BIO plugged: remove it from the
|
||||
* hash table so that it cannot be seen. The plug will be freed
|
||||
@ -690,8 +622,58 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
|
||||
*/
|
||||
if (disk_should_remove_zone_wplug(disk, zwplug))
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
}
|
||||
|
||||
static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
|
||||
{
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return zone->wp - zone->start;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
return zone->len;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
return 0;
|
||||
case BLK_ZONE_COND_NOT_WP:
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
default:
|
||||
/*
|
||||
* Conventional, offline and read-only zones do not have a valid
|
||||
* write pointer.
|
||||
*/
|
||||
return UINT_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk,
|
||||
struct blk_zone *zone)
|
||||
{
|
||||
struct blk_zone_wplug *zwplug;
|
||||
unsigned long flags;
|
||||
|
||||
zwplug = disk_get_zone_wplug(disk, zone->start);
|
||||
if (!zwplug)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
|
||||
disk_zone_wplug_set_wp_offset(disk, zwplug,
|
||||
blk_zone_wp_offset(zone));
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
static int disk_zone_sync_wp_offset(struct gendisk *disk, sector_t sector)
|
||||
{
|
||||
struct disk_report_zones_cb_args args = {
|
||||
.disk = disk,
|
||||
};
|
||||
|
||||
return disk->fops->report_zones(disk, sector, 1,
|
||||
disk_report_zones_cb, &args);
|
||||
}
|
||||
|
||||
static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
|
||||
@ -700,6 +682,7 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
struct blk_zone_wplug *zwplug;
|
||||
unsigned long flags;
|
||||
|
||||
/* Conventional zones cannot be reset nor finished. */
|
||||
if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
|
||||
@ -707,6 +690,15 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* No-wait reset or finish BIOs do not make much sense as the callers
|
||||
* issue these as blocking operations in most cases. To avoid issues with
|
||||
* the BIO execution potentially failing with BLK_STS_AGAIN, warn about
|
||||
* REQ_NOWAIT being set and ignore that flag.
|
||||
*/
|
||||
if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT))
|
||||
bio->bi_opf &= ~REQ_NOWAIT;
|
||||
|
||||
/*
|
||||
* If we have a zone write plug, set its write pointer offset to 0
|
||||
* (reset case) or to the zone size (finish case). This will abort all
|
||||
@ -716,7 +708,9 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
|
||||
*/
|
||||
zwplug = disk_get_zone_wplug(disk, sector);
|
||||
if (zwplug) {
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
@ -727,6 +721,7 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
struct blk_zone_wplug *zwplug;
|
||||
unsigned long flags;
|
||||
sector_t sector;
|
||||
|
||||
/*
|
||||
@ -738,7 +733,9 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
|
||||
sector += disk->queue->limits.chunk_sectors) {
|
||||
zwplug = disk_get_zone_wplug(disk, sector);
|
||||
if (zwplug) {
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
}
|
||||
@ -746,9 +743,25 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug,
|
||||
struct bio *bio, unsigned int nr_segs)
|
||||
static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
/*
|
||||
* Take a reference on the zone write plug and schedule the submission
|
||||
* of the next plugged BIO. blk_zone_wplug_bio_work() will release the
|
||||
* reference we take here.
|
||||
*/
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
|
||||
refcount_inc(&zwplug->ref);
|
||||
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
|
||||
}
|
||||
|
||||
static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug,
|
||||
struct bio *bio, unsigned int nr_segs)
|
||||
{
|
||||
bool schedule_bio_work = false;
|
||||
|
||||
/*
|
||||
* Grab an extra reference on the BIO request queue usage counter.
|
||||
* This reference will be reused to submit a request for the BIO for
|
||||
@ -764,6 +777,16 @@ static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug,
|
||||
*/
|
||||
bio_clear_polled(bio);
|
||||
|
||||
/*
|
||||
* REQ_NOWAIT BIOs are always handled using the zone write plug BIO
|
||||
* work, which can block. So clear the REQ_NOWAIT flag and schedule the
|
||||
* work if this is the first BIO we are plugging.
|
||||
*/
|
||||
if (bio->bi_opf & REQ_NOWAIT) {
|
||||
schedule_bio_work = !(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
|
||||
bio->bi_opf &= ~REQ_NOWAIT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reuse the poll cookie field to store the number of segments when
|
||||
* split to the hardware limits.
|
||||
@ -777,6 +800,11 @@ static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug,
|
||||
* at the tail of the list to preserve the sequential write order.
|
||||
*/
|
||||
bio_list_add(&zwplug->bio_list, bio);
|
||||
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
|
||||
|
||||
if (schedule_bio_work)
|
||||
disk_zone_wplug_schedule_bio_work(disk, zwplug);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -889,13 +917,23 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
|
||||
{
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
/*
|
||||
* If we lost track of the zone write pointer due to a write error,
|
||||
* the user must either execute a report zones, reset the zone or finish
|
||||
* the zone to recover a reliable write pointer position. Fail BIOs if the
|
||||
* user did not do that as we cannot handle emulated zone append
|
||||
* otherwise.
|
||||
*/
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Check that the user is not attempting to write to a full zone.
|
||||
* We know such BIO will fail, and that would potentially overflow our
|
||||
* write pointer offset beyond the end of the zone.
|
||||
*/
|
||||
if (disk_zone_wplug_is_full(disk, zwplug))
|
||||
goto err;
|
||||
return false;
|
||||
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||
/*
|
||||
@ -914,24 +952,18 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
|
||||
bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
|
||||
} else {
|
||||
/*
|
||||
* Check for non-sequential writes early because we avoid a
|
||||
* whole lot of error handling trouble if we don't send it off
|
||||
* to the driver.
|
||||
* Check for non-sequential writes early as we know that BIOs
|
||||
* with a start sector not aligned to the zone write pointer
|
||||
* will fail.
|
||||
*/
|
||||
if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
|
||||
goto err;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Advance the zone write pointer offset. */
|
||||
zwplug->wp_offset += bio_sectors(bio);
|
||||
|
||||
return true;
|
||||
|
||||
err:
|
||||
/* We detected an invalid write BIO: schedule error recovery. */
|
||||
disk_zone_wplug_set_error(disk, zwplug);
|
||||
kblockd_schedule_work(&disk->zone_wplugs_work);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
@ -970,7 +1002,10 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
|
||||
zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags);
|
||||
if (!zwplug) {
|
||||
bio_io_error(bio);
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
bio_wouldblock_error(bio);
|
||||
else
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -978,18 +1013,20 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
|
||||
|
||||
/*
|
||||
* If the zone is already plugged or has a pending error, add the BIO
|
||||
* to the plug BIO list. Otherwise, plug and let the BIO execute.
|
||||
* If the zone is already plugged, add the BIO to the plug BIO list.
|
||||
* Do the same for REQ_NOWAIT BIOs to ensure that we will not see a
|
||||
* BLK_STS_AGAIN failure if we let the BIO execute.
|
||||
* Otherwise, plug and let the BIO execute.
|
||||
*/
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
|
||||
if ((zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) ||
|
||||
(bio->bi_opf & REQ_NOWAIT))
|
||||
goto plug;
|
||||
|
||||
/*
|
||||
* If an error is detected when preparing the BIO, add it to the BIO
|
||||
* list so that error recovery can deal with it.
|
||||
*/
|
||||
if (!blk_zone_wplug_prepare_bio(zwplug, bio))
|
||||
goto plug;
|
||||
if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
bio_io_error(bio);
|
||||
return true;
|
||||
}
|
||||
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
|
||||
|
||||
@ -998,8 +1035,7 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
|
||||
return false;
|
||||
|
||||
plug:
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
|
||||
blk_zone_wplug_add_bio(zwplug, bio, nr_segs);
|
||||
disk_zone_wplug_add_bio(disk, zwplug, bio, nr_segs);
|
||||
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
|
||||
@ -1083,19 +1119,6 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_zone_plug_bio);
|
||||
|
||||
static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
/*
|
||||
* Take a reference on the zone write plug and schedule the submission
|
||||
* of the next plugged BIO. blk_zone_wplug_bio_work() will release the
|
||||
* reference we take here.
|
||||
*/
|
||||
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
|
||||
refcount_inc(&zwplug->ref);
|
||||
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
|
||||
}
|
||||
|
||||
static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
@ -1103,16 +1126,6 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
|
||||
/*
|
||||
* If we had an error, schedule error recovery. The recovery work
|
||||
* will restart submission of plugged BIOs.
|
||||
*/
|
||||
if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) {
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
kblockd_schedule_work(&disk->zone_wplugs_work);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Schedule submission of the next plugged BIO if we have one. */
|
||||
if (!bio_list_empty(&zwplug->bio_list)) {
|
||||
disk_zone_wplug_schedule_bio_work(disk, zwplug);
|
||||
@ -1155,12 +1168,13 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
|
||||
}
|
||||
|
||||
/*
|
||||
* If the BIO failed, mark the plug as having an error to trigger
|
||||
* recovery.
|
||||
* If the BIO failed, abort all plugged BIOs and mark the plug as
|
||||
* needing a write pointer update.
|
||||
*/
|
||||
if (bio->bi_status != BLK_STS_OK) {
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
disk_zone_wplug_set_error(disk, zwplug);
|
||||
disk_zone_wplug_abort(zwplug);
|
||||
zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE;
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
}
|
||||
|
||||
@ -1216,6 +1230,7 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
|
||||
*/
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
|
||||
again:
|
||||
bio = bio_list_pop(&zwplug->bio_list);
|
||||
if (!bio) {
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
|
||||
@ -1224,10 +1239,8 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
|
||||
/* Error recovery will decide what to do with the BIO. */
|
||||
bio_list_add_head(&zwplug->bio_list, bio);
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
goto put_zwplug;
|
||||
blk_zone_wplug_bio_io_error(zwplug, bio);
|
||||
goto again;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
@ -1249,120 +1262,6 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
|
||||
disk_put_zone_wplug(zwplug);
|
||||
}
|
||||
|
||||
static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
|
||||
{
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return zone->wp - zone->start;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
return zone->len;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
return 0;
|
||||
case BLK_ZONE_COND_NOT_WP:
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
default:
|
||||
/*
|
||||
* Conventional, offline and read-only zones do not have a valid
|
||||
* write pointer.
|
||||
*/
|
||||
return UINT_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone,
|
||||
unsigned int idx, void *data)
|
||||
{
|
||||
struct blk_zone *zonep = data;
|
||||
|
||||
*zonep = *zone;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void disk_zone_wplug_handle_error(struct gendisk *disk,
|
||||
struct blk_zone_wplug *zwplug)
|
||||
{
|
||||
sector_t zone_start_sector =
|
||||
bdev_zone_sectors(disk->part0) * zwplug->zone_no;
|
||||
unsigned int noio_flag;
|
||||
struct blk_zone zone;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
/* Get the current zone information from the device. */
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = disk->fops->report_zones(disk, zone_start_sector, 1,
|
||||
blk_zone_wplug_report_zone_cb, &zone);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
|
||||
spin_lock_irqsave(&zwplug->lock, flags);
|
||||
|
||||
/*
|
||||
* A zone reset or finish may have cleared the error already. In such
|
||||
* case, do nothing as the report zones may have seen the "old" write
|
||||
* pointer value before the reset/finish operation completed.
|
||||
*/
|
||||
if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR))
|
||||
goto unlock;
|
||||
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
|
||||
|
||||
if (ret != 1) {
|
||||
/*
|
||||
* We failed to get the zone information, meaning that something
|
||||
* is likely really wrong with the device. Abort all remaining
|
||||
* plugged BIOs as otherwise we could endup waiting forever on
|
||||
* plugged BIOs to complete if there is a queue freeze on-going.
|
||||
*/
|
||||
disk_zone_wplug_abort(zwplug);
|
||||
goto unplug;
|
||||
}
|
||||
|
||||
/* Update the zone write pointer offset. */
|
||||
zwplug->wp_offset = blk_zone_wp_offset(&zone);
|
||||
disk_zone_wplug_abort_unaligned(disk, zwplug);
|
||||
|
||||
/* Restart BIO submission if we still have any BIO left. */
|
||||
if (!bio_list_empty(&zwplug->bio_list)) {
|
||||
disk_zone_wplug_schedule_bio_work(disk, zwplug);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
unplug:
|
||||
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
|
||||
if (disk_should_remove_zone_wplug(disk, zwplug))
|
||||
disk_remove_zone_wplug(disk, zwplug);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&zwplug->lock, flags);
|
||||
}
|
||||
|
||||
static void disk_zone_wplugs_work(struct work_struct *work)
|
||||
{
|
||||
struct gendisk *disk =
|
||||
container_of(work, struct gendisk, zone_wplugs_work);
|
||||
struct blk_zone_wplug *zwplug;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
|
||||
while (!list_empty(&disk->zone_wplugs_err_list)) {
|
||||
zwplug = list_first_entry(&disk->zone_wplugs_err_list,
|
||||
struct blk_zone_wplug, link);
|
||||
list_del_init(&zwplug->link);
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
|
||||
disk_zone_wplug_handle_error(disk, zwplug);
|
||||
disk_put_zone_wplug(zwplug);
|
||||
|
||||
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
|
||||
}
|
||||
|
||||
static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
|
||||
{
|
||||
return 1U << disk->zone_wplugs_hash_bits;
|
||||
@ -1371,8 +1270,6 @@ static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
|
||||
void disk_init_zone_resources(struct gendisk *disk)
|
||||
{
|
||||
spin_lock_init(&disk->zone_wplugs_lock);
|
||||
INIT_LIST_HEAD(&disk->zone_wplugs_err_list);
|
||||
INIT_WORK(&disk->zone_wplugs_work, disk_zone_wplugs_work);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1471,8 +1368,6 @@ void disk_free_zone_resources(struct gendisk *disk)
|
||||
if (!disk->zone_wplugs_pool)
|
||||
return;
|
||||
|
||||
cancel_work_sync(&disk->zone_wplugs_work);
|
||||
|
||||
if (disk->zone_wplugs_wq) {
|
||||
destroy_workqueue(disk->zone_wplugs_wq);
|
||||
disk->zone_wplugs_wq = NULL;
|
||||
@ -1669,6 +1564,8 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
|
||||
if (!disk->zone_wplugs_hash)
|
||||
return 0;
|
||||
|
||||
disk_zone_wplug_sync_wp_offset(disk, zone);
|
||||
|
||||
wp_offset = blk_zone_wp_offset(zone);
|
||||
if (!wp_offset || wp_offset >= zone->capacity)
|
||||
return 0;
|
||||
@ -1799,6 +1696,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
memalloc_noio_restore(noio_flag);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = disk->fops->report_zones(disk, 0, UINT_MAX,
|
||||
blk_revalidate_zone_cb, &args);
|
||||
if (!ret) {
|
||||
@ -1835,6 +1733,48 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
|
||||
|
||||
/**
|
||||
* blk_zone_issue_zeroout - zero-fill a block range in a zone
|
||||
* @bdev: blockdev to write
|
||||
* @sector: start sector
|
||||
* @nr_sects: number of sectors to write
|
||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||
*
|
||||
* Description:
|
||||
* Zero-fill a block range in a zone (@sector must be equal to the zone write
|
||||
* pointer), handling potential errors due to the (initially unknown) lack of
|
||||
* hardware offload (See blkdev_issue_zeroout()).
|
||||
*/
|
||||
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (WARN_ON_ONCE(!bdev_is_zoned(bdev)))
|
||||
return -EIO;
|
||||
|
||||
ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
|
||||
BLKDEV_ZERO_NOFALLBACK);
|
||||
if (ret != -EOPNOTSUPP)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* The failed call to blkdev_issue_zeroout() advanced the zone write
|
||||
* pointer. Undo this using a report zone to update the zone write
|
||||
* pointer to the correct current value.
|
||||
*/
|
||||
ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector);
|
||||
if (ret != 1)
|
||||
return ret < 0 ? ret : -EIO;
|
||||
|
||||
/*
|
||||
* Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a
|
||||
* regular write with zero-pages.
|
||||
*/
|
||||
return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout);
|
||||
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
|
||||
int queue_zone_wplugs_show(void *data, struct seq_file *m)
|
||||
|
@ -698,8 +698,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
list_add(&rq->queuelist, &per_prio->dispatch);
|
||||
rq->fifo_time = jiffies;
|
||||
} else {
|
||||
struct list_head *insert_before;
|
||||
|
||||
deadline_add_rq_rb(per_prio, rq);
|
||||
|
||||
if (rq_mergeable(rq)) {
|
||||
@ -712,8 +710,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
* set expire time and add to fifo list
|
||||
*/
|
||||
rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
|
||||
insert_before = &per_prio->fifo_list[data_dir];
|
||||
list_add_tail(&rq->queuelist, insert_before);
|
||||
list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,9 +76,9 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
|
||||
* pointer and the requested position.
|
||||
*/
|
||||
nr_blocks = block - wp_block;
|
||||
ret = blkdev_issue_zeroout(dev->bdev,
|
||||
dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
|
||||
dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
|
||||
ret = blk_zone_issue_zeroout(dev->bdev,
|
||||
dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
|
||||
dmz_blk2sect(nr_blocks), GFP_NOIO);
|
||||
if (ret) {
|
||||
dmz_dev_err(dev,
|
||||
"Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
|
||||
|
@ -423,7 +423,7 @@ void __bio_add_page(struct bio *bio, struct page *page,
|
||||
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
|
||||
size_t off);
|
||||
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
|
||||
void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
|
||||
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
|
||||
void __bio_release_pages(struct bio *bio, bool mark_dirty);
|
||||
extern void bio_set_pages_dirty(struct bio *bio);
|
||||
extern void bio_check_pages_dirty(struct bio *bio);
|
||||
|
@ -200,8 +200,6 @@ struct gendisk {
|
||||
spinlock_t zone_wplugs_lock;
|
||||
struct mempool_s *zone_wplugs_pool;
|
||||
struct hlist_head *zone_wplugs_hash;
|
||||
struct list_head zone_wplugs_err_list;
|
||||
struct work_struct zone_wplugs_work;
|
||||
struct workqueue_struct *zone_wplugs_wq;
|
||||
#endif /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
@ -1421,6 +1419,9 @@ static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
|
||||
return is_seq;
|
||||
}
|
||||
|
||||
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask);
|
||||
|
||||
static inline unsigned int queue_dma_alignment(const struct request_queue *q)
|
||||
{
|
||||
return q->limits.dma_alignment;
|
||||
|
Loading…
Reference in New Issue
Block a user