mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-01 10:45:49 +00:00
block-6.13-20242901
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmdJ6jwQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpvgeEADaPL+qmsRyp070K1OI/yTA9Jhf6liEyJ31
0GPVar5Vt6ObH6/POObrJqbBtAo5asQanFvwyVyLztKYxPHU7sdQaRcD+vvj7q3+
EhmQZSKM7Spp77awWhRWbeQfUBvdhTeGHjQH/0e60eIrF9KtEL9sM9hVqc8hBD9F
YtDNWPCk7Rz1PPNYlGEkQ2JmYmaxh3Gn29c/k1cSSo3InEQOFj6x+0Cgz6RjbTx3
9HfpLhVG3WV5MlZCCwp7KG36aJzlc0nq53x/sC9cg+F17RvL2EwNAOUfLl75/Kp/
t7PCQSd2ODciiDN9qZW71KGtVtlJ07W048Rk0nB+ogneC0uh4fuIYTidP9D7io7D
bBMrhDuUpnlPzlOqg0aeedXePQL7TRfT3CTentol6xldqg14n7C4QTQFQMSJCgJf
gr4YCTwl0RTknXo0A3ja16XwsUq5+2xsSoCTU25TY+wgKiAcc5lN9fhbvPRzbCQC
u9EQ9I9IFAMqEdnE51sw0x16fLtN2w4/zOkvTF+gD/KooEjSn9lcfeNue7jt1O0/
gFvFJCdXK/2GgxwHihvsEVdcNeaS8JowNafKUsfOM2G0qWQbY+l2vl/b5PfwecWi
0knOaqNWlGMwrQ+z+fgsEeFG7X98ninC7tqVZpzoZ7j0x65anH+Jq4q1Egongj0H
90zclclxjg==
=6cbB
-----END PGP SIGNATURE-----

Merge tag 'block-6.13-20242901' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:

 - NVMe pull request via Keith:
      - Use correct srcu list traversal (Breno)
      - Scatter-gather support for metadata (Keith)
      - Fabrics shutdown race condition fix (Nilay)
      - Persistent reservations updates (Guixin)

 - Add the required bits for MD atomic write support for raid0/1/10

 - Correct return value for unknown opcode in ublk

 - Fix deadlock with zone revalidation

 - Fix for the io priority request vs bio cleanups

 - Use the correct unsigned int type for various limit helpers

 - Fix for a race in loop

 - Cleanup blk_rq_prep_clone() to prevent uninit-value warning and make
   it easier for actual humans to read

 - Fix potential UAF when iterating tags

 - A few fixes for bfq-iosched UAF issues

 - Fix for brd discard not decrementing the allocated page count

 - Various little fixes and cleanups

* tag 'block-6.13-20242901' of git://git.kernel.dk/linux: (36 commits)
  brd: decrease the number of allocated pages which discarded
  block, bfq: fix bfqq uaf in bfq_limit_depth()
  block: Don't allow an atomic write be truncated in blkdev_write_iter()
  mq-deadline: don't call req_get_ioprio from the I/O completion handler
  block: Prevent potential deadlock in blk_revalidate_disk_zones()
  block: Remove extra part pointer NULLify in blk_rq_init()
  nvme: tuning pr code by using defined structs and macros
  nvme: introduce change ptpl and iekey definition
  block: return bool from get_disk_ro and bdev_read_only
  block: remove a duplicate definition for bdev_read_only
  block: return bool from blk_rq_aligned
  block: return unsigned int from blk_lim_dma_alignment_and_pad
  block: return unsigned int from queue_dma_alignment
  block: return unsigned int from bdev_io_opt
  block: req->bio is always set in the merge code
  block: don't bother checking the data direction for merges
  block: blk-mq: fix uninit-value in blk_rq_prep_clone and refactor
  Revert "block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator()"
  md/raid10: Atomic write support
  md/raid1: Atomic write support
  ...
commit cfd47302ac
block/bfq-cgroup.c
@@ -736,6 +736,7 @@ static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
 			 */
 			bfq_put_cooperator(sync_bfqq);
 			bic_set_bfqq(bic, NULL, true, act_idx);
+			bfq_release_process_ref(bfqd, sync_bfqq);
 		}
 	}

block/bfq-iosched.c
@@ -582,23 +582,31 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
 #define BFQ_LIMIT_INLINE_DEPTH 16
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
+static bool bfqq_request_over_limit(struct bfq_data *bfqd,
+				    struct bfq_io_cq *bic, blk_opf_t opf,
+				    unsigned int act_idx, int limit)
 {
-	struct bfq_data *bfqd = bfqq->bfqd;
-	struct bfq_entity *entity = &bfqq->entity;
 	struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
 	struct bfq_entity **entities = inline_entities;
-	int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
-	int class_idx = bfqq->ioprio_class - 1;
+	int alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
 	struct bfq_sched_data *sched_data;
+	struct bfq_entity *entity;
+	struct bfq_queue *bfqq;
 	unsigned long wsum;
 	bool ret = false;
-
-	if (!entity->on_st_or_in_serv)
-		return false;
+	int depth;
+	int level;
 
 retry:
 	spin_lock_irq(&bfqd->lock);
+	bfqq = bic_to_bfqq(bic, op_is_sync(opf), act_idx);
+	if (!bfqq)
+		goto out;
+
+	entity = &bfqq->entity;
+	if (!entity->on_st_or_in_serv)
+		goto out;
+
 	/* +1 for bfqq entity, root cgroup not included */
 	depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
 	if (depth > alloc_depth) {
@@ -643,7 +651,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
 		 * class.
 		 */
 		wsum = 0;
-		for (i = 0; i <= class_idx; i++) {
+		for (i = 0; i <= bfqq->ioprio_class - 1; i++) {
 			wsum = wsum * IOPRIO_BE_NR +
 				sched_data->service_tree[i].wsum;
 		}
@@ -666,7 +674,9 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
 	return ret;
 }
 #else
-static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
+static bool bfqq_request_over_limit(struct bfq_data *bfqd,
+				    struct bfq_io_cq *bic, blk_opf_t opf,
+				    unsigned int act_idx, int limit)
 {
 	return false;
 }
@@ -704,8 +714,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 	}
 
 	for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
-		struct bfq_queue *bfqq =
-			bic_to_bfqq(bic, op_is_sync(opf), act_idx);
+		/* Fast path to check if bfqq is already allocated. */
+		if (!bic_to_bfqq(bic, op_is_sync(opf), act_idx))
+			continue;
 
 		/*
 		 * Does queue (or any parent entity) exceed number of
@@ -713,7 +724,7 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 		 * limit depth so that it cannot consume more
 		 * available requests and thus starve other entities.
 		 */
-		if (bfqq && bfqq_request_over_limit(bfqq, limit)) {
+		if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) {
 			depth = 1;
 			break;
 		}
@@ -5434,8 +5445,6 @@ void bfq_put_cooperator(struct bfq_queue *bfqq)
 		bfq_put_queue(__bfqq);
 		__bfqq = next;
 	}
-
-	bfq_release_process_ref(bfqq->bfqd, bfqq);
 }
 
 static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
@@ -5448,6 +5457,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
 
 	bfq_put_cooperator(bfqq);
+
+	bfq_release_process_ref(bfqd, bfqq);
 }
 
 static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
@@ -6734,6 +6745,8 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
 	bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx);
 
 	bfq_put_cooperator(bfqq);
+
+	bfq_release_process_ref(bfqq->bfqd, bfqq);
 	return NULL;
 }
 

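The bfq_limit_depth() hunks above change bfqq_request_over_limit() to take the bic and resolve the queue only after bfqd->lock is held, instead of using a bfq_queue pointer fetched before locking, which could be freed in the meantime. Below is an illustrative userspace sketch of that "re-look-up under the lock" shape, not kernel code; all names (registry, lookup_locked, queue_over_limit, ...) are hypothetical.

/*
 * Illustrative userspace sketch of the pattern used by the fix above:
 * pass the lookup key and resolve it only while the lock protecting the
 * registry is held, never cache the pointer across the lock acquisition.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct queue {
	int nr_requests;
};

struct registry {
	pthread_mutex_t lock;
	struct queue *slots[4];	/* NULL once a queue has been torn down */
};

/* Resolve the key under the lock; the caller must hold reg->lock. */
static struct queue *lookup_locked(struct registry *reg, unsigned int idx)
{
	return idx < 4 ? reg->slots[idx] : NULL;
}

static bool queue_over_limit(struct registry *reg, unsigned int idx, int limit)
{
	bool ret = false;

	pthread_mutex_lock(&reg->lock);
	/* Look the queue up *after* the lock is held, never before. */
	struct queue *q = lookup_locked(reg, idx);
	if (q)
		ret = q->nr_requests > limit;
	pthread_mutex_unlock(&reg->lock);
	return ret;
}

int main(void)
{
	struct queue q = { .nr_requests = 12 };
	struct registry reg = { .lock = PTHREAD_MUTEX_INITIALIZER,
				.slots = { &q, NULL, NULL, NULL } };

	printf("slot 0 over limit: %d\n", queue_over_limit(&reg, 0, 8));
	printf("slot 1 over limit: %d\n", queue_over_limit(&reg, 1, 8));
	return 0;
}
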
block/blk-merge.c
@@ -864,17 +864,10 @@ static struct request *attempt_merge(struct request_queue *q,
 	if (req_op(req) != req_op(next))
 		return NULL;
 
-	if (rq_data_dir(req) != rq_data_dir(next))
-		return NULL;
-
-	if (req->bio && next->bio) {
-		/* Don't merge requests with different write hints. */
-		if (req->bio->bi_write_hint != next->bio->bi_write_hint)
-			return NULL;
-		if (req->bio->bi_ioprio != next->bio->bi_ioprio)
-			return NULL;
-	}
+	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
+		return NULL;
+	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
+		return NULL;
 
 	if (!blk_atomic_write_mergeable_rqs(req, next))
 		return NULL;
 
@@ -986,30 +979,16 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (req_op(rq) != bio_op(bio))
 		return false;
 
-	/* different data direction or already started, don't merge */
-	if (bio_data_dir(bio) != rq_data_dir(rq))
-		return false;
-
-	/* don't merge across cgroup boundaries */
 	if (!blk_cgroup_mergeable(rq, bio))
 		return false;
-
-	/* only merge integrity protected bio into ditto rq */
 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
 		return false;
-
-	/* Only merge if the crypt contexts are compatible */
 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
 		return false;
-
-	if (rq->bio) {
-		/* Don't merge requests with different write hints. */
-		if (rq->bio->bi_write_hint != bio->bi_write_hint)
-			return false;
-		if (rq->bio->bi_ioprio != bio->bi_ioprio)
-			return false;
-	}
-
+	if (rq->bio->bi_write_hint != bio->bi_write_hint)
+		return false;
+	if (rq->bio->bi_ioprio != bio->bi_ioprio)
+		return false;
 	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
 		return false;
 

block/blk-mq.c
@@ -388,7 +388,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->tag = BLK_MQ_NO_TAG;
 	rq->internal_tag = BLK_MQ_NO_TAG;
 	rq->start_time_ns = blk_time_get_ns();
-	rq->part = NULL;
 	blk_crypto_rq_set_defaults(rq);
 }
 EXPORT_SYMBOL(blk_rq_init);
@@ -3273,19 +3272,21 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		int (*bio_ctr)(struct bio *, struct bio *, void *),
 		void *data)
 {
-	struct bio *bio, *bio_src;
+	struct bio *bio_src;
 
 	if (!bs)
 		bs = &fs_bio_set;
 
 	__rq_for_each_bio(bio_src, rq_src) {
-		bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask,
-				      bs);
+		struct bio *bio = bio_alloc_clone(rq->q->disk->part0, bio_src,
+				gfp_mask, bs);
 		if (!bio)
 			goto free_and_out;
 
-		if (bio_ctr && bio_ctr(bio, bio_src, data))
+		if (bio_ctr && bio_ctr(bio, bio_src, data)) {
+			bio_put(bio);
 			goto free_and_out;
+		}
 
 		if (rq->bio) {
 			rq->biotail->bi_next = bio;
@@ -3293,7 +3294,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		} else {
 			rq->bio = rq->biotail = bio;
 		}
-		bio = NULL;
 	}
 
 	/* Copy attributes of the original request to the clone request. */
@@ -3311,8 +3311,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 	return 0;
 
 free_and_out:
-	if (bio)
-		bio_put(bio);
 	blk_rq_unprep_clone(rq);
 
 	return -ENOMEM;

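The blk_rq_prep_clone() hunks above scope the clone to one loop iteration and free it right where its constructor fails, so the shared error label no longer has to reason about a possibly-uninitialized pointer. The following is a generic, hedged userspace sketch of that cleanup style with hypothetical names, not the kernel API.

/*
 * Standalone sketch (hypothetical names) of the cleanup style used above:
 * each clone lives in one loop iteration and is released exactly where
 * its setup fails, so the common error path only unwinds items that were
 * fully handed over to the output list.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct item {
	char name[16];
};

/* Pretend constructor that can fail; fails for "bad" sources. */
static int construct(struct item *clone, const char *src)
{
	if (strcmp(src, "bad") == 0)
		return -1;
	snprintf(clone->name, sizeof(clone->name), "%s-clone", src);
	return 0;
}

static int clone_all(const char **srcs, size_t n,
		     struct item **out, size_t *out_n)
{
	*out_n = 0;
	for (size_t i = 0; i < n; i++) {
		struct item *clone = malloc(sizeof(*clone));

		if (!clone)
			goto free_and_out;
		if (construct(clone, srcs[i])) {
			free(clone);		/* release *this* clone right here */
			goto free_and_out;
		}
		out[(*out_n)++] = clone;	/* now owned by the output list */
	}
	return 0;

free_and_out:
	while (*out_n)
		free(out[--(*out_n)]);
	return -1;
}

int main(void)
{
	const char *ok[] = { "a", "b" }, *bad[] = { "a", "bad" };
	struct item *res[2];
	size_t n;

	printf("ok set:  %d\n", clone_all(ok, 2, res, &n));
	while (n)
		free(res[--n]);
	printf("bad set: %d\n", clone_all(bad, 2, res, &n));
	return 0;
}
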
block/blk-settings.c
@@ -178,9 +178,26 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
 	if (!lim->atomic_write_hw_max)
 		goto unsupported;
 
+	if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_min)))
+		goto unsupported;
+
+	if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_max)))
+		goto unsupported;
+
+	if (WARN_ON_ONCE(lim->atomic_write_hw_unit_min >
+			 lim->atomic_write_hw_unit_max))
+		goto unsupported;
+
+	if (WARN_ON_ONCE(lim->atomic_write_hw_unit_max >
+			 lim->atomic_write_hw_max))
+		goto unsupported;
+
 	boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
 
 	if (boundary_sectors) {
+		if (WARN_ON_ONCE(lim->atomic_write_hw_max >
+				 lim->atomic_write_hw_boundary))
+			goto unsupported;
 		/*
 		 * A feature of boundary support is that it disallows bios to
 		 * be merged which would result in a merged request which
@@ -248,6 +265,13 @@ int blk_validate_limits(struct queue_limits *lim)
 	if (lim->io_min < lim->physical_block_size)
 		lim->io_min = lim->physical_block_size;
 
+	/*
+	 * The optimal I/O size may not be aligned to physical block size
+	 * (because it may be limited by dma engines which have no clue about
+	 * block size of the disks attached to them), so we round it down here.
+	 */
+	lim->io_opt = round_down(lim->io_opt, lim->physical_block_size);
+
 	/*
 	 * max_hw_sectors has a somewhat weird default for historical reason,
 	 * but driver really should set their own instead of relying on this
@@ -458,8 +482,6 @@ static unsigned int queue_limit_discard_alignment(
 	/* Why are these in bytes, not sectors? */
 	alignment = lim->discard_alignment >> SECTOR_SHIFT;
 	granularity = lim->discard_granularity >> SECTOR_SHIFT;
-	if (!granularity)
-		return 0;
 
 	/* Offset of the partition start in 'granularity' sectors */
 	offset = sector_div(sector, granularity);
@@ -479,6 +501,119 @@ static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
 	return sectors;
 }
 
+/* Check if second and later bottom devices are compliant */
+static bool blk_stack_atomic_writes_tail(struct queue_limits *t,
+				struct queue_limits *b)
+{
+	/* We're not going to support different boundary sizes.. yet */
+	if (t->atomic_write_hw_boundary != b->atomic_write_hw_boundary)
+		return false;
+
+	/* Can't support this */
+	if (t->atomic_write_hw_unit_min > b->atomic_write_hw_unit_max)
+		return false;
+
+	/* Or this */
+	if (t->atomic_write_hw_unit_max < b->atomic_write_hw_unit_min)
+		return false;
+
+	t->atomic_write_hw_max = min(t->atomic_write_hw_max,
+				b->atomic_write_hw_max);
+	t->atomic_write_hw_unit_min = max(t->atomic_write_hw_unit_min,
+				b->atomic_write_hw_unit_min);
+	t->atomic_write_hw_unit_max = min(t->atomic_write_hw_unit_max,
+				b->atomic_write_hw_unit_max);
+	return true;
+}
+
+/* Check for valid boundary of first bottom device */
+static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t,
+				struct queue_limits *b)
+{
+	/*
+	 * Ensure atomic write boundary is aligned with chunk sectors. Stacked
+	 * devices store chunk sectors in t->io_min.
+	 */
+	if (b->atomic_write_hw_boundary > t->io_min &&
+	    b->atomic_write_hw_boundary % t->io_min)
+		return false;
+	if (t->io_min > b->atomic_write_hw_boundary &&
+	    t->io_min % b->atomic_write_hw_boundary)
+		return false;
+
+	t->atomic_write_hw_boundary = b->atomic_write_hw_boundary;
+	return true;
+}
+
+
+/* Check stacking of first bottom device */
+static bool blk_stack_atomic_writes_head(struct queue_limits *t,
+				struct queue_limits *b)
+{
+	if (b->atomic_write_hw_boundary &&
+	    !blk_stack_atomic_writes_boundary_head(t, b))
+		return false;
+
+	if (t->io_min <= SECTOR_SIZE) {
+		/* No chunk sectors, so use bottom device values directly */
+		t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
+		t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
+		t->atomic_write_hw_max = b->atomic_write_hw_max;
+		return true;
+	}
+
+	/*
+	 * Find values for limits which work for chunk size.
+	 * b->atomic_write_hw_unit_{min, max} may not be aligned with chunk
+	 * size (t->io_min), as chunk size is not restricted to a power-of-2.
+	 * So we need to find highest power-of-2 which works for the chunk
+	 * size.
+	 * As an example scenario, we could have b->unit_max = 16K and
+	 * t->io_min = 24K. For this case, reduce t->unit_max to a value
+	 * aligned with both limits, i.e. 8K in this example.
+	 */
+	t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
+	while (t->io_min % t->atomic_write_hw_unit_max)
+		t->atomic_write_hw_unit_max /= 2;
+
+	t->atomic_write_hw_unit_min = min(b->atomic_write_hw_unit_min,
+					t->atomic_write_hw_unit_max);
+	t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min);
+
+	return true;
+}
+
+static void blk_stack_atomic_writes_limits(struct queue_limits *t,
+				struct queue_limits *b)
+{
+	if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED))
+		goto unsupported;
+
+	if (!b->atomic_write_unit_min)
+		goto unsupported;
+
+	/*
+	 * If atomic_write_hw_max is set, we have already stacked 1x bottom
+	 * device, so check for compliance.
+	 */
+	if (t->atomic_write_hw_max) {
+		if (!blk_stack_atomic_writes_tail(t, b))
+			goto unsupported;
+		return;
+	}
+
+	if (!blk_stack_atomic_writes_head(t, b))
+		goto unsupported;
+	return;
+
+unsupported:
+	t->atomic_write_hw_max = 0;
+	t->atomic_write_hw_unit_max = 0;
+	t->atomic_write_hw_unit_min = 0;
+	t->atomic_write_hw_boundary = 0;
+	t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED;
+}
+
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t:	the stacking driver limits (top device)
@@ -639,6 +774,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		t->zone_write_granularity = 0;
 		t->max_zone_append_sectors = 0;
 	}
+	blk_stack_atomic_writes_limits(t, b);
+
 	return ret;
 }
 EXPORT_SYMBOL(blk_stack_limits);

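The comment in blk_stack_atomic_writes_head() above walks through the arithmetic for fitting the atomic write unit to a non-power-of-2 chunk size (16K unit_max against 24K io_min lands on 8K). A small standalone C demo of just that arithmetic, with the kernel structures and the fit_unit_to_chunk() helper name being my own stand-ins:

/*
 * Userspace demo of the reduction loop above: halve a power-of-2 unit_max
 * until it divides the chunk size (io_min). With 16K and 24K this yields
 * 8K, matching the example in the kernel comment.
 */
#include <stdio.h>

static unsigned int fit_unit_to_chunk(unsigned int unit_max, unsigned int io_min)
{
	/* unit_max is a power of two, so halving preserves that property. */
	while (io_min % unit_max)
		unit_max /= 2;
	return unit_max;
}

int main(void)
{
	unsigned int unit_max = 16 * 1024;
	unsigned int io_min = 24 * 1024;

	printf("unit_max %u, io_min %u -> %u\n",
	       unit_max, io_min, fit_unit_to_chunk(unit_max, io_min));
	return 0;
}
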
block/blk-sysfs.c
@@ -810,10 +810,8 @@ int blk_register_queue(struct gendisk *disk)
 	 * faster to shut down and is made fully functional here as
 	 * request_queues for non-existent devices never get registered.
 	 */
-	if (!blk_queue_init_done(q)) {
-		blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
-		percpu_ref_switch_to_percpu(&q->q_usage_counter);
-	}
+	blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
+	percpu_ref_switch_to_percpu(&q->q_usage_counter);
 
 	return ret;
 

block/blk-zoned.c
@@ -1551,6 +1551,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
 	unsigned int nr_seq_zones, nr_conv_zones;
 	unsigned int pool_size;
 	struct queue_limits lim;
+	int ret;
 
 	disk->nr_zones = args->nr_zones;
 	disk->zone_capacity = args->zone_capacity;
@@ -1601,7 +1602,11 @@ static int disk_update_zone_resources(struct gendisk *disk,
 	}
 
 commit:
-	return queue_limits_commit_update(q, &lim);
+	blk_mq_freeze_queue(q);
+	ret = queue_limits_commit_update(q, &lim);
+	blk_mq_unfreeze_queue(q);
+
+	return ret;
 }
 
 static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
@@ -1816,14 +1821,15 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
 	 * Set the new disk zone parameters only once the queue is frozen and
 	 * all I/Os are completed.
 	 */
-	blk_mq_freeze_queue(q);
 	if (ret > 0)
 		ret = disk_update_zone_resources(disk, &args);
 	else
 		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
-	if (ret)
+	if (ret) {
+		blk_mq_freeze_queue(q);
 		disk_free_zone_resources(disk);
-	blk_mq_unfreeze_queue(q);
+		blk_mq_unfreeze_queue(q);
+	}
 
 	return ret;
 }

block/fops.c
@@ -677,6 +677,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct file *file = iocb->ki_filp;
 	struct inode *bd_inode = bdev_file_inode(file);
 	struct block_device *bdev = I_BDEV(bd_inode);
+	bool atomic = iocb->ki_flags & IOCB_ATOMIC;
 	loff_t size = bdev_nr_bytes(bdev);
 	size_t shorted = 0;
 	ssize_t ret;
@@ -696,7 +697,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
 		return -EOPNOTSUPP;
 
-	if (iocb->ki_flags & IOCB_ATOMIC) {
+	if (atomic) {
 		ret = generic_atomic_write_valid(iocb, from);
 		if (ret)
 			return ret;
@@ -704,6 +705,8 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	size -= iocb->ki_pos;
 	if (iov_iter_count(from) > size) {
+		if (atomic)
+			return -EINVAL;
 		shorted = iov_iter_count(from) - size;
 		iov_iter_truncate(from, size);
 	}

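The blkdev_write_iter() hunks above enforce that an atomic write which would run past the end of the device fails instead of being silently shortened. A hedged userspace sketch of that decision rule follows; clamp_write() and its parameters are hypothetical, it is not the kernel helper.

/*
 * Illustrative sketch of the policy above: a regular write that exceeds
 * the remaining device bytes is shortened, but an atomic write must fit
 * completely or fail with -EINVAL, since truncating it would break the
 * all-or-nothing guarantee.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Returns bytes to write, or -EINVAL for an atomic write that won't fit. */
static long long clamp_write(long long count, long long remaining, bool atomic)
{
	if (count > remaining) {
		if (atomic)
			return -EINVAL;
		count = remaining;	/* shorted write, like iov_iter_truncate() */
	}
	return count;
}

int main(void)
{
	printf("normal: %lld\n", clamp_write(8192, 4096, false));	/* 4096 */
	printf("atomic: %lld\n", clamp_write(8192, 4096, true));	/* -EINVAL */
	return 0;
}
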
block/genhd.c
@@ -742,13 +742,10 @@ void del_gendisk(struct gendisk *disk)
 	 * If the disk does not own the queue, allow using passthrough requests
 	 * again. Else leave the queue frozen to fail all I/O.
 	 */
-	if (!test_bit(GD_OWNS_QUEUE, &disk->state)) {
-		blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
+	if (!test_bit(GD_OWNS_QUEUE, &disk->state))
 		__blk_mq_unfreeze_queue(q, true);
-	} else {
-		if (queue_is_mq(q))
-			blk_mq_exit_queue(q);
-	}
+	else if (queue_is_mq(q))
+		blk_mq_exit_queue(q);
 
 	if (start_drain)
 		blk_unfreeze_release_lock(q, true, queue_dying);

block/mq-deadline.c
@@ -685,10 +685,9 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	prio = ioprio_class_to_prio[ioprio_class];
 	per_prio = &dd->per_prio[prio];
-	if (!rq->elv.priv[0]) {
+	if (!rq->elv.priv[0])
 		per_prio->stats.inserted++;
-		rq->elv.priv[0] = (void *)(uintptr_t)1;
-	}
+	rq->elv.priv[0] = per_prio;
 
 	if (blk_mq_sched_try_insert_merge(q, rq, free))
 		return;
@@ -753,18 +752,14 @@ static void dd_prepare_request(struct request *rq)
  */
 static void dd_finish_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct deadline_data *dd = q->elevator->elevator_data;
-	const u8 ioprio_class = dd_rq_ioclass(rq);
-	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
-	struct dd_per_prio *per_prio = &dd->per_prio[prio];
+	struct dd_per_prio *per_prio = rq->elv.priv[0];
 
 	/*
 	 * The block layer core may call dd_finish_request() without having
 	 * called dd_insert_requests(). Skip requests that bypassed I/O
 	 * scheduling. See also blk_mq_request_bypass_insert().
 	 */
-	if (rq->elv.priv[0])
+	if (per_prio)
 		atomic_inc(&per_prio->stats.completed);
 }
 

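The mq-deadline hunks above stop re-deriving the priority class at completion time and instead stash the per-priority bucket pointer in rq->elv.priv[0] at insert time, so completion accounting hits the same bucket even if the request's I/O priority changed in between. A standalone, hedged userspace sketch of that bookkeeping (hypothetical names, not the elevator API):

/*
 * Sketch of the bookkeeping change above: remember which per-priority
 * bucket a request was accounted to at insert time, and use that exact
 * pointer at completion instead of re-deriving it from a priority field
 * that may have been rewritten in the meantime.
 */
#include <stdio.h>

struct stats {
	int inserted;
	int completed;
};

struct request {
	int ioprio_class;	/* may change while the request is in flight */
	struct stats *bucket;	/* plays the role of rq->elv.priv[0] */
};

static struct stats per_prio[3];

static void insert_request(struct request *rq)
{
	struct stats *s = &per_prio[rq->ioprio_class];

	s->inserted++;
	rq->bucket = s;		/* pin the bucket used for accounting */
}

static void finish_request(struct request *rq)
{
	if (rq->bucket)		/* requests that bypassed the scheduler have none */
		rq->bucket->completed++;
}

int main(void)
{
	struct request rq = { .ioprio_class = 1 };

	insert_request(&rq);
	rq.ioprio_class = 2;	/* priority changed after insertion */
	finish_request(&rq);

	printf("class 1: inserted %d completed %d\n",
	       per_prio[1].inserted, per_prio[1].completed);
	printf("class 2: inserted %d completed %d\n",
	       per_prio[2].inserted, per_prio[2].completed);
	return 0;
}
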
drivers/block/brd.c
@@ -231,8 +231,10 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 	xa_lock(&brd->brd_pages);
 	while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) {
 		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
-		if (page)
+		if (page) {
 			__free_page(page);
+			brd->brd_nr_pages--;
+		}
 		aligned_sector += PAGE_SECTORS;
 		size -= PAGE_SIZE;
 	}

drivers/block/loop.c
@@ -770,12 +770,11 @@ static void loop_sysfs_exit(struct loop_device *lo)
 				   &loop_attribute_group);
 }
 
-static void loop_config_discard(struct loop_device *lo,
-		struct queue_limits *lim)
+static void loop_get_discard_config(struct loop_device *lo,
+		u32 *granularity, u32 *max_discard_sectors)
 {
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
-	u32 granularity = 0, max_discard_sectors = 0;
 	struct kstatfs sbuf;
 
 	/*
@@ -788,24 +787,17 @@ static void loop_config_discard(struct loop_device *lo,
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
 
-		max_discard_sectors = bdev_write_zeroes_sectors(bdev);
-		granularity = bdev_discard_granularity(bdev);
+		*max_discard_sectors = bdev_write_zeroes_sectors(bdev);
+		*granularity = bdev_discard_granularity(bdev);
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
 	 * image a.k.a. discard.
 	 */
 	} else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) {
-		max_discard_sectors = UINT_MAX >> 9;
-		granularity = sbuf.f_bsize;
+		*max_discard_sectors = UINT_MAX >> 9;
+		*granularity = sbuf.f_bsize;
 	}
-
-	lim->max_hw_discard_sectors = max_discard_sectors;
-	lim->max_write_zeroes_sectors = max_discard_sectors;
-	if (max_discard_sectors)
-		lim->discard_granularity = granularity;
-	else
-		lim->discard_granularity = 0;
 }
 
 struct loop_worker {
@@ -991,6 +983,7 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize)
 	struct inode *inode = file->f_mapping->host;
 	struct block_device *backing_bdev = NULL;
 	struct queue_limits lim;
+	u32 granularity = 0, max_discard_sectors = 0;
 
 	if (S_ISBLK(inode->i_mode))
 		backing_bdev = I_BDEV(inode);
@@ -1000,6 +993,8 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize)
 	if (!bsize)
 		bsize = loop_default_blocksize(lo, backing_bdev);
 
+	loop_get_discard_config(lo, &granularity, &max_discard_sectors);
+
 	lim = queue_limits_start_update(lo->lo_queue);
 	lim.logical_block_size = bsize;
 	lim.physical_block_size = bsize;
@@ -1009,7 +1004,12 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize)
 		lim.features |= BLK_FEAT_WRITE_CACHE;
 	if (backing_bdev && !bdev_nonrot(backing_bdev))
 		lim.features |= BLK_FEAT_ROTATIONAL;
-	loop_config_discard(lo, &lim);
+	lim.max_hw_discard_sectors = max_discard_sectors;
+	lim.max_write_zeroes_sectors = max_discard_sectors;
+	if (max_discard_sectors)
+		lim.discard_granularity = granularity;
+	else
+		lim.discard_granularity = 0;
 	return queue_limits_commit_update(lo->lo_queue, &lim);
 }
 

drivers/block/ublk_drv.c
@@ -3041,7 +3041,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
 		ret = ublk_ctrl_end_recovery(ub, cmd);
 		break;
 	default:
-		ret = -ENOTSUPP;
+		ret = -EOPNOTSUPP;
 		break;
 	}
 

drivers/md/raid0.c
@@ -384,6 +384,7 @@ static int raid0_set_limits(struct mddev *mddev)
 	lim.max_write_zeroes_sectors = mddev->chunk_sectors;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * mddev->raid_disks;
+	lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err) {
 		queue_limits_cancel_update(mddev->gendisk->queue);

drivers/md/raid1.c
@@ -1571,7 +1571,21 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 				continue;
 			}
 			if (is_bad) {
-				int good_sectors = first_bad - r1_bio->sector;
+				int good_sectors;
+
+				/*
+				 * We cannot atomically write this, so just
+				 * error in that case. It could be possible to
+				 * atomically write other mirrors, but the
+				 * complexity of supporting that is not worth
+				 * the benefit.
+				 */
+				if (bio->bi_opf & REQ_ATOMIC) {
+					error = -EIO;
+					goto err_handle;
+				}
+
+				good_sectors = first_bad - r1_bio->sector;
 				if (good_sectors < max_sectors)
 					max_sectors = good_sectors;
 			}
@@ -1657,7 +1671,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
 		mbio->bi_iter.bi_sector	= (r1_bio->sector + rdev->data_offset);
 		mbio->bi_end_io	= raid1_end_write_request;
-		mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
+		mbio->bi_opf = bio_op(bio) |
+			(bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
 		if (test_bit(FailFast, &rdev->flags) &&
 		    !test_bit(WriteMostly, &rdev->flags) &&
 		    conf->raid_disks - mddev->degraded > 1)
@@ -3224,6 +3239,7 @@ static int raid1_set_limits(struct mddev *mddev)
 
 	md_init_stacking_limits(&lim);
 	lim.max_write_zeroes_sectors = 0;
+	lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err) {
 		queue_limits_cancel_update(mddev->gendisk->queue);

drivers/md/raid10.c
@@ -1255,6 +1255,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	const enum req_op op = bio_op(bio);
 	const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
 	const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
+	const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
 	unsigned long flags;
 	struct r10conf *conf = mddev->private;
 	struct md_rdev *rdev;
@@ -1273,7 +1274,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	mbio->bi_iter.bi_sector	= (r10_bio->devs[n_copy].addr +
 				   choose_data_offset(r10_bio, rdev));
 	mbio->bi_end_io	= raid10_end_write_request;
-	mbio->bi_opf = op | do_sync | do_fua;
+	mbio->bi_opf = op | do_sync | do_fua | do_atomic;
 	if (!replacement && test_bit(FailFast,
 				     &conf->mirrors[devnum].rdev->flags)
 			 && enough(conf, devnum))
@@ -1468,7 +1469,21 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				continue;
 			}
 			if (is_bad) {
-				int good_sectors = first_bad - dev_sector;
+				int good_sectors;
+
+				/*
+				 * We cannot atomically write this, so just
+				 * error in that case. It could be possible to
+				 * atomically write other mirrors, but the
+				 * complexity of supporting that is not worth
+				 * the benefit.
+				 */
+				if (bio->bi_opf & REQ_ATOMIC) {
+					error = -EIO;
+					goto err_handle;
+				}
+
+				good_sectors = first_bad - dev_sector;
 				if (good_sectors < max_sectors)
 					max_sectors = good_sectors;
 			}
@@ -4025,6 +4040,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
 	lim.max_write_zeroes_sectors = 0;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
+	lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err) {
 		queue_limits_cancel_update(mddev->gendisk->queue);

drivers/nvme/host/core.c
@@ -1305,9 +1305,10 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
-static void nvme_keep_alive_finish(struct request *rq,
-		blk_status_t status, struct nvme_ctrl *ctrl)
+static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
+						 blk_status_t status)
 {
+	struct nvme_ctrl *ctrl = rq->end_io_data;
 	unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
 	unsigned long delay = nvme_keep_alive_work_period(ctrl);
 	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
@@ -1324,17 +1325,20 @@ static void nvme_keep_alive_finish(struct request *rq,
 		delay = 0;
 	}
 
+	blk_mq_free_request(rq);
+
 	if (status) {
 		dev_err(ctrl->device,
 			"failed nvme_keep_alive_end_io error=%d\n",
 			status);
-		return;
+		return RQ_END_IO_NONE;
 	}
 
 	ctrl->ka_last_check_time = jiffies;
 	ctrl->comp_seen = false;
 	if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
 		queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
+	return RQ_END_IO_NONE;
 }
 
 static void nvme_keep_alive_work(struct work_struct *work)
@@ -1343,7 +1347,6 @@ static void nvme_keep_alive_work(struct work_struct *work)
 			struct nvme_ctrl, ka_work);
 	bool comp_seen = ctrl->comp_seen;
 	struct request *rq;
-	blk_status_t status;
 
 	ctrl->ka_last_check_time = jiffies;
 
@@ -1366,9 +1369,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
 	nvme_init_request(rq, &ctrl->ka_cmd);
 
 	rq->timeout = ctrl->kato * HZ;
-	status = blk_execute_rq(rq, false);
-	nvme_keep_alive_finish(rq, status, ctrl);
-	blk_mq_free_request(rq);
+	rq->end_io = nvme_keep_alive_end_io;
+	rq->end_io_data = ctrl;
+	blk_execute_rq_nowait(rq, false);
 }
 
 static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -4600,6 +4603,11 @@ EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set);
 
 void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl)
 {
+	/*
+	 * As we're about to destroy the queue and free tagset
+	 * we can not have keep-alive work running.
+	 */
+	nvme_stop_keep_alive(ctrl);
 	blk_mq_destroy_queue(ctrl->admin_q);
 	blk_put_queue(ctrl->admin_q);
 	if (ctrl->ops->flags & NVME_F_FABRICS) {

drivers/nvme/host/ioctl.c
@@ -120,12 +120,20 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 	struct nvme_ns *ns = q->queuedata;
 	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
 	bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
+	struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
 	bool has_metadata = meta_buffer && meta_len;
 	struct bio *bio = NULL;
 	int ret;
 
-	if (has_metadata && !supports_metadata)
-		return -EINVAL;
+	if (!nvme_ctrl_sgl_supported(ctrl))
+		dev_warn_once(ctrl->device, "using unchecked data buffer\n");
+	if (has_metadata) {
+		if (!supports_metadata)
+			return -EINVAL;
+		if (!nvme_ctrl_meta_sgl_supported(ctrl))
+			dev_warn_once(ctrl->device,
+				      "using unchecked metadata buffer\n");
+	}
 
 	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
 		struct iov_iter iter;

drivers/nvme/host/multipath.c
@@ -165,7 +165,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 	int srcu_idx;
 
 	srcu_idx = srcu_read_lock(&ctrl->srcu);
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
+				 srcu_read_lock_held(&ctrl->srcu)) {
 		if (!ns->head->disk)
 			continue;
 		kblockd_schedule_work(&ns->head->requeue_work);
@@ -209,7 +210,8 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 	int srcu_idx;
 
 	srcu_idx = srcu_read_lock(&ctrl->srcu);
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
+				 srcu_read_lock_held(&ctrl->srcu)) {
 		nvme_mpath_clear_current_path(ns);
 		kblockd_schedule_work(&ns->head->requeue_work);
 	}
@@ -224,7 +226,8 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
 	int srcu_idx;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
-	list_for_each_entry_rcu(ns, &head->list, siblings) {
+	list_for_each_entry_srcu(ns, &head->list, siblings,
+				 srcu_read_lock_held(&head->srcu)) {
 		if (capacity != get_capacity(ns->disk))
 			clear_bit(NVME_NS_READY, &ns->flags);
 	}
@@ -257,7 +260,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 	int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
 	struct nvme_ns *found = NULL, *fallback = NULL, *ns;
 
-	list_for_each_entry_rcu(ns, &head->list, siblings) {
+	list_for_each_entry_srcu(ns, &head->list, siblings,
+				 srcu_read_lock_held(&head->srcu)) {
 		if (nvme_path_is_disabled(ns))
 			continue;
 
@@ -356,7 +360,8 @@ static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head)
 	unsigned int min_depth_opt = UINT_MAX, min_depth_nonopt = UINT_MAX;
 	unsigned int depth;
 
-	list_for_each_entry_rcu(ns, &head->list, siblings) {
+	list_for_each_entry_srcu(ns, &head->list, siblings,
+				 srcu_read_lock_held(&head->srcu)) {
 		if (nvme_path_is_disabled(ns))
 			continue;
 
@@ -424,7 +429,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
 		return NULL;
 
-	list_for_each_entry_rcu(ns, &head->list, siblings) {
+	list_for_each_entry_srcu(ns, &head->list, siblings,
+				 srcu_read_lock_held(&head->srcu)) {
 		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
 			continue;
 		switch (nvme_ctrl_state(ns->ctrl)) {
@@ -783,7 +789,8 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 		return 0;
 
 	srcu_idx = srcu_read_lock(&ctrl->srcu);
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
+				 srcu_read_lock_held(&ctrl->srcu)) {
 		unsigned nsid;
 again:
 		nsid = le32_to_cpu(desc->nsids[n]);

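The multipath.c hunks above switch every namespace-list walk from list_for_each_entry_rcu() to list_for_each_entry_srcu() with an explicit srcu_read_lock_held() argument, so the traversal is checked against the SRCU reader lock that actually protects the list. As a loose userspace analogy only (this is not RCU/SRCU and not the kernel list API; all names are hypothetical), the sketch below threads the "is the protecting reader lock held?" assumption into the iteration helper so it can be asserted at the point of use.

/*
 * Loose analogy for the change above: the list walk takes an explicit
 * lock-held check, so a traversal under the wrong (or no) lock is caught
 * where it happens rather than silently tolerated.
 */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	int value;
	struct node *next;
};

static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;
static bool reader_lock_held;	/* stand-in for srcu_read_lock_held() */

static void walk_list(struct node *head, bool lock_held)
{
	assert(lock_held);	/* the traversal documents and checks its protection */
	for (struct node *n = head; n; n = n->next)
		printf("node %d\n", n->value);
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	pthread_rwlock_rdlock(&list_lock);
	reader_lock_held = true;
	walk_list(&a, reader_lock_held);
	reader_lock_held = false;
	pthread_rwlock_unlock(&list_lock);
	return 0;
}
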
drivers/nvme/host/nvme.h
@@ -1123,7 +1123,15 @@ static inline void nvme_start_request(struct request *rq)
 
 static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
 {
-	return ctrl->sgls & ((1 << 0) | (1 << 1));
+	return ctrl->sgls & (NVME_CTRL_SGLS_BYTE_ALIGNED |
+			     NVME_CTRL_SGLS_DWORD_ALIGNED);
+}
+
+static inline bool nvme_ctrl_meta_sgl_supported(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->ops->flags & NVME_F_FABRICS)
+		return true;
+	return ctrl->sgls & NVME_CTRL_SGLS_MSDS;
 }
 
 #ifdef CONFIG_NVME_HOST_AUTH

drivers/nvme/host/pci.c
@@ -43,6 +43,7 @@
  */
 #define NVME_MAX_KB_SZ	8192
 #define NVME_MAX_SEGS	128
+#define NVME_MAX_META_SEGS	15
 #define NVME_MAX_NR_ALLOCATIONS	5
 
 static int use_threaded_interrupts;
@@ -144,6 +145,7 @@ struct nvme_dev {
 	struct sg_table *hmb_sgt;
 
 	mempool_t *iod_mempool;
+	mempool_t *iod_meta_mempool;
 
 	/* shadow doorbell buffer support: */
 	__le32 *dbbuf_dbs;
@@ -239,6 +241,8 @@ struct nvme_iod {
 	dma_addr_t first_dma;
 	dma_addr_t meta_dma;
 	struct sg_table sgt;
+	struct sg_table meta_sgt;
+	union nvme_descriptor meta_list;
 	union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
 };
 
@@ -506,6 +510,15 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	spin_unlock(&nvmeq->sq_lock);
 }
 
+static inline bool nvme_pci_metadata_use_sgls(struct nvme_dev *dev,
+					      struct request *req)
+{
+	if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		return false;
+	return req->nr_integrity_segments > 1 ||
+		nvme_req(req)->flags & NVME_REQ_USERCMD;
+}
+
 static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 				     int nseg)
 {
@@ -518,8 +531,10 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 		return false;
 	if (!nvmeq->qid)
 		return false;
+	if (nvme_pci_metadata_use_sgls(dev, req))
+		return true;
 	if (!sgl_threshold || avg_seg_size < sgl_threshold)
-		return false;
+		return nvme_req(req)->flags & NVME_REQ_USERCMD;
 	return true;
 }
 
@@ -780,7 +795,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if ((bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
+			if (!nvme_pci_metadata_use_sgls(dev, req) &&
+			    (bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
 			    bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
@@ -824,11 +840,69 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	return ret;
 }
 
-static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
-		struct nvme_command *cmnd)
+static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev,
+					     struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_rw_command *cmnd = &iod->cmd.rw;
+	struct nvme_sgl_desc *sg_list;
+	struct scatterlist *sgl, *sg;
+	unsigned int entries;
+	dma_addr_t sgl_dma;
+	int rc, i;
+
+	iod->meta_sgt.sgl = mempool_alloc(dev->iod_meta_mempool, GFP_ATOMIC);
+	if (!iod->meta_sgt.sgl)
+		return BLK_STS_RESOURCE;
+
+	sg_init_table(iod->meta_sgt.sgl, req->nr_integrity_segments);
+	iod->meta_sgt.orig_nents = blk_rq_map_integrity_sg(req,
+							   iod->meta_sgt.sgl);
+	if (!iod->meta_sgt.orig_nents)
+		goto out_free_sg;
+
+	rc = dma_map_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req),
+			     DMA_ATTR_NO_WARN);
+	if (rc)
+		goto out_free_sg;
+
+	sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma);
+	if (!sg_list)
+		goto out_unmap_sg;
+
+	entries = iod->meta_sgt.nents;
+	iod->meta_list.sg_list = sg_list;
+	iod->meta_dma = sgl_dma;
+
+	cmnd->flags = NVME_CMD_SGL_METASEG;
+	cmnd->metadata = cpu_to_le64(sgl_dma);
+
+	sgl = iod->meta_sgt.sgl;
+	if (entries == 1) {
+		nvme_pci_sgl_set_data(sg_list, sgl);
+		return BLK_STS_OK;
+	}
+
+	sgl_dma += sizeof(*sg_list);
+	nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries);
+	for_each_sg(sgl, sg, entries, i)
+		nvme_pci_sgl_set_data(&sg_list[i + 1], sg);
+
+	return BLK_STS_OK;
+
+out_unmap_sg:
+	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
+out_free_sg:
+	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
+	return BLK_STS_RESOURCE;
+}
+
+static blk_status_t nvme_pci_setup_meta_mptr(struct nvme_dev *dev,
+					     struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct bio_vec bv = rq_integrity_vec(req);
-	struct nvme_command *cmnd = &iod->cmd;
 
 	iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
@@ -837,6 +911,13 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 	return BLK_STS_OK;
 }
 
+static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req)
+{
+	if (nvme_pci_metadata_use_sgls(dev, req))
+		return nvme_pci_setup_meta_sgls(dev, req);
+	return nvme_pci_setup_meta_mptr(dev, req);
+}
+
 static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -845,6 +926,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	iod->aborted = false;
 	iod->nr_allocations = -1;
 	iod->sgt.nents = 0;
+	iod->meta_sgt.nents = 0;
 
 	ret = nvme_setup_cmd(req->q->queuedata, req);
 	if (ret)
@@ -857,7 +939,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	}
 
 	if (blk_integrity_rq(req)) {
-		ret = nvme_map_metadata(dev, req, &iod->cmd);
+		ret = nvme_map_metadata(dev, req);
 		if (ret)
 			goto out_unmap_data;
 	}
@@ -955,17 +1037,31 @@ static void nvme_queue_rqs(struct rq_list *rqlist)
 	*rqlist = requeue_list;
 }
 
+static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev,
+						struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+	if (!iod->meta_sgt.nents) {
+		dma_unmap_page(dev->dev, iod->meta_dma,
+			       rq_integrity_vec(req).bv_len,
+			       rq_dma_dir(req));
+		return;
+	}
+
+	dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list,
+		      iod->meta_dma);
+	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
+	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
+}
+
 static __always_inline void nvme_pci_unmap_rq(struct request *req)
 {
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 	struct nvme_dev *dev = nvmeq->dev;
 
-	if (blk_integrity_rq(req)) {
-		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-
-		dma_unmap_page(dev->dev, iod->meta_dma,
-			       rq_integrity_vec(req).bv_len, rq_dma_dir(req));
-	}
+	if (blk_integrity_rq(req))
+		nvme_unmap_metadata(dev, req);
 
 	if (blk_rq_nr_phys_segments(req))
 		nvme_unmap_data(dev, req);
@@ -2761,6 +2857,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 
 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 {
+	size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
 	size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS;
 
 	dev->iod_mempool = mempool_create_node(1,
@@ -2769,7 +2866,18 @@
 			dev_to_node(dev->dev));
|
||||||
if (!dev->iod_mempool)
|
if (!dev->iod_mempool)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
dev->iod_meta_mempool = mempool_create_node(1,
|
||||||
|
mempool_kmalloc, mempool_kfree,
|
||||||
|
(void *)meta_size, GFP_KERNEL,
|
||||||
|
dev_to_node(dev->dev));
|
||||||
|
if (!dev->iod_meta_mempool)
|
||||||
|
goto free;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
free:
|
||||||
|
mempool_destroy(dev->iod_mempool);
|
||||||
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nvme_free_tagset(struct nvme_dev *dev)
|
static void nvme_free_tagset(struct nvme_dev *dev)
|
||||||
@ -2834,6 +2942,11 @@ static void nvme_reset_work(struct work_struct *work)
|
|||||||
if (result)
|
if (result)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
if (nvme_ctrl_meta_sgl_supported(&dev->ctrl))
|
||||||
|
dev->ctrl.max_integrity_segments = NVME_MAX_META_SEGS;
|
||||||
|
else
|
||||||
|
dev->ctrl.max_integrity_segments = 1;
|
||||||
|
|
||||||
nvme_dbbuf_dma_alloc(dev);
|
nvme_dbbuf_dma_alloc(dev);
|
||||||
|
|
||||||
result = nvme_setup_host_mem(dev);
|
result = nvme_setup_host_mem(dev);
|
||||||
@ -3101,11 +3214,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
|
|||||||
dev->ctrl.max_hw_sectors = min_t(u32,
|
dev->ctrl.max_hw_sectors = min_t(u32,
|
||||||
NVME_MAX_KB_SZ << 1, dma_opt_mapping_size(&pdev->dev) >> 9);
|
NVME_MAX_KB_SZ << 1, dma_opt_mapping_size(&pdev->dev) >> 9);
|
||||||
dev->ctrl.max_segments = NVME_MAX_SEGS;
|
dev->ctrl.max_segments = NVME_MAX_SEGS;
|
||||||
|
|
||||||
/*
|
|
||||||
* There is no support for SGLs for metadata (yet), so we are limited to
|
|
||||||
* a single integrity segment for the separate metadata pointer.
|
|
||||||
*/
|
|
||||||
dev->ctrl.max_integrity_segments = 1;
|
dev->ctrl.max_integrity_segments = 1;
|
||||||
return dev;
|
return dev;
|
||||||
|
|
||||||
@ -3168,6 +3276,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||||||
if (result)
|
if (result)
|
||||||
goto out_disable;
|
goto out_disable;
|
||||||
|
|
||||||
|
if (nvme_ctrl_meta_sgl_supported(&dev->ctrl))
|
||||||
|
dev->ctrl.max_integrity_segments = NVME_MAX_META_SEGS;
|
||||||
|
else
|
||||||
|
dev->ctrl.max_integrity_segments = 1;
|
||||||
|
|
||||||
nvme_dbbuf_dma_alloc(dev);
|
nvme_dbbuf_dma_alloc(dev);
|
||||||
|
|
||||||
result = nvme_setup_host_mem(dev);
|
result = nvme_setup_host_mem(dev);
|
||||||
@ -3210,6 +3323,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||||||
nvme_free_queues(dev, 0);
|
nvme_free_queues(dev, 0);
|
||||||
out_release_iod_mempool:
|
out_release_iod_mempool:
|
||||||
mempool_destroy(dev->iod_mempool);
|
mempool_destroy(dev->iod_mempool);
|
||||||
|
mempool_destroy(dev->iod_meta_mempool);
|
||||||
out_release_prp_pools:
|
out_release_prp_pools:
|
||||||
nvme_release_prp_pools(dev);
|
nvme_release_prp_pools(dev);
|
||||||
out_dev_unmap:
|
out_dev_unmap:
|
||||||
@ -3275,6 +3389,7 @@ static void nvme_remove(struct pci_dev *pdev)
|
|||||||
nvme_dbbuf_dma_free(dev);
|
nvme_dbbuf_dma_free(dev);
|
||||||
nvme_free_queues(dev, 0);
|
nvme_free_queues(dev, 0);
|
||||||
mempool_destroy(dev->iod_mempool);
|
mempool_destroy(dev->iod_mempool);
|
||||||
|
mempool_destroy(dev->iod_meta_mempool);
|
||||||
nvme_release_prp_pools(dev);
|
nvme_release_prp_pools(dev);
|
||||||
nvme_dev_unmap(dev);
|
nvme_dev_unmap(dev);
|
||||||
nvme_uninit_ctrl(&dev->ctrl);
|
nvme_uninit_ctrl(&dev->ctrl);
|
||||||
|
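
The nvme_pci_setup_meta_sgls() hunk above follows the usual NVMe SGL layout: when the integrity payload maps to a single segment it emits one data-block descriptor, otherwise the first descriptor becomes a segment descriptor that points at the per-segment data descriptors placed right behind it. The following stand-alone C sketch illustrates that one-versus-many layout decision only; the struct, type values, and helper names are made up for this illustration and are not the kernel's.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 16-byte descriptor, loosely modelled on an NVMe SGL entry;
 * the type values below are invented for this sketch. */
struct sgl_desc {
        uint64_t addr;
        uint32_t length;
        uint8_t  rsvd[3];
        uint8_t  type;
};

#define SGL_TYPE_DATA    0x0    /* describes a data buffer */
#define SGL_TYPE_SEGMENT 0x2    /* describes a list of further descriptors */

/*
 * Fill 'list' for 'nents' metadata buffers: one data descriptor when there
 * is a single buffer, otherwise descriptor 0 becomes a segment descriptor
 * covering the data descriptors stored right behind it (list[1..nents]).
 */
static void build_meta_sgl(struct sgl_desc *list, uint64_t list_dma,
                           const uint64_t *addrs, const uint32_t *lens,
                           unsigned int nents)
{
        unsigned int i;

        if (nents == 1) {
                list[0] = (struct sgl_desc){ .addr = addrs[0],
                                             .length = lens[0],
                                             .type = SGL_TYPE_DATA };
                return;
        }

        list[0] = (struct sgl_desc){ .addr = list_dma + sizeof(list[0]),
                                     .length = nents * sizeof(list[0]),
                                     .type = SGL_TYPE_SEGMENT };
        for (i = 0; i < nents; i++)
                list[i + 1] = (struct sgl_desc){ .addr = addrs[i],
                                                 .length = lens[i],
                                                 .type = SGL_TYPE_DATA };
}

int main(void)
{
        uint64_t addrs[3] = { 0x1000, 0x3000, 0x5000 };
        uint32_t lens[3]  = { 64, 64, 64 };
        struct sgl_desc list[4];
        unsigned int i;

        build_meta_sgl(list, 0x9000, addrs, lens, 3);
        for (i = 0; i < 4; i++)
                printf("desc %u: addr=0x%llx len=%u type=%u\n", i,
                       (unsigned long long)list[i].addr, list[i].length,
                       list[i].type);
        return 0;
}
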
@@ -94,109 +94,137 @@ static int nvme_status_to_pr_err(int status)
         }
 }
 
-static int nvme_send_pr_command(struct block_device *bdev,
-                struct nvme_command *c, void *data, unsigned int data_len)
+static int __nvme_send_pr_command(struct block_device *bdev, u32 cdw10,
+                u32 cdw11, u8 op, void *data, unsigned int data_len)
 {
-        if (nvme_disk_is_ns_head(bdev->bd_disk))
-                return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
-
-        return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
-                        data_len);
-}
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
-                u64 key, u64 sa_key, u8 op)
-{
-        struct nvme_command c = { };
-        u8 data[16] = { 0, };
-        int ret;
-
-        put_unaligned_le64(key, &data[0]);
-        put_unaligned_le64(sa_key, &data[8]);
+        struct nvme_command c = { 0 };
 
         c.common.opcode = op;
         c.common.cdw10 = cpu_to_le32(cdw10);
+        c.common.cdw11 = cpu_to_le32(cdw11);
 
-        ret = nvme_send_pr_command(bdev, &c, data, sizeof(data));
-        if (ret < 0)
-                return ret;
-
-        return nvme_status_to_pr_err(ret);
+        if (nvme_disk_is_ns_head(bdev->bd_disk))
+                return nvme_send_ns_head_pr_command(bdev, &c, data, data_len);
+        return nvme_send_ns_pr_command(bdev->bd_disk->private_data, &c,
+                                       data, data_len);
 }
 
-static int nvme_pr_register(struct block_device *bdev, u64 old,
-                u64 new, unsigned flags)
+static int nvme_send_pr_command(struct block_device *bdev, u32 cdw10, u32 cdw11,
+                u8 op, void *data, unsigned int data_len)
 {
+        int ret;
+
+        ret = __nvme_send_pr_command(bdev, cdw10, cdw11, op, data, data_len);
+        return ret < 0 ? ret : nvme_status_to_pr_err(ret);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
+                unsigned int flags)
+{
+        struct nvmet_pr_register_data data = { 0 };
         u32 cdw10;
 
         if (flags & ~PR_FL_IGNORE_KEY)
                 return -EOPNOTSUPP;
 
-        cdw10 = old ? 2 : 0;
-        cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
-        cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
-        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+        data.crkey = cpu_to_le64(old_key);
+        data.nrkey = cpu_to_le64(new_key);
+
+        cdw10 = old_key ? NVME_PR_REGISTER_ACT_REPLACE :
+                        NVME_PR_REGISTER_ACT_REG;
+        cdw10 |= (flags & PR_FL_IGNORE_KEY) ? NVME_PR_IGNORE_KEY : 0;
+        cdw10 |= NVME_PR_CPTPL_PERSIST;
+
+        return nvme_send_pr_command(bdev, cdw10, 0, nvme_cmd_resv_register,
+                                    &data, sizeof(data));
 }
 
 static int nvme_pr_reserve(struct block_device *bdev, u64 key,
                 enum pr_type type, unsigned flags)
 {
+        struct nvmet_pr_acquire_data data = { 0 };
         u32 cdw10;
 
         if (flags & ~PR_FL_IGNORE_KEY)
                 return -EOPNOTSUPP;
 
-        cdw10 = nvme_pr_type_from_blk(type) << 8;
-        cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
-        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+        data.crkey = cpu_to_le64(key);
+
+        cdw10 = NVME_PR_ACQUIRE_ACT_ACQUIRE;
+        cdw10 |= nvme_pr_type_from_blk(type) << 8;
+        cdw10 |= (flags & PR_FL_IGNORE_KEY) ? NVME_PR_IGNORE_KEY : 0;
+
+        return nvme_send_pr_command(bdev, cdw10, 0, nvme_cmd_resv_acquire,
+                                    &data, sizeof(data));
 }
 
 static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
                 enum pr_type type, bool abort)
 {
-        u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1);
+        struct nvmet_pr_acquire_data data = { 0 };
+        u32 cdw10;
 
-        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+        data.crkey = cpu_to_le64(old);
+        data.prkey = cpu_to_le64(new);
+
+        cdw10 = abort ? NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT :
+                        NVME_PR_ACQUIRE_ACT_PREEMPT;
+        cdw10 |= nvme_pr_type_from_blk(type) << 8;
+
+        return nvme_send_pr_command(bdev, cdw10, 0, nvme_cmd_resv_acquire,
+                                    &data, sizeof(data));
 }
 
 static int nvme_pr_clear(struct block_device *bdev, u64 key)
 {
-        u32 cdw10 = 1 | (key ? 0 : 1 << 3);
+        struct nvmet_pr_release_data data = { 0 };
+        u32 cdw10;
 
-        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+        data.crkey = cpu_to_le64(key);
+
+        cdw10 = NVME_PR_RELEASE_ACT_CLEAR;
+        cdw10 |= key ? 0 : NVME_PR_IGNORE_KEY;
+
+        return nvme_send_pr_command(bdev, cdw10, 0, nvme_cmd_resv_release,
+                                    &data, sizeof(data));
 }
 
 static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 {
-        u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3);
+        struct nvmet_pr_release_data data = { 0 };
+        u32 cdw10;
 
-        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+        data.crkey = cpu_to_le64(key);
+
+        cdw10 = NVME_PR_RELEASE_ACT_RELEASE;
+        cdw10 |= nvme_pr_type_from_blk(type) << 8;
+        cdw10 |= key ? 0 : NVME_PR_IGNORE_KEY;
+
+        return nvme_send_pr_command(bdev, cdw10, 0, nvme_cmd_resv_release,
+                                    &data, sizeof(data));
 }
 
 static int nvme_pr_resv_report(struct block_device *bdev, void *data,
                 u32 data_len, bool *eds)
 {
-        struct nvme_command c = { };
+        u32 cdw10, cdw11;
         int ret;
 
-        c.common.opcode = nvme_cmd_resv_report;
-        c.common.cdw10 = cpu_to_le32(nvme_bytes_to_numd(data_len));
-        c.common.cdw11 = cpu_to_le32(NVME_EXTENDED_DATA_STRUCT);
+        cdw10 = nvme_bytes_to_numd(data_len);
+        cdw11 = NVME_EXTENDED_DATA_STRUCT;
         *eds = true;
 
 retry:
-        ret = nvme_send_pr_command(bdev, &c, data, data_len);
+        ret = __nvme_send_pr_command(bdev, cdw10, cdw11, nvme_cmd_resv_report,
+                                     data, data_len);
         if (ret == NVME_SC_HOST_ID_INCONSIST &&
-            c.common.cdw11 == cpu_to_le32(NVME_EXTENDED_DATA_STRUCT)) {
-                c.common.cdw11 = 0;
+            cdw11 == NVME_EXTENDED_DATA_STRUCT) {
+                cdw11 = 0;
                 *eds = false;
                 goto retry;
         }
 
-        if (ret < 0)
-                return ret;
-
-        return nvme_status_to_pr_err(ret);
+        return ret < 0 ? ret : nvme_status_to_pr_err(ret);
 }
 
 static int nvme_pr_read_keys(struct block_device *bdev,
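
The reworked nvme_pr_register() above replaces the raw magic numbers in cdw10 with the named constants introduced further down in the nvme.h hunk. As a quick check that the bits line up, here is a small stand-alone C program; it is not kernel code, the two macros are copied from the nvme.h hunk in this diff, and the register action value 2 is kept as a literal because its enum is not part of the hunks shown here.

#include <stdint.h>
#include <stdio.h>

/* Values copied from the nvme.h hunk below; everything else in this file
 * is a stand-alone illustration. */
#define NVME_PR_IGNORE_KEY      (1u << 3)
#define NVME_PR_CPTPL_PERSIST   (3u << 30)

int main(void)
{
        /* Old nvme_pr_register(): action 2 (replace), ignore key, PTPL=1. */
        uint32_t old_cdw10 = 2 | (1u << 3) | (1u << 30) | (1u << 31);

        /* New style: the same bits spelled with named constants. */
        uint32_t new_cdw10 = 2 | NVME_PR_IGNORE_KEY | NVME_PR_CPTPL_PERSIST;

        printf("cdw10 old=0x%08x new=0x%08x -> %s\n", old_cdw10, new_cdw10,
               old_cdw10 == new_cdw10 ? "identical" : "different");
        return 0;
}
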
@@ -1019,7 +1019,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
                 goto destroy_admin;
         }
 
-        if (!(ctrl->ctrl.sgls & (1 << 2))) {
+        if (!(ctrl->ctrl.sgls & NVME_CTRL_SGLS_KSDBDS)) {
                 ret = -EOPNOTSUPP;
                 dev_err(ctrl->ctrl.device,
                         "Mandatory keyed sgls are not supported!\n");
@@ -1051,7 +1051,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
                 ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
         }
 
-        if (ctrl->ctrl.sgls & (1 << 20))
+        if (ctrl->ctrl.sgls & NVME_CTRL_SGLS_SAOS)
                 ctrl->use_inline_data = true;
 
         if (ctrl->ctrl.queue_count > 1) {
@@ -601,11 +601,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
         id->awun = 0;
         id->awupf = 0;
 
-        id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
+        /* we always support SGLs */
+        id->sgls = cpu_to_le32(NVME_CTRL_SGLS_BYTE_ALIGNED);
         if (ctrl->ops->flags & NVMF_KEYED_SGLS)
-                id->sgls |= cpu_to_le32(1 << 2);
+                id->sgls |= cpu_to_le32(NVME_CTRL_SGLS_KSDBDS);
         if (req->port->inline_data_size)
-                id->sgls |= cpu_to_le32(1 << 20);
+                id->sgls |= cpu_to_le32(NVME_CTRL_SGLS_SAOS);
 
         strscpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
 
@@ -333,6 +333,10 @@ typedef unsigned int __bitwise blk_features_t;
 #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
         ((__force blk_features_t)(1u << 15))
 
+/* stacked device can/does support atomic writes */
+#define BLK_FEAT_ATOMIC_WRITES_STACKED \
+        ((__force blk_features_t)(1u << 16))
+
 /*
  * Flags automatically inherited when stacking limits.
  */
@@ -775,13 +779,13 @@ static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag)
         atomic_andnot(flag, &bdev->__bd_flags);
 }
 
-static inline int get_disk_ro(struct gendisk *disk)
+static inline bool get_disk_ro(struct gendisk *disk)
 {
         return bdev_test_flag(disk->part0, BD_READ_ONLY) ||
                 test_bit(GD_READ_ONLY, &disk->state);
 }
 
-static inline int bdev_read_only(struct block_device *bdev)
+static inline bool bdev_read_only(struct block_device *bdev)
 {
         return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk);
 }
@@ -1261,7 +1265,7 @@ static inline unsigned int queue_io_min(const struct request_queue *q)
         return q->limits.io_min;
 }
 
-static inline int bdev_io_min(struct block_device *bdev)
+static inline unsigned int bdev_io_min(struct block_device *bdev)
 {
         return queue_io_min(bdev_get_queue(bdev));
 }
@@ -1271,7 +1275,7 @@ static inline unsigned int queue_io_opt(const struct request_queue *q)
         return q->limits.io_opt;
 }
 
-static inline int bdev_io_opt(struct block_device *bdev)
+static inline unsigned int bdev_io_opt(struct block_device *bdev)
 {
         return queue_io_opt(bdev_get_queue(bdev));
 }
@@ -1417,7 +1421,7 @@ static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
         return is_seq;
 }
 
-static inline int queue_dma_alignment(const struct request_queue *q)
+static inline unsigned int queue_dma_alignment(const struct request_queue *q)
 {
         return q->limits.dma_alignment;
 }
@@ -1458,12 +1462,13 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev,
                                 bdev_logical_block_size(bdev) - 1);
 }
 
-static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
+static inline unsigned int
+blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
 {
         return lim->dma_alignment | lim->dma_pad_mask;
 }
 
-static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
+static inline bool blk_rq_aligned(struct request_queue *q, unsigned long addr,
                                  unsigned int len)
 {
         unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits);
@@ -1581,7 +1586,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
         return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev);
 }
 
-int bdev_read_only(struct block_device *bdev);
 int set_blocksize(struct file *file, int size);
 
 int lookup_bdev(const char *pathname, dev_t *dev);
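
The blkdev.h hunks above only change return types: the read-only helpers answer yes/no questions and the limit/alignment helpers return unsigned masks or sizes, so bool and unsigned int match what callers actually receive. A minimal user-space analogue of the blk_rq_aligned() check, written here under the assumption that the alignment value is a mask of low bits that must be zero in both the buffer address and the length (it is not the kernel helper itself), looks like this:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* User-space analogue of a DMA-alignment check; align_mask plays the role
 * of dma_alignment | dma_pad_mask. */
static bool rq_aligned(uint64_t addr, uint32_t len, uint32_t align_mask)
{
        return !(addr & align_mask) && !(len & align_mask);
}

int main(void)
{
        uint32_t mask = 511;    /* e.g. 512-byte alignment requirement */

        printf("%d %d\n", rq_aligned(0x1000, 4096, mask),   /* aligned   */
                          rq_aligned(0x1001, 4096, mask));  /* unaligned */
        return 0;
}
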
@@ -389,6 +389,11 @@ enum {
         NVME_CTRL_CTRATT_PREDICTABLE_LAT        = 1 << 5,
         NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY  = 1 << 7,
         NVME_CTRL_CTRATT_UUID_LIST              = 1 << 9,
+        NVME_CTRL_SGLS_BYTE_ALIGNED             = 1,
+        NVME_CTRL_SGLS_DWORD_ALIGNED            = 2,
+        NVME_CTRL_SGLS_KSDBDS                   = 1 << 2,
+        NVME_CTRL_SGLS_MSDS                     = 1 << 19,
+        NVME_CTRL_SGLS_SAOS                     = 1 << 20,
 };
 
 struct nvme_lbaf {
@@ -2166,4 +2171,13 @@ enum nvme_pr_release_action {
         NVME_PR_RELEASE_ACT_CLEAR       = 1,
 };
 
+enum nvme_pr_change_ptpl {
+        NVME_PR_CPTPL_NO_CHANGE         = 0,
+        NVME_PR_CPTPL_RESV              = 1 << 30,
+        NVME_PR_CPTPL_CLEARED           = 2 << 30,
+        NVME_PR_CPTPL_PERSIST           = 3 << 30,
+};
+
+#define NVME_PR_IGNORE_KEY      (1 << 3)
+
 #endif /* _LINUX_NVME_H */
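
The NVME_CTRL_SGLS_* values added above name bits of the Identify Controller SGLS field that the rdma.c and target hunks earlier in this diff spelled as 1 << 2 and 1 << 20. Bits 1:0 form a small field (byte- versus dword-aligned SGL support) while the higher values are individual flags, which is why the illustrative decoder below masks the low field separately. The decoder is only a sketch, not kernel code; the descriptions in the printouts are rough readings of the bit names as they are used in this diff.

#include <stdint.h>
#include <stdio.h>

/* Constants copied from the nvme.h hunk above. */
#define NVME_CTRL_SGLS_BYTE_ALIGNED     1
#define NVME_CTRL_SGLS_DWORD_ALIGNED    2
#define NVME_CTRL_SGLS_KSDBDS           (1 << 2)
#define NVME_CTRL_SGLS_MSDS             (1 << 19)
#define NVME_CTRL_SGLS_SAOS             (1 << 20)

static void decode_sgls(uint32_t sgls)
{
        uint32_t support = sgls & 0x3;  /* bits 1:0 are a field, not flags */

        printf("SGLs supported: %s\n",
               support == NVME_CTRL_SGLS_BYTE_ALIGNED ? "yes (byte aligned)" :
               support == NVME_CTRL_SGLS_DWORD_ALIGNED ? "yes (dword aligned)" :
               "no");
        printf("keyed SGL data block (KSDBDS): %s\n",
               sgls & NVME_CTRL_SGLS_KSDBDS ? "yes" : "no");
        printf("metadata SGL (MSDS): %s\n",
               sgls & NVME_CTRL_SGLS_MSDS ? "yes" : "no");
        printf("SGL address as offset (SAOS): %s\n",
               sgls & NVME_CTRL_SGLS_SAOS ? "yes" : "no");
}

int main(void)
{
        decode_sgls(NVME_CTRL_SGLS_BYTE_ALIGNED | NVME_CTRL_SGLS_KSDBDS |
                    NVME_CTRL_SGLS_SAOS);
        return 0;
}
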
@@ -45,7 +45,7 @@ pub fn rotational(mut self, rotational: bool) -> Self {
 
     /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
     /// and that it is a power of two.
-    fn validate_block_size(size: u32) -> Result<()> {
+    fn validate_block_size(size: u32) -> Result {
         if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
             Err(error::code::EINVAL)
         } else {