for-6.12/block-20240925

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmb0T5AQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpnfHEADCXqmqZC+xr3sHZH9T1lz9KaFp1FjuBhCw
 bGpUgXQ9aLcqQUWJxmYVer8N2x2+Ds+xq4fm/rP1BfvNgRupqheHBwuLxSrz14EX
 lYmKZ+krMIPTDaLFewmEWflDwmZX0WFgV6nKTMLiO5BMeI4zXCkFGtwYFys2+Cdd
 9zYCFPgGDZUR77Ws5PpyqPVz2MoiNtsjrGmHpEmNZ+rIDzlpVOYgYk27X9ZbvNxC
 /l0KTc9+ayAeG0Kx5jO+m6Hrj3I6ehvM9JZMgpS/tF/jtccD2oVkJFJDlU+Jciv6
 BwVzgyDPGV7sXFT1fnSqDBYYwr/73nzNH0Gk8wn4Jg2LhjmVANVo9eQSOXDTYZI+
 O4HfIHGTIrk75TQd4bhq3dqaylS78pKBI/eQJUli2UNoyLWMrMyE88yh2YJam2Fs
 vJ/MHGxvFRurYbAlqLr33nb3ajvpg+D7XuAYfqHPMc2ZUe28Kza50Dj+luNjfVCu
 3qfR6qBlsdWuABtUS3vneB9jZp5jDnOpVfuBgtcAqIboUjehTXsI7If09Ex/mxLq
 O0KqNwBMfunPOKd5kGXlAgY8LRMfOhNaAAFBlXYUZB2eAadQnqVselTFvHMZkXo7
 wH/l6trd+/Tf+7Rav0YduNIlpVr7IctC+A7ph4zPdIjQxFEySCrC7cvAjel29LyV
 zgWW0Mw/sA==
 =yiWu
 -----END PGP SIGNATURE-----

Merge tag 'for-6.12/block-20240925' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:

 - Improve blk-integrity segment counting and merging (Keith)

 - NVMe pull request via Keith:
      - Multipath fixes (Hannes)
      - Sysfs attribute list NULL terminate fix (Shin'ichiro)
      - Remove problematic read-back (Keith)

 - Fix for a regression with the IO scheduler switching freezing from
   6.11 (Damien)

 - Use a raw spinlock for sbitmap, as it may get called from preempt
   disabled context (Ming)

 - Cleanup for bd_claiming waiting, using var_waitqueue() rather than
   the bit waitqueues, as that more accurately describes that it does
   (Neil)

 - Various cleanups (Kanchan, Qiu-ji, David)

* tag 'for-6.12/block-20240925' of git://git.kernel.dk/linux:
  nvme: remove CC register read-back during enabling
  nvme: null terminate nvme_tls_attrs
  nvme-multipath: avoid hang on inaccessible namespaces
  nvme-multipath: system fails to create generic nvme device
  lib/sbitmap: define swap_lock as raw_spinlock_t
  block: Remove unused blk_limits_io_{min,opt}
  drbd: Fix atomicity violation in drbd_uuid_set_bm()
  block: Fix elv_iosched_local_module handling of "none" scheduler
  block: remove bogus union
  block: change wait on bd_claiming to use a var_waitqueue
  blk-integrity: improved sg segment mapping
  block: unexport blk_rq_count_integrity_sg
  nvme-rdma: use request to get integrity segments
  scsi: use request to get integrity segments
  block: provide a request helper for user integrity segments
  blk-integrity: consider entire bio list for merging
  blk-integrity: properly account for segments
  blk-mq: set the nr_integrity_segments from bio
  blk-mq: unconditional nr_integrity_segments
This commit is contained in:
Linus Torvalds 2024-09-25 14:56:40 -07:00
commit 11a299a793
20 changed files with 77 additions and 101 deletions

View File

@ -555,7 +555,7 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder,
/* if claiming is already in progress, wait for it to finish */
if (whole->bd_claiming) {
wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
wait_queue_head_t *wq = __var_waitqueue(&whole->bd_claiming);
DEFINE_WAIT(wait);
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
@ -578,7 +578,7 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
/* tell others that we're done */
BUG_ON(whole->bd_claiming != holder);
whole->bd_claiming = NULL;
wake_up_bit(&whole->bd_claiming, 0);
wake_up_var(&whole->bd_claiming);
}
/**

View File

@ -367,7 +367,6 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
kfree(bvec);
return ret;
}
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
/**
* bio_integrity_prep - Prepare bio for integrity I/O

View File

@ -53,7 +53,6 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
return segments;
}
EXPORT_SYMBOL(blk_rq_count_integrity_sg);
/**
* blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
@ -63,19 +62,20 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg);
*
* Description: Map the integrity vectors in request into a
* scatterlist. The scatterlist must be big enough to hold all
* elements. I.e. sized using blk_rq_count_integrity_sg().
* elements. I.e. sized using blk_rq_count_integrity_sg() or
* rq->nr_integrity_segments.
*/
int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist)
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
struct bio_vec iv, ivprv = { NULL };
struct request_queue *q = rq->q;
struct scatterlist *sg = NULL;
struct bio *bio = rq->bio;
unsigned int segments = 0;
struct bvec_iter iter;
int prev = 0;
bio_for_each_integrity_vec(iv, bio, iter) {
if (prev) {
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;
@ -103,10 +103,30 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
if (sg)
sg_mark_end(sg);
/*
* Something must have been wrong if the figured number of segment
* is bigger than number of req's physical integrity segments
*/
BUG_ON(segments > rq->nr_integrity_segments);
BUG_ON(segments > queue_max_integrity_segments(q));
return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes, u32 seed)
{
int ret = bio_integrity_map_user(rq->bio, ubuf, bytes, seed);
if (ret)
return ret;
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, rq->bio);
rq->cmd_flags |= REQ_INTEGRITY;
return 0;
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_map_user);
bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
struct request *next)
{
@ -134,7 +154,6 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
struct bio *bio)
{
int nr_integrity_segs;
struct bio *next = bio->bi_next;
if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
return true;
@ -145,16 +164,11 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
return false;
bio->bi_next = NULL;
nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
bio->bi_next = next;
if (req->nr_integrity_segments + nr_integrity_segs >
q->limits.max_integrity_segments)
return false;
req->nr_integrity_segments += nr_integrity_segs;
return true;
}

View File

@ -639,6 +639,9 @@ static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
* counters.
*/
req->nr_phys_segments += nr_phys_segs;
if (bio_integrity(bio))
req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
bio);
return 1;
no_merge:
@ -731,6 +734,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
req->nr_integrity_segments += next->nr_integrity_segments;
return 1;
}

View File

@ -376,9 +376,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->io_start_time_ns = 0;
rq->stats_sectors = 0;
rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
rq->nr_integrity_segments = 0;
#endif
rq->end_io = NULL;
rq->end_io_data = NULL;
@ -2546,6 +2544,9 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
rq->__sector = bio->bi_iter.bi_sector;
rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs);
if (bio_integrity(bio))
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
bio);
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);

View File

@ -437,48 +437,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
}
EXPORT_SYMBOL_GPL(queue_limits_set);
/**
* blk_limits_io_min - set minimum request size for a device
* @limits: the queue limits
* @min: smallest I/O size in bytes
*
* Description:
* Some devices have an internal block size bigger than the reported
* hardware sector size. This function can be used to signal the
* smallest I/O the device can perform without incurring a performance
* penalty.
*/
void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
{
limits->io_min = min;
if (limits->io_min < limits->logical_block_size)
limits->io_min = limits->logical_block_size;
if (limits->io_min < limits->physical_block_size)
limits->io_min = limits->physical_block_size;
}
EXPORT_SYMBOL(blk_limits_io_min);
/**
* blk_limits_io_opt - set optimal request size for a device
* @limits: the queue limits
* @opt: smallest I/O size in bytes
*
* Description:
* Storage devices may report an optimal I/O size, which is the
* device's preferred unit for sustained I/O. This is rarely reported
* for disk drives. For RAID arrays it is usually the stripe width or
* the internal track size. A properly aligned multiple of
* optimal_io_size is the preferred request size for workloads where
* sustained throughput is desired.
*/
void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
{
limits->io_opt = opt;
}
EXPORT_SYMBOL(blk_limits_io_opt);
static int queue_limit_alignment_offset(const struct queue_limits *lim,
sector_t sector)
{

View File

@ -715,7 +715,9 @@ int elv_iosched_load_module(struct gendisk *disk, const char *buf,
strscpy(elevator_name, buf, sizeof(elevator_name));
return request_module("%s-iosched", strstrip(elevator_name));
request_module("%s-iosched", strstrip(elevator_name));
return 0;
}
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,

View File

@ -3399,10 +3399,12 @@ void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local)
void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
{
unsigned long flags;
if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
return;
spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) {
spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags);
return;
}
if (val == 0) {
drbd_uuid_move_history(device);
device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];

View File

@ -2468,11 +2468,6 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
if (ret)
return ret;
/* Flush write to device (required if transport is PCI) */
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CC, &ctrl->ctrl_config);
if (ret)
return ret;
/* CAP value may change after initial CC write */
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
if (ret)

View File

@ -3,7 +3,6 @@
* Copyright (c) 2011-2014, Intel Corporation.
* Copyright (c) 2017-2021 Christoph Hellwig.
*/
#include <linux/bio-integrity.h>
#include <linux/blk-integrity.h>
#include <linux/ptrace.h> /* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
@ -153,11 +152,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
bio_set_dev(bio, bdev);
if (has_metadata) {
ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
meta_seed);
ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len,
meta_seed);
if (ret)
goto out_unmap;
req->cmd_flags |= REQ_INTEGRITY;
}
return ret;

View File

@ -421,6 +421,9 @@ static bool nvme_available_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns;
if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
return NULL;
list_for_each_entry_rcu(ns, &head->list, siblings) {
if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
continue;
@ -648,7 +651,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
rc = device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_attr_groups);
if (rc) {
clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
return;
}
nvme_add_ns_head_cdev(head);
@ -969,11 +972,16 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
kblockd_schedule_work(&head->requeue_work);
if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
nvme_cdev_del(&head->cdev, &head->cdev_device);
del_gendisk(head->disk);
}
/*
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
* to allow multipath to fail all I/O.
*/
synchronize_srcu(&head->srcu);
kblockd_schedule_work(&head->requeue_work);
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)

View File

@ -1496,7 +1496,7 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
req->metadata_sgl->sg_table.sgl =
(struct scatterlist *)(req->metadata_sgl + 1);
ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
blk_rq_count_integrity_sg(rq->q, rq->bio),
rq->nr_integrity_segments,
req->metadata_sgl->sg_table.sgl,
NVME_INLINE_METADATA_SG_CNT);
if (unlikely(ret)) {
@ -1504,8 +1504,8 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
goto out_unmap_sg;
}
req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
rq->bio, req->metadata_sgl->sg_table.sgl);
req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq,
req->metadata_sgl->sg_table.sgl);
*pi_count = ib_dma_map_sg(ibdev,
req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents,

View File

@ -767,6 +767,7 @@ static struct attribute *nvme_tls_attrs[] = {
&dev_attr_tls_key.attr,
&dev_attr_tls_configured_key.attr,
&dev_attr_tls_keyring.attr,
NULL,
};
static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,

View File

@ -1163,7 +1163,6 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
if (blk_integrity_rq(rq)) {
struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
int ivecs;
if (WARN_ON_ONCE(!prot_sdb)) {
/*
@ -1175,20 +1174,15 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
goto out_free_sgtables;
}
ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
if (sg_alloc_table_chained(&prot_sdb->table, ivecs,
if (sg_alloc_table_chained(&prot_sdb->table,
rq->nr_integrity_segments,
prot_sdb->table.sgl,
SCSI_INLINE_PROT_SG_CNT)) {
ret = BLK_STS_RESOURCE;
goto out_free_sgtables;
}
count = blk_rq_map_integrity_sg(rq->q, rq->bio,
prot_sdb->table.sgl);
BUG_ON(count > ivecs);
BUG_ON(count > queue_max_integrity_segments(rq->q));
count = blk_rq_map_integrity_sg(rq, prot_sdb->table.sgl);
cmd->prot_sdb = prot_sdb;
cmd->prot_sdb->table.nents = count;
}

View File

@ -25,9 +25,10 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
struct scatterlist *);
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes, u32 seed);
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
@ -96,12 +97,18 @@ static inline int blk_rq_count_integrity_sg(struct request_queue *q,
{
return 0;
}
static inline int blk_rq_map_integrity_sg(struct request_queue *q,
struct bio *b,
static inline int blk_rq_map_integrity_sg(struct request *q,
struct scatterlist *s)
{
return 0;
}
static inline int blk_rq_integrity_map_user(struct request *rq,
void __user *ubuf,
ssize_t bytes,
u32 seed)
{
return -EINVAL;
}
static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
{
return NULL;

View File

@ -149,10 +149,7 @@ struct request {
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
#ifdef CONFIG_BLK_DEV_INTEGRITY
unsigned short nr_integrity_segments;
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct bio_crypt_ctx *crypt_ctx;

View File

@ -251,11 +251,9 @@ struct bio {
struct bio_crypt_ctx *bi_crypt_context;
#endif
union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
};
unsigned short bi_vcnt; /* how many bio_vec's */

View File

@ -968,8 +968,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
/*
* Access functions for manipulating queue properties
*/
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,

View File

@ -40,7 +40,7 @@ struct sbitmap_word {
/**
* @swap_lock: serializes simultaneous updates of ->word and ->cleared
*/
spinlock_t swap_lock;
raw_spinlock_t swap_lock;
} ____cacheline_aligned_in_smp;
/**

View File

@ -65,7 +65,7 @@ static inline bool sbitmap_deferred_clear(struct sbitmap_word *map,
{
unsigned long mask, word_mask;
guard(spinlock_irqsave)(&map->swap_lock);
guard(raw_spinlock_irqsave)(&map->swap_lock);
if (!map->cleared) {
if (depth == 0)
@ -136,7 +136,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
}
for (i = 0; i < sb->map_nr; i++)
spin_lock_init(&sb->map[i].swap_lock);
raw_spin_lock_init(&sb->map[i].swap_lock);
return 0;
}