for-6.8/block-2024-01-18

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmWpoCgQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpqIUEADFvJdC2izkPzYzsOrMK5Rt1H7vaHGKhbA+
 zWCuQaa1xQd8bazq+NVnQpbzgclkE/WodTCNfNXcTTjzeQEmcZC888llP3Y9vwyP
 XfEKH7fSaeKvGigJLro1oPe3YV7/t89F5ol3BoZayfzJF8GEU9BXRWzgOkZzijnk
 xdm5wUyn/GknksMuQQraZ+U6bQRFLBOulzoaQeMD6Dosx+uRlM4WvAJawC+uOV6R
 qPT2BVSfYGzmgEKvoaphw0FMkUhFBMDHfXTpQBi5tIzTKOaof8tynYEGz0FHZWeh
 V0JEEp+3jLWFxFXeEcXgBVPJPE8J0DzGm9g17/uwC2Yhmlbw4FKZVRvGG+PpeUso
 D5aqhqm3w0x7HgZ7JKwy/aUctADYvjVcSVzPHTaFK0aCSYCIAXxqv4p7fOoxPqyx
 T32IUHTzGtkCdqzv/xFdtTYhTNM2vyzzbbWj5lXgCBqHsXOVbCh8UM2p+9ec2Umq
 Fo1XF9eoCDe6Sn4s15hJ5G4DEhKGOKkHluvRUdM+0selA5b0sNOeUqlAf2v+0ve3
 Pv3e3X4NPssNIEcsDHf5pc3zGC+LXRS0oFvfIvDESBjwXc3iHIMl+SkjyS57P4Fd
 RKrHEUUiACuCKO/IWqFYLiNBNHnP3RmV5gSxIZr9QJhFSwOzP+/+4++TCdF5vdAV
 amhv+0PdCw==
 =DLW9
 -----END PGP SIGNATURE-----

Merge tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - tcp, fc, and rdma target fixes (Maurizio, Daniel, Hannes,
        Christoph)
      - discard fixes and improvements (Christoph)
      - timeout debug improvements (Keith, Max)
      - various cleanups (Daniel, Max, Giuxen)
      - trace event string fixes (Arnd)
      - shadow doorbell setup on reset fix (William)
      - a write zeroes quirk for SK Hynix (Jim)

 - MD pull request via Song:
      - Sparse warning since v6.0 (Bart)
      - /proc/mdstat regression since v6.7 (Yu Kuai)

 - Use symbolic error value (Christian)

 - IO Priority documentation update (Christian)

 - Fix for accessing queue limits without having entered the queue
   (Christoph, me)

 - Fix for loop dio support (Christoph)

 - Move null_blk off deprecated ida interface (Christophe)

 - Ensure nbd initializes full msghdr (Eric)

 - Fix for a regression with the folio conversion, which is now easier
   to hit because of an unrelated change (Matthew)

 - Remove redundant check in virtio-blk (Li)

 - Fix for a potential hang in sbitmap (Ming)

 - Fix for partial zone appending (Damien)

 - Misc changes and fixes (Bart, me, Kemeng, Dmitry)

* tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux: (45 commits)
  Documentation: block: ioprio: Update schedulers
  loop: fix the the direct I/O support check when used on top of block devices
  blk-mq: Remove the hctx 'run' debugfs attribute
  nbd: always initialize struct msghdr completely
  block: Fix iterating over an empty bio with bio_for_each_folio_all
  block: bio-integrity: fix kcalloc() arguments order
  virtio_blk: remove duplicate check if queue is broken in virtblk_done
  sbitmap: remove stale comment in sbq_calc_wake_batch
  block: Correct a documentation comment in blk-cgroup.c
  null_blk: Remove usage of the deprecated ida_simple_xx() API
  block: ensure we hold a queue reference when using queue limits
  blk-mq: rename blk_mq_can_use_cached_rq
  block: print symbolic error name instead of error code
  blk-mq: fix IO hang from sbitmap wakeup race
  nvmet-rdma: avoid circular locking dependency on install_queue()
  nvmet-tcp: avoid circular locking dependency on install_queue()
  nvme-pci: set doorbell config before unquiescing
  block: fix partial zone append completion handling in req_bio_endio()
  block/iocost: silence warning on 'last_period' potentially being unused
  md/raid1: Use blk_opf_t for read and write operations
  ...
This commit is contained in:
Linus Torvalds 2024-01-18 18:22:40 -08:00
commit 9d1694dc91
34 changed files with 286 additions and 223 deletions

View File

@ -6,17 +6,16 @@ Block io priorities
Intro
-----
With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
priorities are supported for reads on files. This enables users to io nice
processes or process groups, similar to what has been possible with cpu
scheduling for ages. This document mainly details the current possibilities
with cfq; other io schedulers do not support io priorities thus far.
The io priority feature enables users to io nice processes or process groups,
similar to what has been possible with cpu scheduling for ages. Support for io
priorities is io scheduler dependent and currently supported by bfq and
mq-deadline.
Scheduling classes
------------------
CFQ implements three generic scheduling classes that determine how io is
served for a process.
Three generic scheduling classes are implemented for io priorities that
determine how io is served for a process.
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system, processes from this class are

View File

@ -336,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
if (nr_vecs > BIO_MAX_VECS)
return -E2BIG;
if (nr_vecs > UIO_FASTIOV) {
bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
if (!bvec)
return -ENOMEM;
pages = NULL;

View File

@ -300,7 +300,7 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
* @disk: gendisk the new blkg is associated with
* @gfp_mask: allocation mask to use
*
* Allocate a new blkg assocating @blkcg and @q.
* Allocate a new blkg associating @blkcg and @disk.
*/
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
gfp_t gfp_mask)

View File

@ -1261,7 +1261,7 @@ static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
u64 last_period, cur_period;
u64 __maybe_unused last_period, cur_period;
u64 vtime, vtarget;
int i;

View File

@ -479,23 +479,6 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m)
return res;
}
/*
* Read side of the per-hctx "run" debugfs attribute: print hctx->run as an
* unsigned long followed by a newline.
*
* @data: the struct blk_mq_hw_ctx the attribute belongs to
* @m: seq_file receiving the output
*
* Always returns 0.
*/
static int hctx_run_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
seq_printf(m, "%lu\n", hctx->run);
return 0;
}
/*
* Write side of the per-hctx "run" debugfs attribute: any write resets
* hctx->run to 0.
*
* @data: the struct blk_mq_hw_ctx the attribute belongs to
* @buf, @ppos: not examined; the written bytes do not matter
* @count: number of bytes written
*
* Returns @count, i.e. the whole write is reported as consumed.
*/
static ssize_t hctx_run_write(void *data, const char __user *buf, size_t count,
loff_t *ppos)
{
struct blk_mq_hw_ctx *hctx = data;
hctx->run = 0;
return count;
}
static int hctx_active_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
@ -624,7 +607,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
{"tags_bitmap", 0400, hctx_tags_bitmap_show},
{"sched_tags", 0400, hctx_sched_tags_show},
{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},
{"run", 0600, hctx_run_show, hctx_run_write},
{"active", 0400, hctx_active_show},
{"dispatch_busy", 0400, hctx_dispatch_busy_show},
{"type", 0400, hctx_type_show},

View File

@ -324,8 +324,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
return;
hctx->run++;
/*
* A return of -EAGAIN is an indication that hctx->dispatch is not
* empty and we must run again in order to avoid starving flushes.

View File

@ -772,11 +772,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
* For such case, force the completed nbytes to be equal to
* the BIO size so that bio_advance() sets the BIO remaining
* size to 0 and we end up calling bio_endio() before returning.
*/
if (bio->bi_iter.bi_size != nbytes)
if (bio->bi_iter.bi_size != nbytes) {
bio->bi_status = BLK_STS_IOERR;
else
nbytes = bio->bi_iter.bi_size;
} else {
bio->bi_iter.bi_sector = rq->__sector;
}
}
bio_advance(bio, nbytes);
@ -1859,6 +1864,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
__add_wait_queue(wq, wait);
/*
* Add one explicit barrier since blk_mq_get_driver_tag() may
* not imply barrier in case of failure.
*
* Order adding us to wait queue and allocating driver tag.
*
* The pair is the one implied in sbitmap_queue_wake_up() which
* orders clearing sbitmap tag bits and waitqueue_active() in
* __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
*
* Otherwise, re-order of adding wait queue and getting driver tag
* may cause __sbitmap_queue_wake_up() to wake up nothing because
* the waitqueue_active() may not observe us in wait queue.
*/
smp_mb();
/*
* It's possible that a tag was freed in the window between the
* allocation failure and adding the hardware queue to the wait
@ -2891,8 +2912,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
return NULL;
}
/* return true if this @rq can be used for @bio */
static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
/*
* Check if we can use the passed on request for submitting the passed in bio,
* and remove it from the request list if it can be used.
*/
static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
struct bio *bio)
{
enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
@ -2952,12 +2976,6 @@ void blk_mq_submit_bio(struct bio *bio)
blk_status_t ret;
bio = blk_queue_bounce(bio, q);
if (bio_may_exceed_limits(bio, &q->limits)) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
return;
}
bio_set_ioprio(bio);
if (plug) {
@ -2966,16 +2984,26 @@ void blk_mq_submit_bio(struct bio *bio)
rq = NULL;
}
if (rq) {
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
return;
}
if (!bio_integrity_prep(bio))
return;
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
if (blk_mq_can_use_cached_rq(rq, plug, bio))
if (blk_mq_use_cached_rq(rq, plug, bio))
goto done;
percpu_ref_get(&q->q_usage_counter);
} else {
if (unlikely(bio_queue_enter(bio)))
return;
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
goto fail;
}
if (!bio_integrity_prep(bio))
goto fail;
}

View File

@ -139,32 +139,6 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
return ret;
}
/*
* If the task has set an I/O priority, use that. Otherwise, return
* the default I/O priority.
*
* Expected to be called for current task or with task_lock() held to keep
* io_context stable.
*
* Returns ioc->ioprio when @p has an io_context, else IOPRIO_DEFAULT. If the
* class of that value is IOPRIO_CLASS_NONE, the result is instead built from
* task_nice_ioclass(p) and task_nice_ioprio(p).
*/
int __get_task_ioprio(struct task_struct *p)
{
struct io_context *ioc = p->io_context;
int prio;
/*
* For a task other than current, assert that p->alloc_lock is held
* (presumably the lock task_lock() takes -- confirm).
*/
if (p != current)
lockdep_assert_held(&p->alloc_lock);
/* No io_context attached: start from the default priority. */
if (ioc)
prio = ioc->ioprio;
else
prio = IOPRIO_DEFAULT;
/* No class set: compose class and level from the task_nice_*() helpers. */
if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
task_nice_ioprio(p));
return prio;
}
EXPORT_SYMBOL_GPL(__get_task_ioprio);
static int get_task_ioprio(struct task_struct *p)
{
int ret;

View File

@ -562,8 +562,8 @@ static bool blk_add_partition(struct gendisk *disk,
part = add_partition(disk, p, from, size, state->parts[p].flags,
&state->parts[p].info);
if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
printk(KERN_ERR " %s: p%d could not be added: %ld\n",
disk->disk_name, p, -PTR_ERR(part));
printk(KERN_ERR " %s: p%d could not be added: %pe\n",
disk->disk_name, p, part);
return true;
}

View File

@ -165,39 +165,37 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
return get_size(lo->lo_offset, lo->lo_sizelimit, file);
}
/*
* Check whether direct I/O can be used on top of @backing_bdev.
*
* Direct I/O is supported only if the logical block size of the loop device
* is not smaller than that of the backing device (equal sizes are fine), and
* lo_offset is aligned to the backing device's logical block size.
*
* Returns true when both conditions hold, false otherwise.
*/
static bool lo_bdev_can_use_dio(struct loop_device *lo,
struct block_device *backing_bdev)
{
unsigned short sb_bsize = bdev_logical_block_size(backing_bdev);
/* The loop queue must not expose smaller blocks than the backing device. */
if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
return false;
/* Mask test for alignment; assumes sb_bsize is a power of two. */
if (lo->lo_offset & (sb_bsize - 1))
return false;
return true;
}
static void __loop_update_dio(struct loop_device *lo, bool dio)
{
struct file *file = lo->lo_backing_file;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
unsigned short sb_bsize = 0;
unsigned dio_align = 0;
struct inode *inode = file->f_mapping->host;
struct block_device *backing_bdev = NULL;
bool use_dio;
if (inode->i_sb->s_bdev) {
sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
dio_align = sb_bsize - 1;
}
if (S_ISBLK(inode->i_mode))
backing_bdev = I_BDEV(inode);
else if (inode->i_sb->s_bdev)
backing_bdev = inode->i_sb->s_bdev;
/*
* We support direct I/O only if lo_offset is aligned with the
* logical I/O size of backing device, and the logical block
* size of loop is bigger than the backing device's.
*
* TODO: the above condition may be loosed in the future, and
* direct I/O may be switched runtime at that time because most
* of requests in sane applications should be PAGE_SIZE aligned
*/
if (dio) {
if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
!(lo->lo_offset & dio_align) &&
(file->f_mode & FMODE_CAN_ODIRECT))
use_dio = true;
else
use_dio = false;
} else {
use_dio = false;
}
use_dio = dio && (file->f_mode & FMODE_CAN_ODIRECT) &&
(!backing_bdev || lo_bdev_can_use_dio(lo, backing_bdev));
if (lo->use_dio == use_dio)
return;

View File

@ -508,7 +508,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
struct iov_iter *iter, int msg_flags, int *sent)
{
int result;
struct msghdr msg;
struct msghdr msg = {} ;
unsigned int noreclaim_flag;
if (unlikely(!sock)) {
@ -524,10 +524,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
do {
sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
sock->sk->sk_use_task_frag = false;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
if (send)

View File

@ -1840,7 +1840,7 @@ static void null_del_dev(struct nullb *nullb)
dev = nullb->dev;
ida_simple_remove(&nullb_indexes, nullb->index);
ida_free(&nullb_indexes, nullb->index);
list_del_init(&nullb->list);
@ -2174,7 +2174,7 @@ static int null_add_dev(struct nullb_device *dev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
mutex_lock(&lock);
rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
if (rv < 0) {
mutex_unlock(&lock);
goto out_cleanup_zone;

View File

@ -367,8 +367,6 @@ static void virtblk_done(struct virtqueue *vq)
blk_mq_complete_request(req);
req_done = true;
}
if (unlikely(virtqueue_is_broken(vq)))
break;
} while (!virtqueue_enable_cb(vq));
/* In case queue is stopped waiting for more buffers. */

View File

@ -8132,6 +8132,19 @@ static void status_unused(struct seq_file *seq)
seq_printf(seq, "\n");
}
/*
* Emit the "Personalities : " line into @seq: one "[name] " entry for every
* md_personality on pers_list, followed by a newline. pers_list is walked
* with pers_lock held.
*/
static void status_personalities(struct seq_file *seq)
{
struct md_personality *pers;
seq_puts(seq, "Personalities : ");
spin_lock(&pers_lock);
list_for_each_entry(pers, &pers_list, list)
seq_printf(seq, "[%s] ", pers->name);
spin_unlock(&pers_lock);
seq_puts(seq, "\n");
}
static int status_resync(struct seq_file *seq, struct mddev *mddev)
{
sector_t max_sectors, resync, res;
@ -8273,20 +8286,10 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
static void *md_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(&all_mddevs_lock)
{
struct md_personality *pers;
seq_puts(seq, "Personalities : ");
spin_lock(&pers_lock);
list_for_each_entry(pers, &pers_list, list)
seq_printf(seq, "[%s] ", pers->name);
spin_unlock(&pers_lock);
seq_puts(seq, "\n");
seq->poll_event = atomic_read(&md_event_count);
spin_lock(&all_mddevs_lock);
return seq_list_start(&all_mddevs, *pos);
return seq_list_start_head(&all_mddevs, *pos);
}
static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@ -8297,16 +8300,23 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void md_seq_stop(struct seq_file *seq, void *v)
__releases(&all_mddevs_lock)
{
status_unused(seq);
spin_unlock(&all_mddevs_lock);
}
static int md_seq_show(struct seq_file *seq, void *v)
{
struct mddev *mddev = list_entry(v, struct mddev, all_mddevs);
struct mddev *mddev;
sector_t sectors;
struct md_rdev *rdev;
if (v == &all_mddevs) {
status_personalities(seq);
if (list_empty(&all_mddevs))
status_unused(seq);
return 0;
}
mddev = list_entry(v, struct mddev, all_mddevs);
if (!mddev_get(mddev))
return 0;
@ -8382,6 +8392,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
}
spin_unlock(&mddev->lock);
spin_lock(&all_mddevs_lock);
if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
status_unused(seq);
if (atomic_dec_and_test(&mddev->active))
__mddev_put(mddev);

View File

@ -1968,12 +1968,12 @@ static void end_sync_write(struct bio *bio)
}
static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
int sectors, struct page *page, int rw)
int sectors, struct page *page, blk_opf_t rw)
{
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
/* success */
return 1;
if (rw == WRITE) {
if (rw == REQ_OP_WRITE) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement,
&rdev->flags))
@ -2090,7 +2090,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
rdev = conf->mirrors[d].rdev;
if (r1_sync_page_io(rdev, sect, s,
pages[idx],
WRITE) == 0) {
REQ_OP_WRITE) == 0) {
r1_bio->bios[d]->bi_end_io = NULL;
rdev_dec_pending(rdev, mddev);
}
@ -2105,7 +2105,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
rdev = conf->mirrors[d].rdev;
if (r1_sync_page_io(rdev, sect, s,
pages[idx],
READ) != 0)
REQ_OP_READ) != 0)
atomic_add(s, &rdev->corrected_errors);
}
sectors -= s;
@ -2321,7 +2321,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r1_sync_page_io(rdev, sect, s,
conf->tmppage, WRITE);
conf->tmppage, REQ_OP_WRITE);
rdev_dec_pending(rdev, mddev);
}
}
@ -2335,7 +2335,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
if (r1_sync_page_io(rdev, sect, s,
conf->tmppage, READ)) {
conf->tmppage, REQ_OP_READ)) {
atomic_add(s, &rdev->corrected_errors);
pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n",
mdname(mddev), s,

View File

@ -111,7 +111,7 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
* should be preferred to 'generated' PSKs,
* and SHA-384 should be preferred to SHA-256.
*/
struct nvme_tls_psk_priority_list {
static struct nvme_tls_psk_priority_list {
bool generated;
enum nvme_tcp_tls_cipher cipher;
} nvme_tls_psk_prio[] = {

View File

@ -1740,13 +1740,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
struct nvme_ns_head *head)
{
struct request_queue *queue = disk->queue;
u32 size = queue_logical_block_size(queue);
u32 max_discard_sectors;
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX))
ctrl->max_discard_sectors =
nvme_lba_to_sect(head, ctrl->dmrsl);
if (ctrl->max_discard_sectors == 0) {
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
} else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
max_discard_sectors = UINT_MAX;
} else {
blk_queue_max_discard_sectors(queue, 0);
return;
}
@ -1754,14 +1754,22 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
queue->limits.discard_granularity = size;
/* If discard is already enabled, don't reset queue limits */
/*
* If discard is already enabled, don't reset queue limits.
*
* This works around the fact that the block layer can't cope well with
* updating the hardware limits when overridden through sysfs. This is
* harmless because discard limits in NVMe are purely advisory.
*/
if (queue->limits.max_discard_sectors)
return;
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
blk_queue_max_discard_sectors(queue, max_discard_sectors);
if (ctrl->dmrl)
blk_queue_max_discard_segments(queue, ctrl->dmrl);
else
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
queue->limits.discard_granularity = queue_logical_block_size(queue);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@ -2930,14 +2938,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
struct nvme_id_ctrl_nvm *id;
int ret;
if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
ctrl->max_discard_sectors = UINT_MAX;
ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
} else {
ctrl->max_discard_sectors = 0;
ctrl->max_discard_segments = 0;
}
/*
* Even though NVMe spec explicitly states that MDTS is not applicable
* to the write-zeroes, we are cautious and limit the size to the
@ -2967,8 +2967,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
if (ret)
goto free_data;
if (id->dmrl)
ctrl->max_discard_segments = id->dmrl;
ctrl->dmrl = id->dmrl;
ctrl->dmrsl = le32_to_cpu(id->dmrsl);
if (id->wzsl)
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);

View File

@ -303,14 +303,13 @@ struct nvme_ctrl {
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
u32 max_discard_sectors;
u32 max_discard_segments;
u32 max_zeroes_sectors;
#ifdef CONFIG_BLK_DEV_ZONED
u32 max_zone_append;
#endif
u16 crdt[3];
u16 oncs;
u8 dmrl;
u32 dmrsl;
u16 oacs;
u16 sqsize;
@ -932,6 +931,10 @@ extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
/*
* Return true if @disk uses nvme_ns_head_ops as its block device operations,
* false for any other disk.
*/
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
return disk->fops == &nvme_ns_head_ops;
}
#else
#define multipath false
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
@ -1009,6 +1012,10 @@ static inline void nvme_mpath_start_request(struct request *rq)
/*
* Empty stub: does nothing with @rq.
* NOTE(review): appears to sit in the #else branch of CONFIG_NVME_MULTIPATH;
* confirm against the full header.
*/
static inline void nvme_mpath_end_request(struct request *rq)
{
}
/*
* Stub variant: always returns false, whatever @disk is.
* NOTE(review): appears to be the version used when CONFIG_NVME_MULTIPATH is
* not set (it precedes that #endif); confirm against the full header.
*/
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
return false;
}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_revalidate_zones(struct nvme_ns *ns);
@ -1037,7 +1044,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
return dev_to_disk(dev)->private_data;
struct gendisk *disk = dev_to_disk(dev);
WARN_ON(nvme_disk_is_ns_head(disk));
return disk->private_data;
}
#ifdef CONFIG_NVME_HWMON

View File

@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
struct request *abort_req;
struct nvme_command cmd = { };
u32 csts = readl(dev->bar + NVME_REG_CSTS);
u8 opcode;
/* If PCI error recovery process is happening, we cannot reset or
* the recovery mechanism will surely fail.
@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
"I/O tag %d (%04x) QID %d timeout, completion polled\n",
req->tag, nvme_cid(req), nvmeq->qid);
return BLK_EH_DONE;
}
@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
fallthrough;
case NVME_CTRL_DELETING:
dev_warn_ratelimited(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
"I/O tag %d (%04x) QID %d timeout, disable controller\n",
req->tag, nvme_cid(req), nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
nvme_dev_disable(dev, true);
return BLK_EH_DONE;
@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* command was already aborted once before and still hasn't been
* returned to the driver, or if this is the admin queue.
*/
opcode = nvme_req(req)->cmd->common.opcode;
if (!nvmeq->qid || iod->aborted) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
req->tag, nvme_cid(req), opcode,
nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
goto disable;
}
@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
dev_warn(nvmeq->dev->ctrl.device,
"I/O %d (%s) QID %d timeout, aborting\n",
req->tag,
nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
nvmeq->qid);
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n",
req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
blk_rq_bytes(req));
abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
BLK_MQ_REQ_NOWAIT);
@ -2743,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work)
* controller around but remove all namespaces.
*/
if (dev->online_queues > 1) {
nvme_dbbuf_set(dev);
nvme_unquiesce_io_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
nvme_pci_update_nr_queues(dev);
nvme_dbbuf_set(dev);
nvme_unfreeze(&dev->ctrl);
} else {
dev_warn(dev->ctrl.device, "IO queues lost\n");
@ -3408,6 +3411,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1c5c, 0x1D59), /* SK Hynix BC901 */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */

View File

@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev,
struct nvme_command *c, void *data, unsigned int data_len)
{
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
bdev->bd_disk->fops == &nvme_ns_head_ops)
nvme_disk_is_ns_head(bdev->bd_disk))
return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,

View File

@ -1946,9 +1946,14 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
u8 opcode = req->req.cmd->common.opcode;
u8 fctype = req->req.cmd->fabrics.fctype;
int qid = nvme_rdma_queue_idx(queue);
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));
dev_warn(ctrl->ctrl.device,
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
rq->tag, nvme_cid(rq), opcode,
nvme_opcode_str(qid, opcode, fctype), qid);
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*

View File

@ -39,10 +39,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
if (disk->fops == &nvme_bdev_ops)
return nvme_get_ns_from_dev(dev)->head;
else
if (nvme_disk_is_ns_head(disk))
return disk->private_data;
return nvme_get_ns_from_dev(dev)->head;
}
static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
@ -233,7 +232,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
}
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
/* per-path attr */
if (nvme_disk_is_ns_head(dev_to_disk(dev)))
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;

View File

@ -1922,14 +1922,13 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
ctrl->opts->subsysnqn);
if (!pskid) {
dev_err(ctrl->device, "no valid PSK found\n");
ret = -ENOKEY;
goto out_free_queue;
return -ENOKEY;
}
}
ret = nvme_tcp_alloc_queue(ctrl, 0, pskid);
if (ret)
goto out_free_queue;
return ret;
ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
if (ret)
@ -2433,9 +2432,9 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
int qid = nvme_tcp_queue_id(req->queue);
dev_warn(ctrl->device,
"queue %d: timeout cid %#x type %d opcode %#x (%s)\n",
nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
opc, nvme_opcode_str(qid, opc, fctype));
"I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n",
rq->tag, nvme_cid(rq), pdu->hdr.type, opc,
nvme_opcode_str(qid, opc, fctype), qid);
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
/*

View File

@ -1031,7 +1031,7 @@ nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
list_for_each_entry(host, &tgtport->host_list, host_list) {
if (host->hosthandle == hosthandle && !host->invalid) {
if (nvmet_fc_hostport_get(host))
return (host);
return host;
}
}

View File

@ -995,11 +995,6 @@ fcloop_nport_free(struct kref *ref)
{
struct fcloop_nport *nport =
container_of(ref, struct fcloop_nport, ref);
unsigned long flags;
spin_lock_irqsave(&fcloop_lock, flags);
list_del(&nport->nport_list);
spin_unlock_irqrestore(&fcloop_lock, flags);
kfree(nport);
}
@ -1357,6 +1352,8 @@ __unlink_remote_port(struct fcloop_nport *nport)
nport->tport->remoteport = NULL;
nport->rport = NULL;
list_del(&nport->nport_list);
return rport;
}

View File

@ -37,6 +37,8 @@
#define NVMET_RDMA_MAX_MDTS 8
#define NVMET_RDMA_MAX_METADATA_MDTS 5
/*
* Backlog passed to rdma_listen(). nvmet_rdma_queue_connect() also uses it as
* a threshold: a connect with host_qid == 0 is answered with
* NVME_SC_CONNECT_CTRL_BUSY when more than this many queues of the same
* controller are in NVMET_RDMA_Q_DISCONNECTING state.
*/
#define NVMET_RDMA_BACKLOG 128
struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
@ -1583,8 +1585,19 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
}
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
flush_workqueue(nvmet_wq);
struct nvmet_rdma_queue *q;
int pending = 0;
/* Check for pending controller teardown */
mutex_lock(&nvmet_rdma_queue_mutex);
list_for_each_entry(q, &nvmet_rdma_queue_list, queue_list) {
if (q->nvme_sq.ctrl == queue->nvme_sq.ctrl &&
q->state == NVMET_RDMA_Q_DISCONNECTING)
pending++;
}
mutex_unlock(&nvmet_rdma_queue_mutex);
if (pending > NVMET_RDMA_BACKLOG)
return NVME_SC_CONNECT_CTRL_BUSY;
}
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
@ -1880,7 +1893,7 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
ret = rdma_listen(cm_id, 128);
ret = rdma_listen(cm_id, NVMET_RDMA_BACKLOG);
if (ret) {
pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
goto out_destroy_id;

View File

@ -24,6 +24,8 @@
#include "nvmet.h"
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
/*
* Value placed in icresp->maxdata and upper bound enforced on the data length
* of a received H2CData PDU. 0x400000 bytes is 4 MiB.
*/
#define NVMET_TCP_MAXH2CDATA 0x400000 /* 4M arbitrary limit */
/*
* Backlog passed to kernel_listen(). nvmet_tcp_install_queue() also uses it
* as a threshold: installing a queue with qid 0 fails with
* NVME_SC_CONNECT_CTRL_BUSY when more than this many queues of the same
* controller are in NVMET_TCP_Q_DISCONNECTING state.
*/
#define NVMET_TCP_BACKLOG 128
static int param_store_val(const char *str, int *val, int min, int max)
{
@ -923,7 +925,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
icresp->hdr.pdo = 0;
icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */
icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA);
icresp->cpda = 0;
if (queue->hdr_digest)
icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
@ -978,13 +980,13 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
{
struct nvme_tcp_data_pdu *data = &queue->pdu.data;
struct nvmet_tcp_cmd *cmd;
unsigned int exp_data_len;
if (likely(queue->nr_cmds)) {
if (unlikely(data->ttag >= queue->nr_cmds)) {
pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
queue->idx, data->ttag, queue->nr_cmds);
nvmet_tcp_fatal_error(queue);
return -EPROTO;
goto err_proto;
}
cmd = &queue->cmds[data->ttag];
} else {
@ -995,19 +997,32 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
pr_err("ttag %u unexpected data offset %u (expected %u)\n",
data->ttag, le32_to_cpu(data->data_offset),
cmd->rbytes_done);
/* FIXME: use path and transport errors */
nvmet_req_complete(&cmd->req,
NVME_SC_INVALID_FIELD | NVME_SC_DNR);
return -EPROTO;
goto err_proto;
}
exp_data_len = le32_to_cpu(data->hdr.plen) -
nvmet_tcp_hdgst_len(queue) -
nvmet_tcp_ddgst_len(queue) -
sizeof(*data);
cmd->pdu_len = le32_to_cpu(data->data_length);
if (unlikely(cmd->pdu_len != exp_data_len ||
cmd->pdu_len == 0 ||
cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) {
pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len);
goto err_proto;
}
cmd->pdu_recv = 0;
nvmet_tcp_build_pdu_iovec(cmd);
queue->cmd = cmd;
queue->rcv_state = NVMET_TCP_RECV_DATA;
return 0;
err_proto:
/* FIXME: use proper transport errors */
nvmet_tcp_fatal_error(queue);
return -EPROTO;
}
static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
@ -1768,7 +1783,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue)
(int)sizeof(struct nvme_tcp_icreq_pdu));
if (hdr->type == nvme_tcp_icreq &&
hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) &&
hdr->plen == (__le32)sizeof(struct nvme_tcp_icreq_pdu)) {
hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) {
pr_debug("queue %d: icreq detected\n",
queue->idx);
return len;
@ -2053,7 +2068,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
ret = kernel_listen(port->sock, 128);
ret = kernel_listen(port->sock, NVMET_TCP_BACKLOG);
if (ret) {
pr_err("failed to listen %d on port sock\n", ret);
goto err_sock;
@ -2119,8 +2134,19 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
container_of(sq, struct nvmet_tcp_queue, nvme_sq);
if (sq->qid == 0) {
/* Let inflight controller teardown complete */
flush_workqueue(nvmet_wq);
struct nvmet_tcp_queue *q;
int pending = 0;
/* Check for pending controller teardown */
mutex_lock(&nvmet_tcp_queue_mutex);
list_for_each_entry(q, &nvmet_tcp_queue_list, queue_list) {
if (q->nvme_sq.ctrl == sq->ctrl &&
q->state == NVMET_TCP_Q_DISCONNECTING)
pending++;
}
mutex_unlock(&nvmet_tcp_queue_mutex);
if (pending > NVMET_TCP_BACKLOG)
return NVME_SC_CONNECT_CTRL_BUSY;
}
queue->nr_cmds = sq->size * 2;

View File

@ -211,7 +211,7 @@ const char *nvmet_trace_disk_name(struct trace_seq *p, char *name)
return ret;
}
const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id)
{
const char *ret = trace_seq_buffer_ptr(p);
@ -224,8 +224,8 @@ const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
* If we can know the extra data of the connect command in this stage,
* we can update this print statement later.
*/
if (ctrl)
trace_seq_printf(p, "%d", ctrl->cntlid);
if (ctrl_id)
trace_seq_printf(p, "%d", ctrl_id);
else
trace_seq_printf(p, "_");
trace_seq_putc(p, 0);

View File

@ -32,18 +32,24 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype,
nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \
nvmet_trace_parse_admin_cmd(p, opcode, cdw10)))
const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl);
#define __print_ctrl_name(ctrl) \
nvmet_trace_ctrl_name(p, ctrl)
const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id);
#define __print_ctrl_id(ctrl_id) \
nvmet_trace_ctrl_id(p, ctrl_id)
const char *nvmet_trace_disk_name(struct trace_seq *p, char *name);
#define __print_disk_name(name) \
nvmet_trace_disk_name(p, name)
#ifndef TRACE_HEADER_MULTI_READ
static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req)
static inline u16 nvmet_req_to_ctrl_id(struct nvmet_req *req)
{
return req->sq->ctrl;
/*
* The queue and controller pointers are not valid until an association
* has been established.
*/
if (!req->sq || !req->sq->ctrl)
return 0;
return req->sq->ctrl->cntlid;
}
static inline void __assign_req_name(char *name, struct nvmet_req *req)
@ -53,8 +59,7 @@ static inline void __assign_req_name(char *name, struct nvmet_req *req)
return;
}
strncpy(name, req->ns->device_path,
min_t(size_t, DISK_NAME_LEN, strlen(req->ns->device_path)));
strscpy_pad(name, req->ns->device_path, DISK_NAME_LEN);
}
#endif
@ -63,7 +68,7 @@ TRACE_EVENT(nvmet_req_init,
TP_ARGS(req, cmd),
TP_STRUCT__entry(
__field(struct nvme_command *, cmd)
__field(struct nvmet_ctrl *, ctrl)
__field(u16, ctrl_id)
__array(char, disk, DISK_NAME_LEN)
__field(int, qid)
__field(u16, cid)
@ -76,7 +81,7 @@ TRACE_EVENT(nvmet_req_init,
),
TP_fast_assign(
__entry->cmd = cmd;
__entry->ctrl = nvmet_req_to_ctrl(req);
__entry->ctrl_id = nvmet_req_to_ctrl_id(req);
__assign_req_name(__entry->disk, req);
__entry->qid = req->sq->qid;
__entry->cid = cmd->common.command_id;
@ -85,12 +90,12 @@ TRACE_EVENT(nvmet_req_init,
__entry->flags = cmd->common.flags;
__entry->nsid = le32_to_cpu(cmd->common.nsid);
__entry->metadata = le64_to_cpu(cmd->common.metadata);
memcpy(__entry->cdw10, &cmd->common.cdw10,
memcpy(__entry->cdw10, &cmd->common.cdws,
sizeof(__entry->cdw10));
),
TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, "
"meta=%#llx, cmd=(%s, %s)",
__print_ctrl_name(__entry->ctrl),
__print_ctrl_id(__entry->ctrl_id),
__print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->nsid,
__entry->flags, __entry->metadata,
@ -104,7 +109,7 @@ TRACE_EVENT(nvmet_req_complete,
TP_PROTO(struct nvmet_req *req),
TP_ARGS(req),
TP_STRUCT__entry(
__field(struct nvmet_ctrl *, ctrl)
__field(u16, ctrl_id)
__array(char, disk, DISK_NAME_LEN)
__field(int, qid)
__field(int, cid)
@ -112,7 +117,7 @@ TRACE_EVENT(nvmet_req_complete,
__field(u16, status)
),
TP_fast_assign(
__entry->ctrl = nvmet_req_to_ctrl(req);
__entry->ctrl_id = nvmet_req_to_ctrl_id(req);
__entry->qid = req->cq->qid;
__entry->cid = req->cqe->command_id;
__entry->result = le64_to_cpu(req->cqe->result.u64);
@ -120,7 +125,7 @@ TRACE_EVENT(nvmet_req_complete,
__assign_req_name(__entry->disk, req);
),
TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x",
__print_ctrl_name(__entry->ctrl),
__print_ctrl_id(__entry->ctrl_id),
__print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->result, __entry->status)

View File

@ -286,6 +286,11 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
{
struct bio_vec *bvec = bio_first_bvec_all(bio) + i;
if (unlikely(i >= bio->bi_vcnt)) {
fi->folio = NULL;
return;
}
fi->folio = page_folio(bvec->bv_page);
fi->offset = bvec->bv_offset +
PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
@ -303,10 +308,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
fi->offset = 0;
fi->length = min(folio_size(fi->folio), fi->_seg_count);
fi->_next = folio_next(fi->folio);
} else if (fi->_i + 1 < bio->bi_vcnt) {
bio_first_folio(fi, bio, fi->_i + 1);
} else {
fi->folio = NULL;
bio_first_folio(fi, bio, fi->_i + 1);
}
}

View File

@ -391,9 +391,6 @@ struct blk_mq_hw_ctx {
*/
struct blk_mq_tags *sched_tags;
/** @run: Number of dispatched requests. */
unsigned long run;
/** @numa_node: NUMA node the storage adapter has been connected to. */
unsigned int numa_node;
/** @queue_num: Index of this hardware queue. */

View File

@ -47,7 +47,30 @@ static inline int task_nice_ioclass(struct task_struct *task)
}
#ifdef CONFIG_BLOCK
int __get_task_ioprio(struct task_struct *p);
/*
 * Return the effective I/O priority of @p.
 *
 * A task without an io_context yields IOPRIO_DEFAULT. Otherwise the priority
 * stored in the io_context is returned, unless its class is
 * IOPRIO_CLASS_NONE, in which case the value is built from
 * task_nice_ioclass() and task_nice_ioprio().
 *
 * Call this for the current task, or with task_lock() held on @p, so that
 * the io_context stays stable while it is inspected.
 */
static inline int __get_task_ioprio(struct task_struct *p)
{
	struct io_context *ctx = p->io_context;
	int ioprio;

	if (ctx) {
		/* Looking at another task is only valid under its alloc_lock. */
		if (current != p)
			lockdep_assert_held(&p->alloc_lock);

		/* Sample the stored priority once and work on the copy. */
		ioprio = ctx->ioprio;
		if (IOPRIO_PRIO_CLASS(ioprio) != IOPRIO_CLASS_NONE)
			return ioprio;

		return IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
					 task_nice_ioprio(p));
	}

	return IOPRIO_DEFAULT;
}
#else
static inline int __get_task_ioprio(struct task_struct *p)
{

View File

@ -20,7 +20,6 @@
#define NVMF_TRSVCID_SIZE 32
#define NVMF_TRADDR_SIZE 256
#define NVMF_TSAS_SIZE 256
#define NVMF_AUTH_HASH_LEN 64
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"

View File

@ -388,11 +388,6 @@ static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
unsigned int shallow_depth;
/*
* For each batch, we wake up one queue. We need to make sure that our
* batch size is small enough that the full depth of the bitmap,
* potentially limited by a shallow depth, is enough to wake up all of
* the queues.
*
* Each full word of the bitmap has bits_per_word bits, and there might
* be a partial word. There are depth / bits_per_word full words and
* depth % bits_per_word bits left over. In bitwise arithmetic: