mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
for-6.9/block-20240310
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmXuFO4QHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpq33D/9hyNyBce2A9iyo026eK8EqLDoed6BPzuvB kLKj5tsGvX4YlfuswvP86M5dgibTASXclnfUK394TijW/JPOfJ3mNhi9gMnHzRoK ZaR1di0Lum56dY1FkpMmWiGmE4fB79PAtXYKtajOkuoIcNzylncEAAACUY4/Ouhg Cm+LMg2prcc+m9g8rKDNQ51pUFg4U21KAUTl35XLMUAaQk1ahW3EDEVYhweC/zwE V/5hJsv8UY72+oQGY2Dc/YgQk/Zj4ZDh7C+oHR9XeB/ro99kr3/Vopagu0gBMLZi Rq6qqz6PVMhVcuz8uN2rsTQKXmXhsBn9/adsl4AKtdxcW5D5moWb5BLq1P0WQylc nzMxa1d6cVcTKZpaUQQv3Rj6ZMrLuDwP277UYHfn5x1oPWYRZCG7FtHuOo1gNcpG DrSNwVG6BSDcbABqI+MIS2oD1JoUMyevjwT7e2hOXukZhc6GLO5F3ODWE5j3KnCR S/aGSAmcdR4fTcgavULqWdQVt7SYl4f1IxT8KrUirJGVhc2LgahaWj69ooklVHoU fPDFRiruwJ5YkH4RWCSDm9mi4kAz6eUf+f4yE06wZOFOb2fT8/1ZK2Snpz2KeXuZ INO0RejtFzT8L0OUlu7dBmF20y6rgAYt87lR8mIt71yuuATIrVhzlX1VdsvhdrAo VLHGV1Ncgw== =WlVL -----END PGP SIGNATURE----- Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux Pull block updates from Jens Axboe: - MD pull requests via Song: - Cleanup redundant checks (Yu Kuai) - Remove deprecated headers (Marc Zyngier, Song Liu) - Concurrency fixes (Li Lingfeng) - Memory leak fix (Li Nan) - Refactor raid1 read_balance (Yu Kuai, Paul Luse) - Clean up and fix for md_ioctl (Li Nan) - Other small fixes (Gui-Dong Han, Heming Zhao) - MD atomic limits (Christoph) - NVMe pull request via Keith: - RDMA target enhancements (Max) - Fabrics fixes (Max, Guixin, Hannes) - Atomic queue_limits usage (Christoph) - Const use for class_register (Ricardo) - Identification error handling fixes (Shin'ichiro, Keith) - Improvement and cleanup for cached request handling (Christoph) - Moving towards atomic queue limits. 
Core changes and driver bits so far (Christoph) - Fix UAF issues in aoeblk (Chun-Yi) - Zoned fix and cleanups (Damien) - s390 dasd cleanups and fixes (Jan, Miroslav) - Block issue timestamp caching (me) - noio scope guarding for zoned IO (Johannes) - block/nvme PI improvements (Kanchan) - Ability to terminate long running discard loop (Keith) - bdev revalidation fix (Li) - Get rid of old nr_queues hack for kdump kernels (Ming) - Support for async deletion of ublk (Ming) - Improve IRQ bio recycling (Pavel) - Factor in CPU capacity for remote vs local completion (Qais) - Add shared_tags configfs entry for null_blk (Shin'ichiro) - Fix for a regression in page refcounts introduced by the folio unification (Tony) - Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid, Ricardo, Roman, Tang, Uwe) * tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits) block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC block/swim: Convert to platform remove callback returning void cdrom: gdrom: Convert to platform remove callback returning void block: remove disk_stack_limits md: remove mddev->queue md: don't initialize queue limits md/raid10: use the atomic queue limit update APIs md/raid5: use the atomic queue limit update APIs md/raid1: use the atomic queue limit update APIs md/raid0: use the atomic queue limit update APIs md: add queue limit helpers md: add a mddev_is_dm helper md: add a mddev_add_trace_msg helper md: add a mddev_trace_remap helper bcache: move calculation of stripe_size and io_opt into bcache_device_init virtio_blk: Do not use disk_set_max_open/active_zones() aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts block: move capacity validation to blkpg_do_ioctl() block: prevent division by zero in blk_rq_stat_sum() drbd: atomically update queue limits in drbd_reconsider_queue_parameters ...
This commit is contained in:
commit
1ddeeb2a05
@ -96,6 +96,9 @@ static const struct block_device_operations nfhd_ops = {
|
||||
|
||||
static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = bsize,
|
||||
};
|
||||
struct nfhd_device *dev;
|
||||
int dev_id = id - NFHD_DEV_OFFSET;
|
||||
int err = -ENOMEM;
|
||||
@ -117,9 +120,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
|
||||
dev->bsize = bsize;
|
||||
dev->bshift = ffs(bsize) - 10;
|
||||
|
||||
dev->disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!dev->disk)
|
||||
dev->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(dev->disk)) {
|
||||
err = PTR_ERR(dev->disk);
|
||||
goto free_dev;
|
||||
}
|
||||
|
||||
dev->disk->major = major_num;
|
||||
dev->disk->first_minor = dev_id * 16;
|
||||
@ -128,7 +133,6 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
|
||||
dev->disk->private_data = dev;
|
||||
sprintf(dev->disk->disk_name, "nfhd%u", dev_id);
|
||||
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
|
||||
blk_queue_logical_block_size(dev->disk->queue, bsize);
|
||||
err = add_disk(dev->disk);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
|
@ -108,8 +108,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
|
||||
static DEFINE_MUTEX(ubd_lock);
|
||||
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
|
||||
|
||||
static int ubd_open(struct gendisk *disk, blk_mode_t mode);
|
||||
static void ubd_release(struct gendisk *disk);
|
||||
static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
||||
@ -118,16 +116,11 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
||||
|
||||
static const struct block_device_operations ubd_blops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ubd_open,
|
||||
.release = ubd_release,
|
||||
.ioctl = ubd_ioctl,
|
||||
.compat_ioctl = blkdev_compat_ptr_ioctl,
|
||||
.getgeo = ubd_getgeo,
|
||||
};
|
||||
|
||||
/* Protected by ubd_lock */
|
||||
static struct gendisk *ubd_gendisk[MAX_DEV];
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_UBD_SYNC
|
||||
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
|
||||
.cl = 1 })
|
||||
@ -155,7 +148,6 @@ struct ubd {
|
||||
* backing or the cow file. */
|
||||
char *file;
|
||||
char *serial;
|
||||
int count;
|
||||
int fd;
|
||||
__u64 size;
|
||||
struct openflags boot_openflags;
|
||||
@ -165,7 +157,7 @@ struct ubd {
|
||||
unsigned no_trim:1;
|
||||
struct cow cow;
|
||||
struct platform_device pdev;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *disk;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
spinlock_t lock;
|
||||
};
|
||||
@ -181,7 +173,6 @@ struct ubd {
|
||||
#define DEFAULT_UBD { \
|
||||
.file = NULL, \
|
||||
.serial = NULL, \
|
||||
.count = 0, \
|
||||
.fd = -1, \
|
||||
.size = -1, \
|
||||
.boot_openflags = OPEN_FLAGS, \
|
||||
@ -774,8 +765,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
|
||||
ubd_dev->fd = fd;
|
||||
|
||||
if(ubd_dev->cow.file != NULL){
|
||||
blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
|
||||
|
||||
err = -ENOMEM;
|
||||
ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
|
||||
if(ubd_dev->cow.bitmap == NULL){
|
||||
@ -797,11 +786,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
|
||||
if(err < 0) goto error;
|
||||
ubd_dev->cow.fd = err;
|
||||
}
|
||||
if (ubd_dev->no_trim == 0) {
|
||||
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
}
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
|
||||
return 0;
|
||||
error:
|
||||
os_close_file(ubd_dev->fd);
|
||||
@ -851,27 +835,6 @@ static const struct attribute_group *ubd_attr_groups[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
 * ubd_disk_register - populate @disk for ubd unit @unit and add it.
 * @major: major number stored in the gendisk
 * @size:  device size in bytes; converted to 512-byte sectors for capacity
 * @unit:  index into ubd_devs[]; also selects the minor range and name letter
 * @disk:  gendisk to fill in and register
 *
 * Returns the result of device_add_disk().
 */
static int ubd_disk_register(int major, u64 size, int unit,
|
||||
struct gendisk *disk)
|
||||
{
|
||||
disk->major = major;
|
||||
/* Each unit owns a block of (1 << UBD_SHIFT) minors. */
disk->first_minor = unit << UBD_SHIFT;
|
||||
disk->minors = 1 << UBD_SHIFT;
|
||||
disk->fops = &ubd_blops;
|
||||
set_capacity(disk, size / 512);
|
||||
/* Units are named ubda, ubdb, ... by offsetting from 'a'. */
sprintf(disk->disk_name, "ubd%c", 'a' + unit);
|
||||
|
||||
/* Set up and register the platform device the disk is added under. */
ubd_devs[unit].pdev.id = unit;
|
||||
ubd_devs[unit].pdev.name = DRIVER_NAME;
|
||||
ubd_devs[unit].pdev.dev.release = ubd_device_release;
|
||||
dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
|
||||
/* NOTE(review): the return value of platform_device_register() is not checked here. */
platform_device_register(&ubd_devs[unit].pdev);
|
||||
|
||||
disk->private_data = &ubd_devs[unit];
|
||||
disk->queue = ubd_devs[unit].queue;
|
||||
return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
|
||||
}
|
||||
|
||||
#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
|
||||
|
||||
static const struct blk_mq_ops ubd_mq_ops = {
|
||||
@ -881,18 +844,36 @@ static const struct blk_mq_ops ubd_mq_ops = {
|
||||
static int ubd_add(int n, char **error_out)
|
||||
{
|
||||
struct ubd *ubd_dev = &ubd_devs[n];
|
||||
struct queue_limits lim = {
|
||||
.max_segments = MAX_SG,
|
||||
.seg_boundary_mask = PAGE_SIZE - 1,
|
||||
};
|
||||
struct gendisk *disk;
|
||||
int err = 0;
|
||||
|
||||
if(ubd_dev->file == NULL)
|
||||
goto out;
|
||||
|
||||
if (ubd_dev->cow.file)
|
||||
lim.max_hw_sectors = 8 * sizeof(long);
|
||||
if (!ubd_dev->no_trim) {
|
||||
lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
|
||||
lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
|
||||
}
|
||||
|
||||
err = ubd_file_size(ubd_dev, &ubd_dev->size);
|
||||
if(err < 0){
|
||||
*error_out = "Couldn't determine size of device's file";
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = ubd_open_dev(ubd_dev);
|
||||
if (err) {
|
||||
pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
|
||||
'a' + n, ubd_dev->file, -err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
|
||||
|
||||
ubd_dev->tag_set.ops = &ubd_mq_ops;
|
||||
@ -904,29 +885,43 @@ static int ubd_add(int n, char **error_out)
|
||||
|
||||
err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
|
||||
if (err)
|
||||
goto out;
|
||||
goto out_close;
|
||||
|
||||
disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
|
||||
disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
ubd_dev->queue = disk->queue;
|
||||
|
||||
blk_queue_write_cache(ubd_dev->queue, true, false);
|
||||
blk_queue_max_segments(ubd_dev->queue, MAX_SG);
|
||||
blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
|
||||
err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_write_cache(disk->queue, true, false);
|
||||
disk->major = UBD_MAJOR;
|
||||
disk->first_minor = n << UBD_SHIFT;
|
||||
disk->minors = 1 << UBD_SHIFT;
|
||||
disk->fops = &ubd_blops;
|
||||
set_capacity(disk, ubd_dev->size / 512);
|
||||
sprintf(disk->disk_name, "ubd%c", 'a' + n);
|
||||
disk->private_data = ubd_dev;
|
||||
set_disk_ro(disk, !ubd_dev->openflags.w);
|
||||
|
||||
ubd_dev->pdev.id = n;
|
||||
ubd_dev->pdev.name = DRIVER_NAME;
|
||||
ubd_dev->pdev.dev.release = ubd_device_release;
|
||||
dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
|
||||
platform_device_register(&ubd_dev->pdev);
|
||||
|
||||
err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
|
||||
ubd_gendisk[n] = disk;
|
||||
return 0;
|
||||
|
||||
out_cleanup_disk:
|
||||
put_disk(disk);
|
||||
out_cleanup_tags:
|
||||
blk_mq_free_tag_set(&ubd_dev->tag_set);
|
||||
out_close:
|
||||
ubd_close_dev(ubd_dev);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
@ -1012,7 +1007,6 @@ static int ubd_id(char **str, int *start_out, int *end_out)
|
||||
|
||||
static int ubd_remove(int n, char **error_out)
|
||||
{
|
||||
struct gendisk *disk = ubd_gendisk[n];
|
||||
struct ubd *ubd_dev;
|
||||
int err = -ENODEV;
|
||||
|
||||
@ -1023,15 +1017,15 @@ static int ubd_remove(int n, char **error_out)
|
||||
if(ubd_dev->file == NULL)
|
||||
goto out;
|
||||
|
||||
/* you cannot remove a open disk */
|
||||
err = -EBUSY;
|
||||
if(ubd_dev->count > 0)
|
||||
goto out;
|
||||
if (ubd_dev->disk) {
|
||||
/* you cannot remove a open disk */
|
||||
err = -EBUSY;
|
||||
if (disk_openers(ubd_dev->disk))
|
||||
goto out;
|
||||
|
||||
ubd_gendisk[n] = NULL;
|
||||
if(disk != NULL){
|
||||
del_gendisk(disk);
|
||||
put_disk(disk);
|
||||
del_gendisk(ubd_dev->disk);
|
||||
ubd_close_dev(ubd_dev);
|
||||
put_disk(ubd_dev->disk);
|
||||
}
|
||||
|
||||
err = 0;
|
||||
@ -1153,37 +1147,6 @@ static int __init ubd_driver_init(void){
|
||||
|
||||
device_initcall(ubd_driver_init);
|
||||
|
||||
/*
 * ubd_open - block_device_operations open handler for a ubd disk.
 *
 * Under ubd_mutex: when the per-device open count is zero, calls
 * ubd_open_dev() first; on success the count is incremented and the disk's
 * read-only state is set from the device's open flags.
 *
 * Returns 0 on success, or the error returned by ubd_open_dev().
 */
static int ubd_open(struct gendisk *disk, blk_mode_t mode)
|
||||
{
|
||||
struct ubd *ubd_dev = disk->private_data;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&ubd_mutex);
|
||||
/* Only the first opener triggers ubd_open_dev(). */
if(ubd_dev->count == 0){
|
||||
err = ubd_open_dev(ubd_dev);
|
||||
if(err){
|
||||
/* err is negated for printing, i.e. it is expected to be a negative errno. */
printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
|
||||
disk->disk_name, ubd_dev->file, -err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ubd_dev->count++;
|
||||
/* Mark the disk read-only when the write flag is not set in openflags. */
set_disk_ro(disk, !ubd_dev->openflags.w);
|
||||
out:
|
||||
mutex_unlock(&ubd_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * ubd_release - block_device_operations release handler for a ubd disk.
 *
 * Under ubd_mutex, drops one reference from the per-device open count and
 * calls ubd_close_dev() when the count reaches zero.
 */
static void ubd_release(struct gendisk *disk)
|
||||
{
|
||||
struct ubd *ubd_dev = disk->private_data;
|
||||
|
||||
mutex_lock(&ubd_mutex);
|
||||
/* Last closer: the pre-decrement makes the comparison see the new count. */
if(--ubd_dev->count == 0)
|
||||
ubd_close_dev(ubd_dev);
|
||||
mutex_unlock(&ubd_mutex);
|
||||
}
|
||||
|
||||
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
|
||||
__u64 *cow_offset, unsigned long *bitmap,
|
||||
__u64 bitmap_offset, unsigned long *bitmap_words,
|
||||
|
@ -264,16 +264,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
|
||||
struct proc_dir_entry *procdir)
|
||||
{
|
||||
char tmp[2] = { '0' + which, 0 };
|
||||
int err = -ENOMEM;
|
||||
int err;
|
||||
|
||||
dev->fd = -1;
|
||||
dev->filename = NULL;
|
||||
spin_lock_init(&dev->lock);
|
||||
dev->users = 0;
|
||||
|
||||
dev->gd = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!dev->gd)
|
||||
dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE);
|
||||
if (IS_ERR(dev->gd)) {
|
||||
err = PTR_ERR(dev->gd);
|
||||
goto out;
|
||||
}
|
||||
dev->gd->major = simdisk_major;
|
||||
dev->gd->first_minor = which;
|
||||
dev->gd->minors = SIMDISK_MINORS;
|
||||
|
@ -383,7 +383,7 @@ void __init bdev_cache_init(void)
|
||||
|
||||
bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
|
||||
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
|
||||
SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
|
||||
SLAB_ACCOUNT|SLAB_PANIC),
|
||||
init_once);
|
||||
err = register_filesystem(&bd_type);
|
||||
if (err)
|
||||
|
@ -127,7 +127,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
|
||||
if (!bfqg_stats_waiting(stats))
|
||||
return;
|
||||
|
||||
now = ktime_get_ns();
|
||||
now = blk_time_get_ns();
|
||||
if (now > stats->start_group_wait_time)
|
||||
bfq_stat_add(&stats->group_wait_time,
|
||||
now - stats->start_group_wait_time);
|
||||
@ -144,7 +144,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
|
||||
return;
|
||||
if (bfqg == curr_bfqg)
|
||||
return;
|
||||
stats->start_group_wait_time = ktime_get_ns();
|
||||
stats->start_group_wait_time = blk_time_get_ns();
|
||||
bfqg_stats_mark_waiting(stats);
|
||||
}
|
||||
|
||||
@ -156,7 +156,7 @@ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
|
||||
if (!bfqg_stats_empty(stats))
|
||||
return;
|
||||
|
||||
now = ktime_get_ns();
|
||||
now = blk_time_get_ns();
|
||||
if (now > stats->start_empty_time)
|
||||
bfq_stat_add(&stats->empty_time,
|
||||
now - stats->start_empty_time);
|
||||
@ -183,7 +183,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
|
||||
if (bfqg_stats_empty(stats))
|
||||
return;
|
||||
|
||||
stats->start_empty_time = ktime_get_ns();
|
||||
stats->start_empty_time = blk_time_get_ns();
|
||||
bfqg_stats_mark_empty(stats);
|
||||
}
|
||||
|
||||
@ -192,7 +192,7 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
|
||||
struct bfqg_stats *stats = &bfqg->stats;
|
||||
|
||||
if (bfqg_stats_idling(stats)) {
|
||||
u64 now = ktime_get_ns();
|
||||
u64 now = blk_time_get_ns();
|
||||
|
||||
if (now > stats->start_idle_time)
|
||||
bfq_stat_add(&stats->idle_time,
|
||||
@ -205,7 +205,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
|
||||
{
|
||||
struct bfqg_stats *stats = &bfqg->stats;
|
||||
|
||||
stats->start_idle_time = ktime_get_ns();
|
||||
stats->start_idle_time = blk_time_get_ns();
|
||||
bfqg_stats_mark_idling(stats);
|
||||
}
|
||||
|
||||
@ -242,7 +242,7 @@ void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
|
||||
u64 io_start_time_ns, blk_opf_t opf)
|
||||
{
|
||||
struct bfqg_stats *stats = &bfqg->stats;
|
||||
u64 now = ktime_get_ns();
|
||||
u64 now = blk_time_get_ns();
|
||||
|
||||
if (now > io_start_time_ns)
|
||||
blkg_rwstat_add(&stats->service_time, opf,
|
||||
|
@ -1005,7 +1005,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
|
||||
|
||||
rq = rq_entry_fifo(bfqq->fifo.next);
|
||||
|
||||
if (rq == last || ktime_get_ns() < rq->fifo_time)
|
||||
if (rq == last || blk_time_get_ns() < rq->fifo_time)
|
||||
return NULL;
|
||||
|
||||
bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq);
|
||||
@ -1829,7 +1829,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
* bfq_bfqq_update_budg_for_activation for
|
||||
* details on the usage of the next variable.
|
||||
*/
|
||||
arrived_in_time = ktime_get_ns() <=
|
||||
arrived_in_time = blk_time_get_ns() <=
|
||||
bfqq->ttime.last_end_request +
|
||||
bfqd->bfq_slice_idle * 3;
|
||||
unsigned int act_idx = bfq_actuator_index(bfqd, rq->bio);
|
||||
@ -2208,7 +2208,7 @@ static void bfq_add_request(struct request *rq)
|
||||
struct request *next_rq, *prev;
|
||||
unsigned int old_wr_coeff = bfqq->wr_coeff;
|
||||
bool interactive = false;
|
||||
u64 now_ns = ktime_get_ns();
|
||||
u64 now_ns = blk_time_get_ns();
|
||||
|
||||
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
|
||||
bfqq->queued[rq_is_sync(rq)]++;
|
||||
@ -2262,7 +2262,7 @@ static void bfq_add_request(struct request *rq)
|
||||
bfqd->rqs_injected && bfqd->tot_rq_in_driver > 0)) &&
|
||||
time_is_before_eq_jiffies(bfqq->decrease_time_jif +
|
||||
msecs_to_jiffies(10))) {
|
||||
bfqd->last_empty_occupied_ns = ktime_get_ns();
|
||||
bfqd->last_empty_occupied_ns = blk_time_get_ns();
|
||||
/*
|
||||
* Start the state machine for measuring the
|
||||
* total service time of rq: setting
|
||||
@ -3294,7 +3294,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd,
|
||||
else
|
||||
timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
|
||||
|
||||
bfqd->last_budget_start = ktime_get();
|
||||
bfqd->last_budget_start = blk_time_get();
|
||||
|
||||
bfqq->budget_timeout = jiffies +
|
||||
bfqd->bfq_timeout * timeout_coeff;
|
||||
@ -3394,7 +3394,7 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
|
||||
else if (bfqq->wr_coeff > 1)
|
||||
sl = max_t(u32, sl, 20ULL * NSEC_PER_MSEC);
|
||||
|
||||
bfqd->last_idling_start = ktime_get();
|
||||
bfqd->last_idling_start = blk_time_get();
|
||||
bfqd->last_idling_start_jiffies = jiffies;
|
||||
|
||||
hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
|
||||
@ -3433,7 +3433,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd,
|
||||
struct request *rq)
|
||||
{
|
||||
if (rq != NULL) { /* new rq dispatch now, reset accordingly */
|
||||
bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns();
|
||||
bfqd->last_dispatch = bfqd->first_dispatch = blk_time_get_ns();
|
||||
bfqd->peak_rate_samples = 1;
|
||||
bfqd->sequential_samples = 0;
|
||||
bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size =
|
||||
@ -3590,7 +3590,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
|
||||
*/
|
||||
static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
|
||||
{
|
||||
u64 now_ns = ktime_get_ns();
|
||||
u64 now_ns = blk_time_get_ns();
|
||||
|
||||
if (bfqd->peak_rate_samples == 0) { /* first dispatch */
|
||||
bfq_log(bfqd, "update_peak_rate: goto reset, samples %d",
|
||||
@ -4162,7 +4162,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
if (compensate)
|
||||
delta_ktime = bfqd->last_idling_start;
|
||||
else
|
||||
delta_ktime = ktime_get();
|
||||
delta_ktime = blk_time_get();
|
||||
delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
|
||||
delta_usecs = ktime_to_us(delta_ktime);
|
||||
|
||||
@ -5591,7 +5591,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct bfq_io_cq *bic, pid_t pid, int is_sync,
|
||||
unsigned int act_idx)
|
||||
{
|
||||
u64 now_ns = ktime_get_ns();
|
||||
u64 now_ns = blk_time_get_ns();
|
||||
|
||||
bfqq->actuator_idx = act_idx;
|
||||
RB_CLEAR_NODE(&bfqq->entity.rb_node);
|
||||
@ -5903,7 +5903,7 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
|
||||
*/
|
||||
if (bfqq->dispatched || bfq_bfqq_busy(bfqq))
|
||||
return;
|
||||
elapsed = ktime_get_ns() - bfqq->ttime.last_end_request;
|
||||
elapsed = blk_time_get_ns() - bfqq->ttime.last_end_request;
|
||||
elapsed = min_t(u64, elapsed, 2ULL * bfqd->bfq_slice_idle);
|
||||
|
||||
ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
|
||||
@ -6194,7 +6194,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
|
||||
bfq_add_request(rq);
|
||||
idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
|
||||
|
||||
rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
|
||||
rq->fifo_time = blk_time_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
|
||||
list_add_tail(&rq->queuelist, &bfqq->fifo);
|
||||
|
||||
bfq_rq_enqueued(bfqd, bfqq, rq);
|
||||
@ -6370,7 +6370,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
bfq_weights_tree_remove(bfqq);
|
||||
}
|
||||
|
||||
now_ns = ktime_get_ns();
|
||||
now_ns = blk_time_get_ns();
|
||||
|
||||
bfqq->ttime.last_end_request = now_ns;
|
||||
|
||||
@ -6585,7 +6585,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
static void bfq_update_inject_limit(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq)
|
||||
{
|
||||
u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns;
|
||||
u64 tot_time_ns = blk_time_get_ns() - bfqd->last_empty_occupied_ns;
|
||||
unsigned int old_limit = bfqq->inject_limit;
|
||||
|
||||
if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) {
|
||||
|
@ -395,6 +395,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
|
||||
iter.tuple_size = bi->tuple_size;
|
||||
iter.seed = proc_iter->bi_sector;
|
||||
iter.prot_buf = bvec_virt(bip->bip_vec);
|
||||
iter.pi_offset = bi->pi_offset;
|
||||
|
||||
__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
|
||||
void *kaddr = bvec_kmap_local(&bv);
|
||||
|
45
block/bio.c
45
block/bio.c
@ -16,7 +16,6 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
@ -763,29 +762,31 @@ static inline void bio_put_percpu_cache(struct bio *bio)
|
||||
struct bio_alloc_cache *cache;
|
||||
|
||||
cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
|
||||
if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) {
|
||||
put_cpu();
|
||||
bio_free(bio);
|
||||
return;
|
||||
}
|
||||
if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX)
|
||||
goto out_free;
|
||||
|
||||
bio_uninit(bio);
|
||||
|
||||
if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) {
|
||||
if (in_task()) {
|
||||
bio_uninit(bio);
|
||||
bio->bi_next = cache->free_list;
|
||||
/* Not necessary but helps not to iopoll already freed bios */
|
||||
bio->bi_bdev = NULL;
|
||||
cache->free_list = bio;
|
||||
cache->nr++;
|
||||
} else {
|
||||
unsigned long flags;
|
||||
} else if (in_hardirq()) {
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
local_irq_save(flags);
|
||||
bio_uninit(bio);
|
||||
bio->bi_next = cache->free_list_irq;
|
||||
cache->free_list_irq = bio;
|
||||
cache->nr_irq++;
|
||||
local_irq_restore(flags);
|
||||
} else {
|
||||
goto out_free;
|
||||
}
|
||||
put_cpu();
|
||||
return;
|
||||
out_free:
|
||||
put_cpu();
|
||||
bio_free(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1154,7 +1155,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
|
||||
bio_for_each_folio_all(fi, bio) {
|
||||
struct page *page;
|
||||
size_t done = 0;
|
||||
size_t nr_pages;
|
||||
|
||||
if (mark_dirty) {
|
||||
folio_lock(fi.folio);
|
||||
@ -1162,10 +1163,11 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
folio_unlock(fi.folio);
|
||||
}
|
||||
page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
|
||||
nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE -
|
||||
fi.offset / PAGE_SIZE + 1;
|
||||
do {
|
||||
bio_release_page(bio, page++);
|
||||
done += PAGE_SIZE;
|
||||
} while (done < fi.length);
|
||||
} while (--nr_pages != 0);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__bio_release_pages);
|
||||
@ -1371,21 +1373,12 @@ int submit_bio_wait(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
unsigned long hang_check;
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = submit_bio_wait_endio;
|
||||
bio->bi_opf |= REQ_SYNC;
|
||||
submit_bio(bio);
|
||||
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
hang_check = sysctl_hung_task_timeout_secs;
|
||||
if (hang_check)
|
||||
while (!wait_for_completion_io_timeout(&done,
|
||||
hang_check * (HZ/2)))
|
||||
;
|
||||
else
|
||||
wait_for_completion_io(&done);
|
||||
blk_wait_io(&done);
|
||||
|
||||
return blk_status_to_errno(bio->bi_status);
|
||||
}
|
||||
|
@ -1846,7 +1846,7 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
{
|
||||
unsigned long pflags;
|
||||
bool clamp;
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now = blk_time_get_ns();
|
||||
u64 exp;
|
||||
u64 delay_nsec = 0;
|
||||
int tok;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/llist.h>
|
||||
#include "blk.h"
|
||||
|
||||
struct blkcg_gq;
|
||||
struct blkg_policy_data;
|
||||
|
@ -394,24 +394,34 @@ static void blk_timeout_work(struct work_struct *work)
|
||||
{
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue(int node_id)
|
||||
struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int error;
|
||||
|
||||
q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
|
||||
node_id);
|
||||
if (!q)
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
q->last_merge = NULL;
|
||||
|
||||
q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
|
||||
if (q->id < 0)
|
||||
if (q->id < 0) {
|
||||
error = q->id;
|
||||
goto fail_q;
|
||||
}
|
||||
|
||||
q->stats = blk_alloc_queue_stats();
|
||||
if (!q->stats)
|
||||
if (!q->stats) {
|
||||
error = -ENOMEM;
|
||||
goto fail_id;
|
||||
}
|
||||
|
||||
error = blk_set_default_limits(lim);
|
||||
if (error)
|
||||
goto fail_stats;
|
||||
q->limits = *lim;
|
||||
|
||||
q->node = node_id;
|
||||
|
||||
@ -425,6 +435,7 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||
mutex_init(&q->debugfs_mutex);
|
||||
mutex_init(&q->sysfs_lock);
|
||||
mutex_init(&q->sysfs_dir_lock);
|
||||
mutex_init(&q->limits_lock);
|
||||
mutex_init(&q->rq_qos_mutex);
|
||||
spin_lock_init(&q->queue_lock);
|
||||
|
||||
@ -435,12 +446,12 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||
* Init percpu_ref in atomic mode so that it's faster to shutdown.
|
||||
* See blk_register_queue() for details.
|
||||
*/
|
||||
if (percpu_ref_init(&q->q_usage_counter,
|
||||
error = percpu_ref_init(&q->q_usage_counter,
|
||||
blk_queue_usage_counter_release,
|
||||
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
|
||||
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
|
||||
if (error)
|
||||
goto fail_stats;
|
||||
|
||||
blk_set_default_limits(&q->limits);
|
||||
q->nr_requests = BLKDEV_DEFAULT_RQ;
|
||||
|
||||
return q;
|
||||
@ -451,7 +462,7 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||
ida_free(&blk_queue_ida, q->id);
|
||||
fail_q:
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
return NULL;
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1083,6 +1094,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
|
||||
if (tsk->plug)
|
||||
return;
|
||||
|
||||
plug->cur_ktime = 0;
|
||||
plug->mq_list = NULL;
|
||||
plug->cached_rq = NULL;
|
||||
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
|
||||
@ -1182,6 +1194,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
|
||||
*/
|
||||
if (unlikely(!rq_list_empty(plug->cached_rq)))
|
||||
blk_mq_free_plug_rqs(plug);
|
||||
|
||||
current->flags &= ~PF_BLOCK_TS;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1229,8 +1243,7 @@ int __init blk_dev_init(void)
|
||||
if (!kblockd_workqueue)
|
||||
panic("Failed to create kblockd\n");
|
||||
|
||||
blk_requestq_cachep = kmem_cache_create("request_queue",
|
||||
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
|
||||
blk_requestq_cachep = KMEM_CACHE(request_queue, SLAB_PANIC);
|
||||
|
||||
blk_debugfs_root = debugfs_create_dir("block", NULL);
|
||||
|
||||
|
@ -143,7 +143,7 @@ static void blk_account_io_flush(struct request *rq)
|
||||
part_stat_lock();
|
||||
part_stat_inc(part, ios[STAT_FLUSH]);
|
||||
part_stat_add(part, nsecs[STAT_FLUSH],
|
||||
ktime_get_ns() - rq->start_time_ns);
|
||||
blk_time_get_ns() - rq->start_time_ns);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
|
@ -370,6 +370,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
|
||||
bi->profile = template->profile ? template->profile : &nop_profile;
|
||||
bi->tuple_size = template->tuple_size;
|
||||
bi->tag_size = template->tag_size;
|
||||
bi->pi_offset = template->pi_offset;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
|
||||
|
||||
|
@ -829,7 +829,7 @@ static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
|
||||
|
||||
/* step up/down based on the vrate */
|
||||
vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC);
|
||||
now_ns = ktime_get_ns();
|
||||
now_ns = blk_time_get_ns();
|
||||
|
||||
if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) {
|
||||
if (!ioc->autop_too_fast_at)
|
||||
@ -1044,7 +1044,7 @@ static void ioc_now(struct ioc *ioc, struct ioc_now *now)
|
||||
unsigned seq;
|
||||
u64 vrate;
|
||||
|
||||
now->now_ns = ktime_get();
|
||||
now->now_ns = blk_time_get_ns();
|
||||
now->now = ktime_to_us(now->now_ns);
|
||||
vrate = atomic64_read(&ioc->vtime_rate);
|
||||
|
||||
@ -2817,7 +2817,7 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
|
||||
return;
|
||||
}
|
||||
|
||||
on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
|
||||
on_q_ns = blk_time_get_ns() - rq->alloc_time_ns;
|
||||
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
|
||||
size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
|
||||
|
||||
@ -2900,7 +2900,7 @@ static int blk_iocost_init(struct gendisk *disk)
|
||||
ioc->vtime_base_rate = VTIME_PER_USEC;
|
||||
atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
|
||||
seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
|
||||
ioc->period_at = ktime_to_us(ktime_get());
|
||||
ioc->period_at = ktime_to_us(blk_time_get());
|
||||
atomic64_set(&ioc->cur_period, 0);
|
||||
atomic_set(&ioc->hweight_gen, 0);
|
||||
|
||||
|
@ -609,7 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
if (!iolat->blkiolat->enabled)
|
||||
return;
|
||||
|
||||
now = ktime_to_ns(ktime_get());
|
||||
now = blk_time_get_ns();
|
||||
while (blkg && blkg->parent) {
|
||||
iolat = blkg_to_lat(blkg);
|
||||
if (!iolat) {
|
||||
@ -661,7 +661,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
|
||||
struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
|
||||
struct blkcg_gq *blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now = blk_time_get_ns();
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(blkg, pos_css,
|
||||
@ -985,7 +985,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
|
||||
struct blkcg_gq *blkg = lat_to_blkg(iolat);
|
||||
struct rq_qos *rqos = iolat_rq_qos(blkg->q);
|
||||
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now = blk_time_get_ns();
|
||||
int cpu;
|
||||
|
||||
if (blk_queue_nonrot(blkg->q))
|
||||
|
@ -35,6 +35,26 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
|
||||
return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
static void await_bio_endio(struct bio *bio)
|
||||
{
|
||||
complete(bio->bi_private);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* await_bio_chain - ends @bio and waits for every chained bio to complete
|
||||
*/
|
||||
static void await_bio_chain(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = await_bio_endio;
|
||||
bio_endio(bio);
|
||||
blk_wait_io(&done);
|
||||
}
|
||||
|
||||
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
|
||||
{
|
||||
@ -77,6 +97,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
* is disabled.
|
||||
*/
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
@ -120,32 +144,33 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
|
||||
struct bio **biop, unsigned flags)
|
||||
{
|
||||
struct bio *bio = *biop;
|
||||
unsigned int max_write_zeroes_sectors;
|
||||
unsigned int max_sectors;
|
||||
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
|
||||
/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
|
||||
max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
|
||||
/* Ensure that max_sectors doesn't overflow bi_size */
|
||||
max_sectors = bdev_write_zeroes_sectors(bdev);
|
||||
|
||||
if (max_write_zeroes_sectors == 0)
|
||||
if (max_sectors == 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (nr_sects) {
|
||||
unsigned int len = min_t(sector_t, nr_sects, max_sectors);
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
if (flags & BLKDEV_ZERO_NOUNMAP)
|
||||
bio->bi_opf |= REQ_NOUNMAP;
|
||||
|
||||
if (nr_sects > max_write_zeroes_sectors) {
|
||||
bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
|
||||
nr_sects -= max_write_zeroes_sectors;
|
||||
sector += max_write_zeroes_sectors;
|
||||
} else {
|
||||
bio->bi_iter.bi_size = nr_sects << 9;
|
||||
nr_sects = 0;
|
||||
}
|
||||
bio->bi_iter.bi_size = len << SECTOR_SHIFT;
|
||||
nr_sects -= len;
|
||||
sector += len;
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
@ -190,6 +215,10 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
|
||||
break;
|
||||
}
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
@ -280,7 +309,7 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
if (ret && try_write_zeroes) {
|
||||
if (ret && ret != -EINTR && try_write_zeroes) {
|
||||
if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
|
||||
try_write_zeroes = false;
|
||||
goto retry;
|
||||
@ -322,7 +351,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
|
||||
return -EPERM;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
for (;;) {
|
||||
while (nr_sects) {
|
||||
unsigned int len = min_t(sector_t, nr_sects, max_sectors);
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
|
||||
@ -331,12 +360,17 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
|
||||
|
||||
sector += len;
|
||||
nr_sects -= len;
|
||||
if (!nr_sects) {
|
||||
ret = submit_bio_wait(bio);
|
||||
bio_put(bio);
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
ret = -EINTR;
|
||||
bio = NULL;
|
||||
break;
|
||||
}
|
||||
cond_resched();
|
||||
}
|
||||
if (bio) {
|
||||
ret = submit_bio_wait(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
|
186
block/blk-mq.c
186
block/blk-mq.c
@ -21,7 +21,6 @@
|
||||
#include <linux/llist.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/sched/topology.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/delay.h>
|
||||
@ -322,7 +321,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
|
||||
RB_CLEAR_NODE(&rq->rb_node);
|
||||
rq->tag = BLK_MQ_NO_TAG;
|
||||
rq->internal_tag = BLK_MQ_NO_TAG;
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->start_time_ns = blk_time_get_ns();
|
||||
rq->part = NULL;
|
||||
blk_crypto_rq_set_defaults(rq);
|
||||
}
|
||||
@ -332,7 +331,7 @@ EXPORT_SYMBOL(blk_rq_init);
|
||||
static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
|
||||
{
|
||||
if (blk_mq_need_time_stamp(rq))
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->start_time_ns = blk_time_get_ns();
|
||||
else
|
||||
rq->start_time_ns = 0;
|
||||
|
||||
@ -443,7 +442,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
alloc_time_ns = blk_time_get_ns();
|
||||
|
||||
if (data->cmd_flags & REQ_NOWAIT)
|
||||
data->flags |= BLK_MQ_REQ_NOWAIT;
|
||||
@ -628,7 +627,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
alloc_time_ns = blk_time_get_ns();
|
||||
|
||||
/*
|
||||
* If the tag allocator sleeps we could get an allocation for a
|
||||
@ -1041,7 +1040,7 @@ static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
|
||||
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
|
||||
{
|
||||
if (blk_mq_need_time_stamp(rq))
|
||||
__blk_mq_end_request_acct(rq, ktime_get_ns());
|
||||
__blk_mq_end_request_acct(rq, blk_time_get_ns());
|
||||
|
||||
blk_mq_finish_request(rq);
|
||||
|
||||
@ -1084,7 +1083,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
|
||||
u64 now = 0;
|
||||
|
||||
if (iob->need_ts)
|
||||
now = ktime_get_ns();
|
||||
now = blk_time_get_ns();
|
||||
|
||||
while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
|
||||
prefetch(rq->bio);
|
||||
@ -1167,10 +1166,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
|
||||
if (force_irqthreads())
|
||||
return false;
|
||||
|
||||
/* same CPU or cache domain? Complete locally */
|
||||
/* same CPU or cache domain and capacity? Complete locally */
|
||||
if (cpu == rq->mq_ctx->cpu ||
|
||||
(!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
|
||||
cpus_share_cache(cpu, rq->mq_ctx->cpu)))
|
||||
cpus_share_cache(cpu, rq->mq_ctx->cpu) &&
|
||||
cpus_equal_capacity(cpu, rq->mq_ctx->cpu)))
|
||||
return false;
|
||||
|
||||
/* don't try to IPI to an offline CPU */
|
||||
@ -1254,7 +1254,7 @@ void blk_mq_start_request(struct request *rq)
|
||||
|
||||
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) &&
|
||||
!blk_rq_is_passthrough(rq)) {
|
||||
rq->io_start_time_ns = ktime_get_ns();
|
||||
rq->io_start_time_ns = blk_time_get_ns();
|
||||
rq->stats_sectors = blk_rq_sectors(rq);
|
||||
rq->rq_flags |= RQF_STATS;
|
||||
rq_qos_issue(q, rq);
|
||||
@ -1409,22 +1409,10 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
|
||||
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
|
||||
if (blk_rq_is_poll(rq)) {
|
||||
if (blk_rq_is_poll(rq))
|
||||
blk_rq_poll_completion(rq, &wait.done);
|
||||
} else {
|
||||
/*
|
||||
* Prevent hang_check timer from firing at us during very long
|
||||
* I/O
|
||||
*/
|
||||
unsigned long hang_check = sysctl_hung_task_timeout_secs;
|
||||
|
||||
if (hang_check)
|
||||
while (!wait_for_completion_io_timeout(&wait.done,
|
||||
hang_check * (HZ/2)))
|
||||
;
|
||||
else
|
||||
wait_for_completion_io(&wait.done);
|
||||
}
|
||||
else
|
||||
blk_wait_io(&wait.done);
|
||||
|
||||
return wait.ret;
|
||||
}
|
||||
@ -2892,9 +2880,6 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
|
||||
};
|
||||
struct request *rq;
|
||||
|
||||
if (blk_mq_attempt_bio_merge(q, bio, nsegs))
|
||||
return NULL;
|
||||
|
||||
rq_qos_throttle(q, bio);
|
||||
|
||||
if (plug) {
|
||||
@ -2913,23 +2898,32 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we can use the passed on request for submitting the passed in bio,
|
||||
* and remove it from the request list if it can be used.
|
||||
* Check if there is a suitable cached request and return it.
|
||||
*/
|
||||
static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
		struct request_queue *q, blk_opf_t opf)
{
	enum hctx_type wanted = blk_mq_get_hctx_type(opf);
	enum hctx_type cached;
	struct request *rq;

	/* Without a plug there is no cached request list to look at. */
	if (!plug)
		return NULL;

	/* Only the head of the cached list is examined, and it must be for @q. */
	rq = rq_list_peek(&plug->cached_rq);
	if (!rq)
		return NULL;
	if (rq->q != q)
		return NULL;

	/*
	 * The hctx type has to match, with one exception: an operation that
	 * maps to HCTX_TYPE_READ may use a request cached for
	 * HCTX_TYPE_DEFAULT.
	 */
	cached = rq->mq_hctx->type;
	if (wanted != cached &&
	    !(wanted == HCTX_TYPE_READ && cached == HCTX_TYPE_DEFAULT))
		return NULL;

	/* A flush/non-flush mismatch disqualifies the cached request. */
	if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
		return NULL;

	return rq;
}
|
||||
|
||||
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
struct bio *bio)
|
||||
{
|
||||
enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
|
||||
enum hctx_type hctx_type = rq->mq_hctx->type;
|
||||
|
||||
WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
|
||||
|
||||
if (type != hctx_type &&
|
||||
!(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
|
||||
return false;
|
||||
if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If any qos ->throttle() end up blocking, we will have flushed the
|
||||
* plug and hence killed the cached_rq list as well. Pop this entry
|
||||
@ -2941,7 +2935,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
blk_mq_rq_time_init(rq, 0);
|
||||
rq->cmd_flags = bio->bi_opf;
|
||||
INIT_LIST_HEAD(&rq->queuelist);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2963,50 +2956,43 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
struct blk_plug *plug = blk_mq_plug(bio);
|
||||
const int is_sync = op_is_sync(bio->bi_opf);
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct request *rq = NULL;
|
||||
unsigned int nr_segs = 1;
|
||||
struct request *rq;
|
||||
blk_status_t ret;
|
||||
|
||||
bio = blk_queue_bounce(bio, q);
|
||||
|
||||
if (plug) {
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
if (rq && rq->q != q)
|
||||
rq = NULL;
|
||||
}
|
||||
if (rq) {
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
return;
|
||||
}
|
||||
if (!bio_integrity_prep(bio))
|
||||
return;
|
||||
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
|
||||
return;
|
||||
if (blk_mq_use_cached_rq(rq, plug, bio))
|
||||
goto done;
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
} else {
|
||||
/*
|
||||
* If the plug has a cached request for this queue, try use it.
|
||||
*
|
||||
* The cached request already holds a q_usage_counter reference and we
|
||||
* don't have to acquire a new one if we use it.
|
||||
*/
|
||||
rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
|
||||
if (!rq) {
|
||||
if (unlikely(bio_queue_enter(bio)))
|
||||
return;
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
goto fail;
|
||||
}
|
||||
if (!bio_integrity_prep(bio))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
|
||||
if (unlikely(!rq)) {
|
||||
fail:
|
||||
blk_queue_exit(q);
|
||||
return;
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
goto queue_exit;
|
||||
}
|
||||
if (!bio_integrity_prep(bio))
|
||||
goto queue_exit;
|
||||
|
||||
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
|
||||
goto queue_exit;
|
||||
|
||||
if (!rq) {
|
||||
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
|
||||
if (unlikely(!rq))
|
||||
goto queue_exit;
|
||||
} else {
|
||||
blk_mq_use_cached_rq(rq, plug, bio);
|
||||
}
|
||||
|
||||
done:
|
||||
trace_block_getrq(bio);
|
||||
|
||||
rq_qos_track(q, rq, bio);
|
||||
@ -3037,6 +3023,15 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
} else {
|
||||
blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
|
||||
}
|
||||
return;
|
||||
|
||||
queue_exit:
|
||||
/*
|
||||
* Don't drop the queue reference if we were trying to use a cached
|
||||
* request and thus didn't acquire one.
|
||||
*/
|
||||
if (!rq)
|
||||
blk_queue_exit(q);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_MQ_STACKING
|
||||
@ -3098,7 +3093,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
ret = blk_mq_request_issue_directly(rq, true));
|
||||
if (ret)
|
||||
blk_account_io_done(rq, ktime_get_ns());
|
||||
blk_account_io_done(rq, blk_time_get_ns());
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
|
||||
@ -4078,15 +4073,16 @@ void blk_mq_release(struct request_queue *q)
|
||||
blk_mq_sysfs_deinit(q);
|
||||
}
|
||||
|
||||
static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
||||
void *queuedata)
|
||||
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
|
||||
struct queue_limits *lim, void *queuedata)
|
||||
{
|
||||
struct queue_limits default_lim = { };
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
q = blk_alloc_queue(set->numa_node);
|
||||
if (!q)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node);
|
||||
if (IS_ERR(q))
|
||||
return q;
|
||||
q->queuedata = queuedata;
|
||||
ret = blk_mq_init_allocated_queue(set, q);
|
||||
if (ret) {
|
||||
@ -4095,20 +4091,15 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
||||
{
|
||||
return blk_mq_init_queue_data(set, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_init_queue);
|
||||
EXPORT_SYMBOL(blk_mq_alloc_queue);
|
||||
|
||||
/**
|
||||
* blk_mq_destroy_queue - shutdown a request queue
|
||||
* @q: request queue to shutdown
|
||||
*
|
||||
* This shuts down a request queue allocated by blk_mq_init_queue(). All future
|
||||
* This shuts down a request queue allocated by blk_mq_alloc_queue(). All future
|
||||
* requests will be failed with -ENODEV. The caller is responsible for dropping
|
||||
* the reference from blk_mq_init_queue() by calling blk_put_queue().
|
||||
* the reference from blk_mq_alloc_queue() by calling blk_put_queue().
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
@ -4129,13 +4120,14 @@ void blk_mq_destroy_queue(struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_destroy_queue);
|
||||
|
||||
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
|
||||
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
|
||||
struct queue_limits *lim, void *queuedata,
|
||||
struct lock_class_key *lkclass)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
|
||||
q = blk_mq_init_queue_data(set, queuedata);
|
||||
q = blk_mq_alloc_queue(set, lim, queuedata);
|
||||
if (IS_ERR(q))
|
||||
return ERR_CAST(q);
|
||||
|
||||
@ -4389,7 +4381,7 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
|
||||
if (set->nr_maps == 1)
|
||||
set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
|
||||
|
||||
if (set->ops->map_queues && !is_kdump_kernel()) {
|
||||
if (set->ops->map_queues) {
|
||||
int i;
|
||||
|
||||
/*
|
||||
@ -4488,14 +4480,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
|
||||
/*
|
||||
* If a crashdump is active, then we are potentially in a very
|
||||
* memory constrained environment. Limit us to 1 queue and
|
||||
* 64 tags to prevent using too much memory.
|
||||
* memory constrained environment. Limit us to 64 tags to prevent
|
||||
* using too much memory.
|
||||
*/
|
||||
if (is_kdump_kernel()) {
|
||||
set->nr_hw_queues = 1;
|
||||
set->nr_maps = 1;
|
||||
if (is_kdump_kernel())
|
||||
set->queue_depth = min(64U, set->queue_depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no use for more h/w queues than cpus if we just have
|
||||
* a single map
|
||||
@ -4525,7 +4515,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
GFP_KERNEL, set->numa_node);
|
||||
if (!set->map[i].mq_map)
|
||||
goto out_free_mq_map;
|
||||
set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
|
||||
set->map[i].nr_queues = set->nr_hw_queues;
|
||||
}
|
||||
|
||||
blk_mq_update_queue_map(set);
|
||||
|
@ -25,53 +25,22 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
|
||||
|
||||
/**
|
||||
* blk_set_default_limits - reset limits to default values
|
||||
* @lim: the queue_limits structure to reset
|
||||
*
|
||||
* Description:
|
||||
* Returns a queue_limit struct to its default state.
|
||||
*/
|
||||
void blk_set_default_limits(struct queue_limits *lim)
|
||||
{
|
||||
lim->max_segments = BLK_MAX_SEGMENTS;
|
||||
lim->max_discard_segments = 1;
|
||||
lim->max_integrity_segments = 0;
|
||||
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
||||
lim->virt_boundary_mask = 0;
|
||||
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
|
||||
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
|
||||
lim->max_user_sectors = lim->max_dev_sectors = 0;
|
||||
lim->chunk_sectors = 0;
|
||||
lim->max_write_zeroes_sectors = 0;
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->max_discard_sectors = 0;
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
lim->max_secure_erase_sectors = 0;
|
||||
lim->discard_granularity = 512;
|
||||
lim->discard_alignment = 0;
|
||||
lim->discard_misaligned = 0;
|
||||
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
|
||||
lim->bounce = BLK_BOUNCE_NONE;
|
||||
lim->alignment_offset = 0;
|
||||
lim->io_opt = 0;
|
||||
lim->misaligned = 0;
|
||||
lim->zoned = false;
|
||||
lim->zone_write_granularity = 0;
|
||||
lim->dma_alignment = 511;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_set_stacking_limits - set default limits for stacking devices
|
||||
* @lim: the queue_limits structure to reset
|
||||
*
|
||||
* Description:
|
||||
* Returns a queue_limit struct to its default state. Should be used
|
||||
* by stacking drivers like DM that have no internal limits.
|
||||
* Prepare queue limits for applying limits from underlying devices using
|
||||
* blk_stack_limits().
|
||||
*/
|
||||
void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
{
|
||||
blk_set_default_limits(lim);
|
||||
memset(lim, 0, sizeof(*lim));
|
||||
lim->logical_block_size = SECTOR_SIZE;
|
||||
lim->physical_block_size = SECTOR_SIZE;
|
||||
lim->io_min = SECTOR_SIZE;
|
||||
lim->discard_granularity = SECTOR_SIZE;
|
||||
lim->dma_alignment = SECTOR_SIZE - 1;
|
||||
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
||||
|
||||
/* Inherit limits from component devices */
|
||||
lim->max_segments = USHRT_MAX;
|
||||
@ -82,9 +51,239 @@ void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
lim->max_dev_sectors = UINT_MAX;
|
||||
lim->max_write_zeroes_sectors = UINT_MAX;
|
||||
lim->max_zone_append_sectors = UINT_MAX;
|
||||
lim->max_user_discard_sectors = UINT_MAX;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_stacking_limits);
|
||||
|
||||
static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
/*
|
||||
* For read-ahead of large files to be effective, we need to read ahead
|
||||
* at least twice the optimal I/O size.
|
||||
*/
|
||||
bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
|
||||
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
|
||||
}
|
||||
|
||||
static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
{
|
||||
if (!lim->zoned) {
|
||||
if (WARN_ON_ONCE(lim->max_open_zones) ||
|
||||
WARN_ON_ONCE(lim->max_active_zones) ||
|
||||
WARN_ON_ONCE(lim->zone_write_granularity) ||
|
||||
WARN_ON_ONCE(lim->max_zone_append_sectors))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
|
||||
return -EINVAL;
|
||||
|
||||
if (lim->zone_write_granularity < lim->logical_block_size)
|
||||
lim->zone_write_granularity = lim->logical_block_size;
|
||||
|
||||
if (lim->max_zone_append_sectors) {
|
||||
/*
|
||||
* The Zone Append size is limited by the maximum I/O size
|
||||
* and the zone size given that it can't span zones.
|
||||
*/
|
||||
lim->max_zone_append_sectors =
|
||||
min3(lim->max_hw_sectors,
|
||||
lim->max_zone_append_sectors,
|
||||
lim->chunk_sectors);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the limits in lim are valid, initialize defaults for unset
|
||||
* values, and cap values based on others where needed.
|
||||
*/
|
||||
static int blk_validate_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int max_hw_sectors;
|
||||
|
||||
/*
|
||||
* Unless otherwise specified, default to 512 byte logical blocks and a
|
||||
* physical block size equal to the logical block size.
|
||||
*/
|
||||
if (!lim->logical_block_size)
|
||||
lim->logical_block_size = SECTOR_SIZE;
|
||||
if (lim->physical_block_size < lim->logical_block_size)
|
||||
lim->physical_block_size = lim->logical_block_size;
|
||||
|
||||
/*
|
||||
* The minimum I/O size defaults to the physical block size unless
|
||||
* explicitly overridden.
|
||||
*/
|
||||
if (lim->io_min < lim->physical_block_size)
|
||||
lim->io_min = lim->physical_block_size;
|
||||
|
||||
/*
|
||||
* max_hw_sectors has a somewhat weird default for historical reason,
|
||||
* but driver really should set their own instead of relying on this
|
||||
* value.
|
||||
*
|
||||
* The block layer relies on the fact that every driver can
|
||||
* handle at lest a page worth of data per I/O, and needs the value
|
||||
* aligned to the logical block size.
|
||||
*/
|
||||
if (!lim->max_hw_sectors)
|
||||
lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
|
||||
if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
|
||||
return -EINVAL;
|
||||
lim->max_hw_sectors = round_down(lim->max_hw_sectors,
|
||||
lim->logical_block_size >> SECTOR_SHIFT);
|
||||
|
||||
/*
|
||||
* The actual max_sectors value is a complex beast and also takes the
|
||||
* max_dev_sectors value (set by SCSI ULPs) and a user configurable
|
||||
* value into account. The ->max_sectors value is always calculated
|
||||
* from these, so directly setting it won't have any effect.
|
||||
*/
|
||||
max_hw_sectors = min_not_zero(lim->max_hw_sectors,
|
||||
lim->max_dev_sectors);
|
||||
if (lim->max_user_sectors) {
|
||||
if (lim->max_user_sectors > max_hw_sectors ||
|
||||
lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
|
||||
return -EINVAL;
|
||||
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
|
||||
} else {
|
||||
lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
|
||||
}
|
||||
lim->max_sectors = round_down(lim->max_sectors,
|
||||
lim->logical_block_size >> SECTOR_SHIFT);
|
||||
|
||||
/*
|
||||
* Random default for the maximum number of segments. Driver should not
|
||||
* rely on this and set their own.
|
||||
*/
|
||||
if (!lim->max_segments)
|
||||
lim->max_segments = BLK_MAX_SEGMENTS;
|
||||
|
||||
lim->max_discard_sectors =
|
||||
min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors);
|
||||
|
||||
if (!lim->max_discard_segments)
|
||||
lim->max_discard_segments = 1;
|
||||
|
||||
if (lim->discard_granularity < lim->physical_block_size)
|
||||
lim->discard_granularity = lim->physical_block_size;
|
||||
|
||||
/*
|
||||
* By default there is no limit on the segment boundary alignment,
|
||||
* but if there is one it can't be smaller than the page size as
|
||||
* that would break all the normal I/O patterns.
|
||||
*/
|
||||
if (!lim->seg_boundary_mask)
|
||||
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
||||
if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Devices that require a virtual boundary do not support scatter/gather
|
||||
* I/O natively, but instead require a descriptor list entry for each
|
||||
* page (which might not be identical to the Linux PAGE_SIZE). Because
|
||||
* of that they are not limited by our notion of "segment size".
|
||||
*/
|
||||
if (lim->virt_boundary_mask) {
|
||||
if (WARN_ON_ONCE(lim->max_segment_size &&
|
||||
lim->max_segment_size != UINT_MAX))
|
||||
return -EINVAL;
|
||||
lim->max_segment_size = UINT_MAX;
|
||||
} else {
|
||||
/*
|
||||
* The maximum segment size has an odd historic 64k default that
|
||||
* drivers probably should override. Just like the I/O size we
|
||||
* require drivers to at least handle a full page per segment.
|
||||
*/
|
||||
if (!lim->max_segment_size)
|
||||
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
|
||||
if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We require drivers to at least do logical block aligned I/O, but
|
||||
* historically could not check for that due to the separate calls
|
||||
* to set the limits. Once the transition is finished the check
|
||||
* below should be narrowed down to check the logical block size.
|
||||
*/
|
||||
if (!lim->dma_alignment)
|
||||
lim->dma_alignment = SECTOR_SIZE - 1;
|
||||
if (WARN_ON_ONCE(lim->dma_alignment > PAGE_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
if (lim->alignment_offset) {
|
||||
lim->alignment_offset &= (lim->physical_block_size - 1);
|
||||
lim->misaligned = 0;
|
||||
}
|
||||
|
||||
return blk_validate_zoned_limits(lim);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the default limits for a newly allocated queue. @lim contains the
|
||||
* initial limits set by the driver, which could be no limit in which case
|
||||
* all fields are cleared to zero.
|
||||
*/
|
||||
int blk_set_default_limits(struct queue_limits *lim)
|
||||
{
|
||||
/*
|
||||
* Most defaults are set by capping the bounds in blk_validate_limits,
|
||||
* but max_user_discard_sectors is special and needs an explicit
|
||||
* initialization to the max value here.
|
||||
*/
|
||||
lim->max_user_discard_sectors = UINT_MAX;
|
||||
return blk_validate_limits(lim);
|
||||
}
|
||||
|
||||
/**
|
||||
* queue_limits_commit_update - commit an atomic update of queue limits
|
||||
* @q: queue to update
|
||||
* @lim: limits to apply
|
||||
*
|
||||
* Apply the limits in @lim that were obtained from queue_limits_start_update()
|
||||
* and updated by the caller to @q.
|
||||
*
|
||||
* Returns 0 if successful, else a negative error code.
|
||||
*/
|
||||
int queue_limits_commit_update(struct request_queue *q,
|
||||
struct queue_limits *lim)
|
||||
__releases(q->limits_lock)
|
||||
{
|
||||
int error = blk_validate_limits(lim);
|
||||
|
||||
if (!error) {
|
||||
q->limits = *lim;
|
||||
if (q->disk)
|
||||
blk_apply_bdi_limits(q->disk->bdi, lim);
|
||||
}
|
||||
mutex_unlock(&q->limits_lock);
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_limits_commit_update);
|
||||
|
||||
/**
|
||||
* queue_limits_set - apply queue limits to queue
|
||||
* @q: queue to update
|
||||
* @lim: limits to apply
|
||||
*
|
||||
* Apply the limits in @lim that were freshly initialized to @q.
|
||||
* To update existing limits use queue_limits_start_update() and
|
||||
* queue_limits_commit_update() instead.
|
||||
*
|
||||
* Returns 0 if successful, else a negative error code.
|
||||
*/
|
||||
int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
|
||||
{
|
||||
mutex_lock(&q->limits_lock);
|
||||
return queue_limits_commit_update(q, lim);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_limits_set);
|
||||
|
||||
/**
|
||||
* blk_queue_bounce_limit - set bounce buffer limit for queue
|
||||
* @q: the request queue for the device
|
||||
@ -177,8 +376,11 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors);
|
||||
void blk_queue_max_discard_sectors(struct request_queue *q,
|
||||
unsigned int max_discard_sectors)
|
||||
{
|
||||
q->limits.max_hw_discard_sectors = max_discard_sectors;
|
||||
q->limits.max_discard_sectors = max_discard_sectors;
|
||||
struct queue_limits *lim = &q->limits;
|
||||
|
||||
lim->max_hw_discard_sectors = max_discard_sectors;
|
||||
lim->max_discard_sectors =
|
||||
min(max_discard_sectors, lim->max_user_discard_sectors);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
|
||||
|
||||
@ -393,15 +595,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
|
||||
|
||||
void disk_update_readahead(struct gendisk *disk)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
/*
|
||||
* For read-ahead of large files to be effective, we need to read ahead
|
||||
* at least twice the optimal I/O size.
|
||||
*/
|
||||
disk->bdi->ra_pages =
|
||||
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
|
||||
disk->bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
|
||||
blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_update_readahead);
|
||||
|
||||
@ -689,33 +883,38 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
t->zone_write_granularity = max(t->zone_write_granularity,
|
||||
b->zone_write_granularity);
|
||||
t->zoned = max(t->zoned, b->zoned);
|
||||
if (!t->zoned) {
|
||||
t->zone_write_granularity = 0;
|
||||
t->max_zone_append_sectors = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_stack_limits);
|
||||
|
||||
/**
|
||||
* disk_stack_limits - adjust queue limits for stacked drivers
|
||||
* @disk: MD/DM gendisk (top)
|
||||
* queue_limits_stack_bdev - adjust queue_limits for stacked devices
|
||||
* @t: the stacking driver limits (top device)
|
||||
* @bdev: the underlying block device (bottom)
|
||||
* @offset: offset to beginning of data within component device
|
||||
* @pfx: prefix to use for warnings logged
|
||||
*
|
||||
* Description:
|
||||
* Merges the limits for a top level gendisk and a bottom level
|
||||
* block_device.
|
||||
* This function is used by stacking drivers like MD and DM to ensure
|
||||
* that all component devices have compatible block sizes and
|
||||
* alignments. The stacking driver must provide a queue_limits
|
||||
* struct (top) and then iteratively call the stacking function for
|
||||
* all component (bottom) devices. The stacking function will
|
||||
* attempt to combine the values and ensure proper alignment.
|
||||
*/
|
||||
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
|
||||
sector_t offset)
|
||||
void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
|
||||
sector_t offset, const char *pfx)
|
||||
{
|
||||
struct request_queue *t = disk->queue;
|
||||
|
||||
if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
|
||||
get_start_sect(bdev) + (offset >> 9)) < 0)
|
||||
if (blk_stack_limits(t, &bdev_get_queue(bdev)->limits,
|
||||
get_start_sect(bdev) + offset))
|
||||
pr_notice("%s: Warning: Device %pg is misaligned\n",
|
||||
disk->disk_name, bdev);
|
||||
|
||||
disk_update_readahead(disk);
|
||||
pfx, bdev);
|
||||
}
|
||||
EXPORT_SYMBOL(disk_stack_limits);
|
||||
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);
|
||||
|
||||
/**
|
||||
* blk_queue_update_dma_pad - update pad mask
|
||||
|
@ -27,7 +27,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat)
|
||||
/* src is a per-cpu stat, mean isn't initialized */
|
||||
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
|
||||
{
|
||||
if (!src->nr_samples)
|
||||
if (dst->nr_samples + src->nr_samples <= dst->nr_samples)
|
||||
return;
|
||||
|
||||
dst->min = min(dst->min, src->min);
|
||||
|
@ -174,23 +174,29 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
|
||||
static ssize_t queue_discard_max_store(struct request_queue *q,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
unsigned long max_discard;
|
||||
ssize_t ret = queue_var_store(&max_discard, page, count);
|
||||
unsigned long max_discard_bytes;
|
||||
struct queue_limits lim;
|
||||
ssize_t ret;
|
||||
int err;
|
||||
|
||||
ret = queue_var_store(&max_discard_bytes, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (max_discard & (q->limits.discard_granularity - 1))
|
||||
if (max_discard_bytes & (q->limits.discard_granularity - 1))
|
||||
return -EINVAL;
|
||||
|
||||
max_discard >>= 9;
|
||||
if (max_discard > UINT_MAX)
|
||||
if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (max_discard > q->limits.max_hw_discard_sectors)
|
||||
max_discard = q->limits.max_hw_discard_sectors;
|
||||
blk_mq_freeze_queue(q);
|
||||
lim = queue_limits_start_update(q);
|
||||
lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
q->limits.max_discard_sectors = max_discard;
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -226,35 +232,22 @@ static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
|
||||
static ssize_t
|
||||
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
|
||||
{
|
||||
unsigned long var;
|
||||
unsigned int max_sectors_kb,
|
||||
max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
|
||||
page_kb = 1 << (PAGE_SHIFT - 10);
|
||||
ssize_t ret = queue_var_store(&var, page, count);
|
||||
unsigned long max_sectors_kb;
|
||||
struct queue_limits lim;
|
||||
ssize_t ret;
|
||||
int err;
|
||||
|
||||
ret = queue_var_store(&max_sectors_kb, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
max_sectors_kb = (unsigned int)var;
|
||||
max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb,
|
||||
q->limits.max_dev_sectors >> 1);
|
||||
if (max_sectors_kb == 0) {
|
||||
q->limits.max_user_sectors = 0;
|
||||
max_sectors_kb = min(max_hw_sectors_kb,
|
||||
BLK_DEF_MAX_SECTORS_CAP >> 1);
|
||||
} else {
|
||||
if (max_sectors_kb > max_hw_sectors_kb ||
|
||||
max_sectors_kb < page_kb)
|
||||
return -EINVAL;
|
||||
q->limits.max_user_sectors = max_sectors_kb << 1;
|
||||
}
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
q->limits.max_sectors = max_sectors_kb << 1;
|
||||
if (q->disk)
|
||||
q->disk->bdi->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
lim = queue_limits_start_update(q);
|
||||
lim.max_user_sectors = max_sectors_kb << 1;
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1098,7 +1098,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
|
||||
while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
|
||||
tg_may_dispatch(tg, bio, NULL)) {
|
||||
|
||||
tg_dispatch_one_bio(tg, bio_data_dir(bio));
|
||||
tg_dispatch_one_bio(tg, READ);
|
||||
nr_reads++;
|
||||
|
||||
if (nr_reads >= max_nr_reads)
|
||||
@ -1108,7 +1108,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
|
||||
while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
|
||||
tg_may_dispatch(tg, bio, NULL)) {
|
||||
|
||||
tg_dispatch_one_bio(tg, bio_data_dir(bio));
|
||||
tg_dispatch_one_bio(tg, WRITE);
|
||||
nr_writes++;
|
||||
|
||||
if (nr_writes >= max_nr_writes)
|
||||
@ -1815,7 +1815,7 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
|
||||
time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
|
||||
ret = tg->latency_target == DFL_LATENCY_TARGET ||
|
||||
tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
|
||||
(ktime_get_ns() >> 10) - tg->last_finish_time > time ||
|
||||
(blk_time_get_ns() >> 10) - tg->last_finish_time > time ||
|
||||
tg->avg_idletime > tg->idletime_threshold ||
|
||||
(tg->latency_target && tg->bio_cnt &&
|
||||
tg->bad_bio_cnt * 5 < tg->bio_cnt);
|
||||
@ -2060,7 +2060,7 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
|
||||
if (last_finish_time == 0)
|
||||
return;
|
||||
|
||||
now = ktime_get_ns() >> 10;
|
||||
now = blk_time_get_ns() >> 10;
|
||||
if (now <= last_finish_time ||
|
||||
last_finish_time == tg->checked_last_finish_time)
|
||||
return;
|
||||
@ -2327,7 +2327,7 @@ void blk_throtl_bio_endio(struct bio *bio)
|
||||
if (!tg->td->limit_valid[LIMIT_LOW])
|
||||
return;
|
||||
|
||||
finish_time_ns = ktime_get_ns();
|
||||
finish_time_ns = blk_time_get_ns();
|
||||
tg->last_finish_time = finish_time_ns >> 10;
|
||||
|
||||
start_time = bio_issue_time(&bio->bi_issue) >> 10;
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "blk-wbt.h"
|
||||
#include "blk-rq-qos.h"
|
||||
#include "elevator.h"
|
||||
#include "blk.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/wbt.h>
|
||||
@ -274,13 +275,12 @@ static inline bool stat_sample_valid(struct blk_rq_stat *stat)
|
||||
|
||||
static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
|
||||
{
|
||||
u64 now, issue = READ_ONCE(rwb->sync_issue);
|
||||
u64 issue = READ_ONCE(rwb->sync_issue);
|
||||
|
||||
if (!issue || !rwb->sync_cookie)
|
||||
return 0;
|
||||
|
||||
now = ktime_to_ns(ktime_get());
|
||||
return now - issue;
|
||||
return blk_time_get_ns() - issue;
|
||||
}
|
||||
|
||||
static inline unsigned int wbt_inflight(struct rq_wb *rwb)
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mm.h>
|
||||
@ -177,8 +176,7 @@ static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
|
||||
}
|
||||
}
|
||||
|
||||
static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
|
||||
gfp_t gfp_mask)
|
||||
static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
|
||||
{
|
||||
struct gendisk *disk = bdev->bd_disk;
|
||||
sector_t capacity = bdev_nr_sectors(bdev);
|
||||
@ -205,7 +203,7 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
|
||||
}
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
|
||||
gfp_mask);
|
||||
GFP_KERNEL);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
sector += zone_sectors;
|
||||
|
||||
@ -223,7 +221,7 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
|
||||
static int blkdev_zone_reset_all(struct block_device *bdev)
|
||||
{
|
||||
struct bio bio;
|
||||
|
||||
@ -238,7 +236,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
|
||||
* @sector: Start sector of the first zone to operate on
|
||||
* @nr_sectors: Number of sectors, should be at least the length of one zone and
|
||||
* must be zone size aligned.
|
||||
* @gfp_mask: Memory allocation flags (for bio_alloc)
|
||||
*
|
||||
* Description:
|
||||
* Perform the specified operation on the range of zones specified by
|
||||
@ -248,7 +245,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
|
||||
* or finish request.
|
||||
*/
|
||||
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
|
||||
sector_t sector, sector_t nr_sectors, gfp_t gfp_mask)
|
||||
sector_t sector, sector_t nr_sectors)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
sector_t zone_sectors = bdev_zone_sectors(bdev);
|
||||
@ -285,12 +282,12 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
|
||||
*/
|
||||
if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
|
||||
if (!blk_queue_zone_resetall(q))
|
||||
return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
|
||||
return blkdev_zone_reset_all(bdev, gfp_mask);
|
||||
return blkdev_zone_reset_all_emulated(bdev);
|
||||
return blkdev_zone_reset_all(bdev);
|
||||
}
|
||||
|
||||
while (sector < end_sector) {
|
||||
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
|
||||
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
sector += zone_sectors;
|
||||
|
||||
@ -419,8 +416,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
||||
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
|
||||
GFP_KERNEL);
|
||||
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
|
||||
|
||||
fail:
|
||||
if (cmd == BLKRESETZONE)
|
||||
|
85
block/blk.h
85
block/blk.h
@ -4,6 +4,8 @@
|
||||
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/timekeeping.h>
|
||||
#include <xen/xen.h>
|
||||
#include "blk-crypto-internal.h"
|
||||
|
||||
@ -70,6 +72,18 @@ static inline int bio_queue_enter(struct bio *bio)
|
||||
return __bio_queue_enter(q, bio);
|
||||
}
|
||||
|
||||
static inline void blk_wait_io(struct completion *done)
|
||||
{
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
|
||||
|
||||
if (timeout)
|
||||
while (!wait_for_completion_io_timeout(done, timeout))
|
||||
;
|
||||
else
|
||||
wait_for_completion_io(done);
|
||||
}
|
||||
|
||||
#define BIO_INLINE_VECS 4
|
||||
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
|
||||
gfp_t gfp_mask);
|
||||
@ -329,7 +343,7 @@ void blk_rq_set_mixed_merge(struct request *rq);
|
||||
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
|
||||
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
|
||||
|
||||
void blk_set_default_limits(struct queue_limits *lim);
|
||||
int blk_set_default_limits(struct queue_limits *lim);
|
||||
int blk_dev_init(void);
|
||||
|
||||
/*
|
||||
@ -447,7 +461,7 @@ static inline void bio_release_page(struct bio *bio, struct page *page)
|
||||
unpin_user_page(page);
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue(int node_id);
|
||||
struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id);
|
||||
|
||||
int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode);
|
||||
|
||||
@ -516,8 +530,75 @@ static inline int req_ref_read(struct request *req)
|
||||
return atomic_read(&req->ref);
|
||||
}
|
||||
|
||||
static inline u64 blk_time_get_ns(void)
|
||||
{
|
||||
struct blk_plug *plug = current->plug;
|
||||
|
||||
if (!plug)
|
||||
return ktime_get_ns();
|
||||
|
||||
/*
|
||||
* 0 could very well be a valid time, but rather than flag "this is
|
||||
* a valid timestamp" separately, just accept that we'll do an extra
|
||||
* ktime_get_ns() if we just happen to get 0 as the current time.
|
||||
*/
|
||||
if (!plug->cur_ktime) {
|
||||
plug->cur_ktime = ktime_get_ns();
|
||||
current->flags |= PF_BLOCK_TS;
|
||||
}
|
||||
return plug->cur_ktime;
|
||||
}
|
||||
|
||||
static inline ktime_t blk_time_get(void)
|
||||
{
|
||||
return ns_to_ktime(blk_time_get_ns());
|
||||
}
|
||||
|
||||
/*
|
||||
* From most significant bit:
|
||||
* 1 bit: reserved for other usage, see below
|
||||
* 12 bits: original size of bio
|
||||
* 51 bits: issue time of bio
|
||||
*/
|
||||
#define BIO_ISSUE_RES_BITS 1
|
||||
#define BIO_ISSUE_SIZE_BITS 12
|
||||
#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS)
|
||||
#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
|
||||
#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
|
||||
#define BIO_ISSUE_SIZE_MASK \
|
||||
(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
|
||||
#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))
|
||||
|
||||
/* Reserved bit for blk-throtl */
|
||||
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)
|
||||
|
||||
static inline u64 __bio_issue_time(u64 time)
|
||||
{
|
||||
return time & BIO_ISSUE_TIME_MASK;
|
||||
}
|
||||
|
||||
static inline u64 bio_issue_time(struct bio_issue *issue)
|
||||
{
|
||||
return __bio_issue_time(issue->value);
|
||||
}
|
||||
|
||||
static inline sector_t bio_issue_size(struct bio_issue *issue)
|
||||
{
|
||||
return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
|
||||
}
|
||||
|
||||
static inline void bio_issue_init(struct bio_issue *issue,
|
||||
sector_t size)
|
||||
{
|
||||
size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
|
||||
issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
|
||||
(blk_time_get_ns() & BIO_ISSUE_TIME_MASK) |
|
||||
((u64)size << BIO_ISSUE_SIZE_SHIFT));
|
||||
}
|
||||
|
||||
void bdev_release(struct file *bdev_file);
|
||||
int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
|
||||
const struct blk_holder_ops *hops, struct file *bdev_file);
|
||||
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
|
||||
|
||||
#endif /* BLK_INTERNAL_H */
|
||||
|
@ -383,7 +383,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
|
||||
if (blk_mq_alloc_tag_set(set))
|
||||
goto out_tag_set;
|
||||
|
||||
q = blk_mq_init_queue(set);
|
||||
q = blk_mq_alloc_queue(set, NULL, NULL);
|
||||
if (IS_ERR(q)) {
|
||||
ret = PTR_ERR(q);
|
||||
goto out_queue;
|
||||
|
@ -1201,7 +1201,7 @@ static int block_uevent(const struct device *dev, struct kobj_uevent_env *env)
|
||||
return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
|
||||
}
|
||||
|
||||
struct class block_class = {
|
||||
const struct class block_class = {
|
||||
.name = "block",
|
||||
.dev_uevent = block_uevent,
|
||||
};
|
||||
@ -1391,19 +1391,21 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
|
||||
struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node,
|
||||
struct lock_class_key *lkclass)
|
||||
{
|
||||
struct queue_limits default_lim = { };
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
|
||||
q = blk_alloc_queue(node);
|
||||
if (!q)
|
||||
return NULL;
|
||||
q = blk_alloc_queue(lim ? lim : &default_lim, node);
|
||||
if (IS_ERR(q))
|
||||
return ERR_CAST(q);
|
||||
|
||||
disk = __alloc_disk_node(q, node, lkclass);
|
||||
if (!disk) {
|
||||
blk_put_queue(q);
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
set_bit(GD_OWNS_QUEUE, &disk->state);
|
||||
return disk;
|
||||
|
@ -8,6 +8,8 @@ struct bd_holder_disk {
|
||||
int refcnt;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(blk_holder_mutex);
|
||||
|
||||
static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
|
||||
struct gendisk *disk)
|
||||
{
|
||||
@ -80,7 +82,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
|
||||
kobject_get(bdev->bd_holder_dir);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
|
||||
mutex_lock(&disk->open_mutex);
|
||||
mutex_lock(&blk_holder_mutex);
|
||||
WARN_ON_ONCE(!bdev->bd_holder);
|
||||
|
||||
holder = bd_find_holder_disk(bdev, disk);
|
||||
@ -108,7 +110,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
|
||||
goto out_del_symlink;
|
||||
list_add(&holder->list, &disk->slave_bdevs);
|
||||
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
mutex_unlock(&blk_holder_mutex);
|
||||
return 0;
|
||||
|
||||
out_del_symlink:
|
||||
@ -116,7 +118,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
|
||||
out_free_holder:
|
||||
kfree(holder);
|
||||
out_unlock:
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
mutex_unlock(&blk_holder_mutex);
|
||||
if (ret)
|
||||
kobject_put(bdev->bd_holder_dir);
|
||||
return ret;
|
||||
@ -140,7 +142,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
|
||||
if (WARN_ON_ONCE(!disk->slave_dir))
|
||||
return;
|
||||
|
||||
mutex_lock(&disk->open_mutex);
|
||||
mutex_lock(&blk_holder_mutex);
|
||||
holder = bd_find_holder_disk(bdev, disk);
|
||||
if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
|
||||
del_symlink(disk->slave_dir, bdev_kobj(bdev));
|
||||
@ -149,6 +151,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
|
||||
list_del_init(&holder->list);
|
||||
kfree(holder);
|
||||
}
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
mutex_unlock(&blk_holder_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
|
||||
|
@ -18,7 +18,7 @@ static int blkpg_do_ioctl(struct block_device *bdev,
|
||||
{
|
||||
struct gendisk *disk = bdev->bd_disk;
|
||||
struct blkpg_partition p;
|
||||
sector_t start, length;
|
||||
sector_t start, length, capacity, end;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
@ -41,6 +41,13 @@ static int blkpg_do_ioctl(struct block_device *bdev,
|
||||
|
||||
start = p.start >> SECTOR_SHIFT;
|
||||
length = p.length >> SECTOR_SHIFT;
|
||||
capacity = get_capacity(disk);
|
||||
|
||||
if (check_add_overflow(start, length, &end))
|
||||
return -EINVAL;
|
||||
|
||||
if (start >= capacity || end > capacity)
|
||||
return -EINVAL;
|
||||
|
||||
switch (op) {
|
||||
case BLKPG_ADD_PARTITION:
|
||||
|
@ -419,21 +419,10 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
|
||||
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
|
||||
sector_t length)
|
||||
{
|
||||
sector_t capacity = get_capacity(disk), end;
|
||||
struct block_device *part;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&disk->open_mutex);
|
||||
if (check_add_overflow(start, length, &end)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (start >= capacity || end > capacity) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!disk_live(disk)) {
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
|
@ -20,6 +20,7 @@ extern void note_bootable_part(dev_t dev, int part, int goodness);
|
||||
* Code to understand MacOS partition tables.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_PPC_PMAC
|
||||
static inline void mac_fix_string(char *stg, int len)
|
||||
{
|
||||
int i;
|
||||
@ -27,6 +28,7 @@ static inline void mac_fix_string(char *stg, int len)
|
||||
for (i = len - 1; i >= 0 && stg[i] == ' '; i--)
|
||||
stg[i] = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int mac_partition(struct parsed_partitions *state)
|
||||
{
|
||||
|
@ -1212,7 +1212,7 @@ static int cmd_start(struct opal_dev *dev, const u8 *uid, const u8 *method)
|
||||
static int start_opal_session_cont(struct opal_dev *dev)
|
||||
{
|
||||
u32 hsn, tsn;
|
||||
int error = 0;
|
||||
int error;
|
||||
|
||||
error = parse_and_check_status(dev);
|
||||
if (error)
|
||||
@ -1354,7 +1354,7 @@ static int get_active_key_cont(struct opal_dev *dev)
|
||||
{
|
||||
const char *activekey;
|
||||
size_t keylen;
|
||||
int error = 0;
|
||||
int error;
|
||||
|
||||
error = parse_and_check_status(dev);
|
||||
if (error)
|
||||
@ -2157,7 +2157,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
|
||||
u8 lr_buffer[OPAL_UID_LENGTH];
|
||||
struct opal_lock_unlock *lkul = data;
|
||||
u8 read_locked = 1, write_locked = 1;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
if (build_locking_range(lr_buffer, sizeof(lr_buffer),
|
||||
lkul->session.opal_key.lr) < 0)
|
||||
@ -2580,7 +2580,7 @@ static int opal_get_discv(struct opal_dev *dev, struct opal_discovery *discv)
|
||||
const struct opal_step discovery0_step = {
|
||||
opal_discovery0, discv
|
||||
};
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&dev->dev_lock);
|
||||
setup_opal_dev(dev);
|
||||
@ -3069,7 +3069,7 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
|
||||
{
|
||||
struct opal_suspend_data *suspend;
|
||||
bool was_failure = false;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
if (!dev)
|
||||
return false;
|
||||
@ -3112,10 +3112,9 @@ static int opal_read_table(struct opal_dev *dev,
|
||||
{ read_table_data, rw_tbl },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
if (!rw_tbl->size)
|
||||
return ret;
|
||||
return 0;
|
||||
|
||||
return execute_steps(dev, read_table_steps,
|
||||
ARRAY_SIZE(read_table_steps));
|
||||
@ -3129,10 +3128,9 @@ static int opal_write_table(struct opal_dev *dev,
|
||||
{ write_table_data, rw_tbl },
|
||||
{ end_opal_session, }
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
if (!rw_tbl->size)
|
||||
return ret;
|
||||
return 0;
|
||||
|
||||
return execute_steps(dev, write_table_steps,
|
||||
ARRAY_SIZE(write_table_steps));
|
||||
|
@ -12,14 +12,14 @@
|
||||
#include <net/checksum.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
typedef __be16 (csum_fn) (void *, unsigned int);
|
||||
typedef __be16 (csum_fn) (__be16, void *, unsigned int);
|
||||
|
||||
static __be16 t10_pi_crc_fn(void *data, unsigned int len)
|
||||
static __be16 t10_pi_crc_fn(__be16 crc, void *data, unsigned int len)
|
||||
{
|
||||
return cpu_to_be16(crc_t10dif(data, len));
|
||||
return cpu_to_be16(crc_t10dif_update(be16_to_cpu(crc), data, len));
|
||||
}
|
||||
|
||||
static __be16 t10_pi_ip_fn(void *data, unsigned int len)
|
||||
static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len)
|
||||
{
|
||||
return (__force __be16)ip_compute_csum(data, len);
|
||||
}
|
||||
@ -32,12 +32,16 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
|
||||
static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
|
||||
csum_fn *fn, enum t10_dif_type type)
|
||||
{
|
||||
u8 offset = iter->pi_offset;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
|
||||
struct t10_pi_tuple *pi = iter->prot_buf;
|
||||
struct t10_pi_tuple *pi = iter->prot_buf + offset;
|
||||
|
||||
pi->guard_tag = fn(iter->data_buf, iter->interval);
|
||||
pi->guard_tag = fn(0, iter->data_buf, iter->interval);
|
||||
if (offset)
|
||||
pi->guard_tag = fn(pi->guard_tag, iter->prot_buf,
|
||||
offset);
|
||||
pi->app_tag = 0;
|
||||
|
||||
if (type == T10_PI_TYPE1_PROTECTION)
|
||||
@ -56,12 +60,13 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
|
||||
static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
|
||||
csum_fn *fn, enum t10_dif_type type)
|
||||
{
|
||||
u8 offset = iter->pi_offset;
|
||||
unsigned int i;
|
||||
|
||||
BUG_ON(type == T10_PI_TYPE0_PROTECTION);
|
||||
|
||||
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
|
||||
struct t10_pi_tuple *pi = iter->prot_buf;
|
||||
struct t10_pi_tuple *pi = iter->prot_buf + offset;
|
||||
__be16 csum;
|
||||
|
||||
if (type == T10_PI_TYPE1_PROTECTION ||
|
||||
@ -83,7 +88,9 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
|
||||
goto next;
|
||||
}
|
||||
|
||||
csum = fn(iter->data_buf, iter->interval);
|
||||
csum = fn(0, iter->data_buf, iter->interval);
|
||||
if (offset)
|
||||
csum = fn(csum, iter->prot_buf, offset);
|
||||
|
||||
if (pi->guard_tag != csum) {
|
||||
pr_err("%s: guard tag error at sector %llu " \
|
||||
@ -134,8 +141,10 @@ static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
|
||||
*/
|
||||
static void t10_pi_type1_prepare(struct request *rq)
|
||||
{
|
||||
const int tuple_sz = rq->q->integrity.tuple_size;
|
||||
struct blk_integrity *bi = &rq->q->integrity;
|
||||
const int tuple_sz = bi->tuple_size;
|
||||
u32 ref_tag = t10_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
@ -154,7 +163,7 @@ static void t10_pi_type1_prepare(struct request *rq)
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len; j += tuple_sz) {
|
||||
struct t10_pi_tuple *pi = p;
|
||||
struct t10_pi_tuple *pi = p + offset;
|
||||
|
||||
if (be32_to_cpu(pi->ref_tag) == virt)
|
||||
pi->ref_tag = cpu_to_be32(ref_tag);
|
||||
@ -183,9 +192,11 @@ static void t10_pi_type1_prepare(struct request *rq)
|
||||
*/
|
||||
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
|
||||
{
|
||||
unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
|
||||
const int tuple_sz = rq->q->integrity.tuple_size;
|
||||
struct blk_integrity *bi = &rq->q->integrity;
|
||||
unsigned intervals = nr_bytes >> bi->interval_exp;
|
||||
const int tuple_sz = bi->tuple_size;
|
||||
u32 ref_tag = t10_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
@ -200,7 +211,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
|
||||
struct t10_pi_tuple *pi = p;
|
||||
struct t10_pi_tuple *pi = p + offset;
|
||||
|
||||
if (be32_to_cpu(pi->ref_tag) == ref_tag)
|
||||
pi->ref_tag = cpu_to_be32(virt);
|
||||
@ -280,20 +291,24 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
|
||||
};
|
||||
EXPORT_SYMBOL(t10_pi_type3_ip);
|
||||
|
||||
static __be64 ext_pi_crc64(void *data, unsigned int len)
|
||||
static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
|
||||
{
|
||||
return cpu_to_be64(crc64_rocksoft(data, len));
|
||||
return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
|
||||
}
|
||||
|
||||
static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter,
|
||||
enum t10_dif_type type)
|
||||
{
|
||||
u8 offset = iter->pi_offset;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
|
||||
struct crc64_pi_tuple *pi = iter->prot_buf;
|
||||
struct crc64_pi_tuple *pi = iter->prot_buf + offset;
|
||||
|
||||
pi->guard_tag = ext_pi_crc64(iter->data_buf, iter->interval);
|
||||
pi->guard_tag = ext_pi_crc64(0, iter->data_buf, iter->interval);
|
||||
if (offset)
|
||||
pi->guard_tag = ext_pi_crc64(be64_to_cpu(pi->guard_tag),
|
||||
iter->prot_buf, offset);
|
||||
pi->app_tag = 0;
|
||||
|
||||
if (type == T10_PI_TYPE1_PROTECTION)
|
||||
@ -319,10 +334,11 @@ static bool ext_pi_ref_escape(u8 *ref_tag)
|
||||
static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
|
||||
enum t10_dif_type type)
|
||||
{
|
||||
u8 offset = iter->pi_offset;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < iter->data_size; i += iter->interval) {
|
||||
struct crc64_pi_tuple *pi = iter->prot_buf;
|
||||
struct crc64_pi_tuple *pi = iter->prot_buf + offset;
|
||||
u64 ref, seed;
|
||||
__be64 csum;
|
||||
|
||||
@ -343,7 +359,11 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
|
||||
goto next;
|
||||
}
|
||||
|
||||
csum = ext_pi_crc64(iter->data_buf, iter->interval);
|
||||
csum = ext_pi_crc64(0, iter->data_buf, iter->interval);
|
||||
if (offset)
|
||||
csum = ext_pi_crc64(be64_to_cpu(csum), iter->prot_buf,
|
||||
offset);
|
||||
|
||||
if (pi->guard_tag != csum) {
|
||||
pr_err("%s: guard tag error at sector %llu " \
|
||||
"(rcvd %016llx, want %016llx)\n",
|
||||
@ -373,8 +393,10 @@ static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter)
|
||||
|
||||
static void ext_pi_type1_prepare(struct request *rq)
|
||||
{
|
||||
const int tuple_sz = rq->q->integrity.tuple_size;
|
||||
struct blk_integrity *bi = &rq->q->integrity;
|
||||
const int tuple_sz = bi->tuple_size;
|
||||
u64 ref_tag = ext_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
@ -393,7 +415,7 @@ static void ext_pi_type1_prepare(struct request *rq)
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len; j += tuple_sz) {
|
||||
struct crc64_pi_tuple *pi = p;
|
||||
struct crc64_pi_tuple *pi = p + offset;
|
||||
u64 ref = get_unaligned_be48(pi->ref_tag);
|
||||
|
||||
if (ref == virt)
|
||||
@ -411,9 +433,11 @@ static void ext_pi_type1_prepare(struct request *rq)
|
||||
|
||||
static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
|
||||
{
|
||||
unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
|
||||
const int tuple_sz = rq->q->integrity.tuple_size;
|
||||
struct blk_integrity *bi = &rq->q->integrity;
|
||||
unsigned intervals = nr_bytes >> bi->interval_exp;
|
||||
const int tuple_sz = bi->tuple_size;
|
||||
u64 ref_tag = ext_pi_ref_tag(rq);
|
||||
u8 offset = bi->pi_offset;
|
||||
struct bio *bio;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
@ -428,7 +452,7 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
|
||||
|
||||
p = bvec_kmap_local(&iv);
|
||||
for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
|
||||
struct crc64_pi_tuple *pi = p;
|
||||
struct crc64_pi_tuple *pi = p + offset;
|
||||
u64 ref = get_unaligned_be48(pi->ref_tag);
|
||||
|
||||
if (ref == ref_tag)
|
||||
|
@ -207,7 +207,7 @@ static inline int devtmpfs_init(void) { return 0; }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
extern struct class block_class;
|
||||
extern const struct class block_class;
|
||||
static inline bool is_blockdev(struct device *dev)
|
||||
{
|
||||
return dev->class == &block_class;
|
||||
|
@ -1779,7 +1779,7 @@ static int fd_alloc_disk(int drive, int system)
|
||||
struct gendisk *disk;
|
||||
int err;
|
||||
|
||||
disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
|
||||
disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
|
@ -24,8 +24,8 @@ static DEFINE_MUTEX(aoeblk_mutex);
|
||||
static struct kmem_cache *buf_pool_cache;
|
||||
static struct dentry *aoe_debugfs_dir;
|
||||
|
||||
/* GPFS needs a larger value than the default. */
|
||||
static int aoe_maxsectors;
|
||||
/* random default picked from the historic block max_sectors cap */
|
||||
static int aoe_maxsectors = 2560;
|
||||
module_param(aoe_maxsectors, int, 0644);
|
||||
MODULE_PARM_DESC(aoe_maxsectors,
|
||||
"When nonzero, set the maximum number of sectors per I/O request");
|
||||
@ -334,6 +334,10 @@ aoeblk_gdalloc(void *vp)
|
||||
mempool_t *mp;
|
||||
struct blk_mq_tag_set *set;
|
||||
sector_t ssize;
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = aoe_maxsectors,
|
||||
.io_opt = SZ_2M,
|
||||
};
|
||||
ulong flags;
|
||||
int late = 0;
|
||||
int err;
|
||||
@ -371,7 +375,7 @@ aoeblk_gdalloc(void *vp)
|
||||
goto err_mempool;
|
||||
}
|
||||
|
||||
gd = blk_mq_alloc_disk(set, d);
|
||||
gd = blk_mq_alloc_disk(set, &lim, d);
|
||||
if (IS_ERR(gd)) {
|
||||
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
@ -384,14 +388,9 @@ aoeblk_gdalloc(void *vp)
|
||||
WARN_ON(d->flags & DEVFL_TKILL);
|
||||
WARN_ON(d->gd);
|
||||
WARN_ON(d->flags & DEVFL_UP);
|
||||
/* random number picked from the history block max_sectors cap */
|
||||
blk_queue_max_hw_sectors(gd->queue, 2560u);
|
||||
blk_queue_io_opt(gd->queue, SZ_2M);
|
||||
d->bufpool = mp;
|
||||
d->blkq = gd->queue;
|
||||
d->gd = gd;
|
||||
if (aoe_maxsectors)
|
||||
blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors);
|
||||
gd->major = AOE_MAJOR;
|
||||
gd->first_minor = d->sysminor;
|
||||
gd->minors = AOE_PARTITIONS;
|
||||
|
@ -419,13 +419,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu
|
||||
rcu_read_lock();
|
||||
for_each_netdev_rcu(&init_net, ifp) {
|
||||
dev_hold(ifp);
|
||||
if (!is_aoe_netif(ifp))
|
||||
goto cont;
|
||||
if (!is_aoe_netif(ifp)) {
|
||||
dev_put(ifp);
|
||||
continue;
|
||||
}
|
||||
|
||||
skb = new_skb(sizeof *h + sizeof *ch);
|
||||
if (skb == NULL) {
|
||||
printk(KERN_INFO "aoe: skb alloc failure\n");
|
||||
goto cont;
|
||||
dev_put(ifp);
|
||||
continue;
|
||||
}
|
||||
skb_put(skb, sizeof *h + sizeof *ch);
|
||||
skb->dev = ifp;
|
||||
@ -440,9 +443,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu
|
||||
h->major = cpu_to_be16(aoemajor);
|
||||
h->minor = aoeminor;
|
||||
h->cmd = AOECMD_CFG;
|
||||
|
||||
cont:
|
||||
dev_put(ifp);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -63,6 +63,7 @@ tx(int id) __must_hold(&txlock)
|
||||
pr_warn("aoe: packet could not be sent on %s. %s\n",
|
||||
ifp ? ifp->name : "netif",
|
||||
"consider increasing tx_queue_len");
|
||||
dev_put(ifp);
|
||||
spin_lock_irq(&txlock);
|
||||
}
|
||||
return 0;
|
||||
|
@ -1994,7 +1994,7 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
|
||||
disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
|
||||
disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
|
@ -318,6 +318,16 @@ static int brd_alloc(int i)
|
||||
struct gendisk *disk;
|
||||
char buf[DISK_NAME_LEN];
|
||||
int err = -ENOMEM;
|
||||
struct queue_limits lim = {
|
||||
/*
|
||||
* This is so fdisk will align partitions on 4k, because of
|
||||
* direct_access API needing 4k alignment, returning a PFN
|
||||
* (This is only a problem on very small devices <= 4M,
|
||||
* otherwise fdisk will align on 1M. Regardless this call
|
||||
* is harmless)
|
||||
*/
|
||||
.physical_block_size = PAGE_SIZE,
|
||||
};
|
||||
|
||||
list_for_each_entry(brd, &brd_devices, brd_list)
|
||||
if (brd->brd_number == i)
|
||||
@ -335,10 +345,11 @@ static int brd_alloc(int i)
|
||||
debugfs_create_u64(buf, 0444, brd_debugfs_dir,
|
||||
&brd->brd_nr_pages);
|
||||
|
||||
disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_free_dev;
|
||||
|
||||
}
|
||||
disk->major = RAMDISK_MAJOR;
|
||||
disk->first_minor = i * max_part;
|
||||
disk->minors = max_part;
|
||||
@ -347,15 +358,6 @@ static int brd_alloc(int i)
|
||||
strscpy(disk->disk_name, buf, DISK_NAME_LEN);
|
||||
set_capacity(disk, rd_size * 2);
|
||||
|
||||
/*
|
||||
* This is so fdisk will align partitions on 4k, because of
|
||||
* direct_access API needing 4k alignment, returning a PFN
|
||||
* (This is only a problem on very small devices <= 4M,
|
||||
* otherwise fdisk will align on 1M. Regardless this call
|
||||
* is harmless)
|
||||
*/
|
||||
blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
|
||||
|
||||
/* Tell the block layer that this is not a rotational device */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
|
||||
|
@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
int id;
|
||||
int vnr = adm_ctx->volume;
|
||||
enum drbd_ret_code err = ERR_NOMEM;
|
||||
struct queue_limits lim = {
|
||||
/*
|
||||
* Setting the max_hw_sectors to an odd value of 8kibyte here.
|
||||
* This triggers a max_bio_size message upon first attach or
|
||||
* connect.
|
||||
*/
|
||||
.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
|
||||
};
|
||||
|
||||
device = minor_to_device(minor);
|
||||
if (device)
|
||||
@ -2708,9 +2716,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
|
||||
drbd_init_set_defaults(device);
|
||||
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_no_disk;
|
||||
}
|
||||
|
||||
device->vdisk = disk;
|
||||
device->rq_queue = disk->queue;
|
||||
@ -2727,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
|
||||
blk_queue_write_cache(disk->queue, true, true);
|
||||
/* Setting the max_hw_sectors to an odd value of 8kibyte here
|
||||
This triggers a max_bio_size message upon first attach or connect */
|
||||
blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8);
|
||||
|
||||
device->md_io.page = alloc_page(GFP_KERNEL);
|
||||
if (!device->md_io.page)
|
||||
|
@ -1189,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity)
|
||||
static unsigned int drbd_max_peer_bio_size(struct drbd_device *device)
|
||||
{
|
||||
q->limits.discard_granularity = granularity;
|
||||
/*
|
||||
* We may ignore peer limits if the peer is modern enough. From 8.3.8
|
||||
* onwards the peer can use multiple BIOs for a single peer_request.
|
||||
*/
|
||||
if (device->state.conn < C_WF_REPORT_PARAMS)
|
||||
return device->peer_max_bio_size;
|
||||
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 94)
|
||||
return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
|
||||
|
||||
/*
|
||||
* Correct old drbd (up to 8.3.7) if it believes it can do more than
|
||||
* 32KiB.
|
||||
*/
|
||||
if (first_peer_device(device)->connection->agreed_pro_version == 94)
|
||||
return DRBD_MAX_SIZE_H80_PACKET;
|
||||
|
||||
/*
|
||||
* drbd 8.3.8 onwards, before 8.4.0
|
||||
*/
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 100)
|
||||
return DRBD_MAX_BIO_SIZE_P95;
|
||||
return DRBD_MAX_BIO_SIZE;
|
||||
}
|
||||
|
||||
static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
|
||||
@ -1204,24 +1226,81 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
|
||||
return AL_EXTENT_SIZE >> 9;
|
||||
}
|
||||
|
||||
static void decide_on_discard_support(struct drbd_device *device,
|
||||
static bool drbd_discard_supported(struct drbd_connection *connection,
|
||||
struct drbd_backing_dev *bdev)
|
||||
{
|
||||
struct drbd_connection *connection =
|
||||
first_peer_device(device)->connection;
|
||||
struct request_queue *q = device->rq_queue;
|
||||
unsigned int max_discard_sectors;
|
||||
|
||||
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
|
||||
goto not_supported;
|
||||
return false;
|
||||
|
||||
if (connection->cstate >= C_CONNECTED &&
|
||||
!(connection->agreed_features & DRBD_FF_TRIM)) {
|
||||
drbd_info(connection,
|
||||
"peer DRBD too old, does not support TRIM: disabling discards\n");
|
||||
goto not_supported;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* This is the workaround for "bio would need to, but cannot, be split" */
|
||||
static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device)
|
||||
{
|
||||
unsigned int max_segments;
|
||||
|
||||
rcu_read_lock();
|
||||
max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!max_segments)
|
||||
return BLK_MAX_SEGMENTS;
|
||||
return max_segments;
|
||||
}
|
||||
|
||||
void drbd_reconsider_queue_parameters(struct drbd_device *device,
|
||||
struct drbd_backing_dev *bdev, struct o_qlim *o)
|
||||
{
|
||||
struct drbd_connection *connection =
|
||||
first_peer_device(device)->connection;
|
||||
struct request_queue * const q = device->rq_queue;
|
||||
unsigned int now = queue_max_hw_sectors(q) << 9;
|
||||
struct queue_limits lim;
|
||||
struct request_queue *b = NULL;
|
||||
unsigned int new;
|
||||
|
||||
if (bdev) {
|
||||
b = bdev->backing_bdev->bd_disk->queue;
|
||||
|
||||
device->local_max_bio_size =
|
||||
queue_max_hw_sectors(b) << SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* We may later detach and re-attach on a disconnected Primary. Avoid
|
||||
* decreasing the value in this case.
|
||||
*
|
||||
* We want to store what we know the peer DRBD can handle, not what the
|
||||
* peer IO backend can handle.
|
||||
*/
|
||||
new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size,
|
||||
max(drbd_max_peer_bio_size(device), device->peer_max_bio_size));
|
||||
if (new != now) {
|
||||
if (device->state.role == R_PRIMARY && new < now)
|
||||
drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n",
|
||||
new, now);
|
||||
drbd_info(device, "max BIO size = %u\n", new);
|
||||
}
|
||||
|
||||
lim = queue_limits_start_update(q);
|
||||
if (bdev) {
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.max_segments = drbd_backing_dev_max_segments(device);
|
||||
} else {
|
||||
lim.max_segments = BLK_MAX_SEGMENTS;
|
||||
}
|
||||
|
||||
lim.max_hw_sectors = new >> SECTOR_SHIFT;
|
||||
lim.seg_boundary_mask = PAGE_SIZE - 1;
|
||||
|
||||
/*
|
||||
* We don't care for the granularity, really.
|
||||
*
|
||||
@ -1230,123 +1309,36 @@ static void decide_on_discard_support(struct drbd_device *device,
|
||||
* problem, really. If you care, you need to use devices with similar
|
||||
* topology on all peers.
|
||||
*/
|
||||
blk_queue_discard_granularity(q, 512);
|
||||
max_discard_sectors = drbd_max_discard_sectors(connection);
|
||||
blk_queue_max_discard_sectors(q, max_discard_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
|
||||
return;
|
||||
if (drbd_discard_supported(connection, bdev)) {
|
||||
lim.discard_granularity = 512;
|
||||
lim.max_hw_discard_sectors =
|
||||
drbd_max_discard_sectors(connection);
|
||||
} else {
|
||||
lim.discard_granularity = 0;
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
}
|
||||
|
||||
not_supported:
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
}
|
||||
if (bdev)
|
||||
blk_stack_limits(&lim, &b->limits, 0);
|
||||
|
||||
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
|
||||
{
|
||||
/* Fixup max_write_zeroes_sectors after blk_stack_limits():
|
||||
* if we can handle "zeroes" efficiently on the protocol,
|
||||
* we want to do that, even if our backend does not announce
|
||||
* max_write_zeroes_sectors itself. */
|
||||
struct drbd_connection *connection = first_peer_device(device)->connection;
|
||||
/* If the peer announces WZEROES support, use it. Otherwise, rather
|
||||
* send explicit zeroes than rely on some discard-zeroes-data magic. */
|
||||
/*
|
||||
* If we can handle "zeroes" efficiently on the protocol, we want to do
|
||||
* that, even if our backend does not announce max_write_zeroes_sectors
|
||||
* itself.
|
||||
*/
|
||||
if (connection->agreed_features & DRBD_FF_WZEROES)
|
||||
q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
|
||||
lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
|
||||
else
|
||||
q->limits.max_write_zeroes_sectors = 0;
|
||||
}
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
|
||||
static void fixup_discard_support(struct drbd_device *device, struct request_queue *q)
|
||||
{
|
||||
unsigned int max_discard = device->rq_queue->limits.max_discard_sectors;
|
||||
unsigned int discard_granularity =
|
||||
device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT;
|
||||
|
||||
if (discard_granularity > max_discard) {
|
||||
blk_queue_discard_granularity(q, 0);
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
|
||||
unsigned int max_bio_size, struct o_qlim *o)
|
||||
{
|
||||
struct request_queue * const q = device->rq_queue;
|
||||
unsigned int max_hw_sectors = max_bio_size >> 9;
|
||||
unsigned int max_segments = 0;
|
||||
struct request_queue *b = NULL;
|
||||
struct disk_conf *dc;
|
||||
|
||||
if (bdev) {
|
||||
b = bdev->backing_bdev->bd_disk->queue;
|
||||
|
||||
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
|
||||
rcu_read_lock();
|
||||
dc = rcu_dereference(device->ldev->disk_conf);
|
||||
max_segments = dc->max_bio_bvecs;
|
||||
rcu_read_unlock();
|
||||
|
||||
blk_set_stacking_limits(&q->limits);
|
||||
if ((lim.discard_granularity >> SECTOR_SHIFT) >
|
||||
lim.max_hw_discard_sectors) {
|
||||
lim.discard_granularity = 0;
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
}
|
||||
|
||||
blk_queue_max_hw_sectors(q, max_hw_sectors);
|
||||
/* This is the workaround for "bio would need to, but cannot, be split" */
|
||||
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE-1);
|
||||
decide_on_discard_support(device, bdev);
|
||||
|
||||
if (b) {
|
||||
blk_stack_limits(&q->limits, &b->limits, 0);
|
||||
disk_update_readahead(device->vdisk);
|
||||
}
|
||||
fixup_write_zeroes(device, q);
|
||||
fixup_discard_support(device, q);
|
||||
}
|
||||
|
||||
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
|
||||
{
|
||||
unsigned int now, new, local, peer;
|
||||
|
||||
now = queue_max_hw_sectors(device->rq_queue) << 9;
|
||||
local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
|
||||
peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
|
||||
|
||||
if (bdev) {
|
||||
local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
|
||||
device->local_max_bio_size = local;
|
||||
}
|
||||
local = min(local, DRBD_MAX_BIO_SIZE);
|
||||
|
||||
/* We may ignore peer limits if the peer is modern enough.
|
||||
Because new from 8.3.8 onwards the peer can use multiple
|
||||
BIOs for a single peer_request */
|
||||
if (device->state.conn >= C_WF_REPORT_PARAMS) {
|
||||
if (first_peer_device(device)->connection->agreed_pro_version < 94)
|
||||
peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
|
||||
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
|
||||
else if (first_peer_device(device)->connection->agreed_pro_version == 94)
|
||||
peer = DRBD_MAX_SIZE_H80_PACKET;
|
||||
else if (first_peer_device(device)->connection->agreed_pro_version < 100)
|
||||
peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
|
||||
else
|
||||
peer = DRBD_MAX_BIO_SIZE;
|
||||
|
||||
/* We may later detach and re-attach on a disconnected Primary.
|
||||
* Avoid this setting to jump back in that case.
|
||||
* We want to store what we know the peer DRBD can handle,
|
||||
* not what the peer IO backend can handle. */
|
||||
if (peer > device->peer_max_bio_size)
|
||||
device->peer_max_bio_size = peer;
|
||||
}
|
||||
new = min(local, peer);
|
||||
|
||||
if (device->state.role == R_PRIMARY && new < now)
|
||||
drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
|
||||
|
||||
if (new != now)
|
||||
drbd_info(device, "max BIO size = %u\n", new);
|
||||
|
||||
drbd_setup_queue_param(device, bdev, new, o);
|
||||
if (queue_limits_commit_update(q, &lim))
|
||||
drbd_err(device, "setting new queue limits failed\n");
|
||||
}
|
||||
|
||||
/* Starts the worker thread */
|
||||
|
@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||
|
||||
int notify_resource_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_resource_state_change *resource_state_change,
|
||||
void *state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_resource_state_change *resource_state_change = state_change;
|
||||
struct drbd_resource *resource = resource_state_change->resource;
|
||||
struct resource_info resource_info = {
|
||||
.res_role = resource_state_change->role[NEW],
|
||||
@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb,
|
||||
|
||||
int notify_connection_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_connection_state_change *connection_state_change,
|
||||
void *state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_connection *connection = connection_state_change->connection;
|
||||
struct drbd_connection_state_change *p = state_change;
|
||||
struct drbd_connection *connection = p->connection;
|
||||
struct connection_info connection_info = {
|
||||
.conn_connection_state = connection_state_change->cstate[NEW],
|
||||
.conn_role = connection_state_change->peer_role[NEW],
|
||||
.conn_connection_state = p->cstate[NEW],
|
||||
.conn_role = p->peer_role[NEW],
|
||||
};
|
||||
|
||||
return notify_connection_state(skb, seq, connection, &connection_info, type);
|
||||
@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb,
|
||||
|
||||
int notify_device_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_device_state_change *device_state_change,
|
||||
void *state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_device_state_change *device_state_change = state_change;
|
||||
struct drbd_device *device = device_state_change->device;
|
||||
struct device_info device_info = {
|
||||
.dev_disk_state = device_state_change->disk_state[NEW],
|
||||
@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb,
|
||||
|
||||
int notify_peer_device_state_change(struct sk_buff *skb,
|
||||
unsigned int seq,
|
||||
struct drbd_peer_device_state_change *p,
|
||||
void *state_change,
|
||||
enum drbd_notification_type type)
|
||||
{
|
||||
struct drbd_peer_device_state_change *p = state_change;
|
||||
struct drbd_peer_device *peer_device = p->peer_device;
|
||||
struct peer_device_info peer_device_info = {
|
||||
.peer_repl_state = p->repl_state[NEW],
|
||||
@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
|
||||
struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
|
||||
bool resource_state_has_changed;
|
||||
unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
|
||||
int (*last_func)(struct sk_buff *, unsigned int, void *,
|
||||
enum drbd_notification_type) = NULL;
|
||||
int (*last_func)(struct sk_buff *, unsigned int,
|
||||
void *, enum drbd_notification_type) = NULL;
|
||||
void *last_arg = NULL;
|
||||
|
||||
#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
|
||||
@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change)
|
||||
})
|
||||
#define REMEMBER_STATE_CHANGE(func, arg, type) \
|
||||
({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
|
||||
last_func = (typeof(last_func))func; \
|
||||
last_func = func; \
|
||||
last_arg = arg; \
|
||||
})
|
||||
|
||||
|
@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *);
|
||||
|
||||
extern int notify_resource_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_resource_state_change *,
|
||||
void *,
|
||||
enum drbd_notification_type type);
|
||||
extern int notify_connection_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_connection_state_change *,
|
||||
void *,
|
||||
enum drbd_notification_type type);
|
||||
extern int notify_device_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_device_state_change *,
|
||||
void *,
|
||||
enum drbd_notification_type type);
|
||||
extern int notify_peer_device_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_peer_device_state_change *,
|
||||
void *,
|
||||
enum drbd_notification_type type);
|
||||
|
||||
#endif /* DRBD_STATE_CHANGE_H */
|
||||
|
@ -530,14 +530,13 @@ static struct format_descr format_req;
|
||||
static char *floppy_track_buffer;
|
||||
static int max_buffer_sectors;
|
||||
|
||||
typedef void (*done_f)(int);
|
||||
static const struct cont_t {
|
||||
void (*interrupt)(void);
|
||||
/* this is called after the interrupt of the
|
||||
* main command */
|
||||
void (*redo)(void); /* this is called to retry the operation */
|
||||
void (*error)(void); /* this is called to tally an error */
|
||||
done_f done; /* this is called to say if the operation has
|
||||
void (*done)(int); /* this is called to say if the operation has
|
||||
* succeeded/failed */
|
||||
} *cont;
|
||||
|
||||
@ -985,6 +984,10 @@ static void empty(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void empty_done(int result)
|
||||
{
|
||||
}
|
||||
|
||||
static void (*floppy_work_fn)(void);
|
||||
|
||||
static void floppy_work_workfn(struct work_struct *work)
|
||||
@ -1998,14 +2001,14 @@ static const struct cont_t wakeup_cont = {
|
||||
.interrupt = empty,
|
||||
.redo = do_wakeup,
|
||||
.error = empty,
|
||||
.done = (done_f)empty
|
||||
.done = empty_done,
|
||||
};
|
||||
|
||||
static const struct cont_t intr_cont = {
|
||||
.interrupt = empty,
|
||||
.redo = process_fd_request,
|
||||
.error = empty,
|
||||
.done = (done_f)empty
|
||||
.done = empty_done,
|
||||
};
|
||||
|
||||
/* schedules handler, waiting for completion. May be interrupted, will then
|
||||
@ -4513,13 +4516,15 @@ static bool floppy_available(int drive)
|
||||
|
||||
static int floppy_alloc_disk(unsigned int drive, unsigned int type)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = 64,
|
||||
};
|
||||
struct gendisk *disk;
|
||||
|
||||
disk = blk_mq_alloc_disk(&tag_sets[drive], NULL);
|
||||
disk = blk_mq_alloc_disk(&tag_sets[drive], &lim, NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
blk_queue_max_hw_sectors(disk->queue, 64);
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = TOMINOR(drive) | (type << 2);
|
||||
disk->minors = 1;
|
||||
|
@ -750,12 +750,13 @@ static void loop_sysfs_exit(struct loop_device *lo)
|
||||
&loop_attribute_group);
|
||||
}
|
||||
|
||||
static void loop_config_discard(struct loop_device *lo)
|
||||
static void loop_config_discard(struct loop_device *lo,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct file *file = lo->lo_backing_file;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct request_queue *q = lo->lo_queue;
|
||||
u32 granularity, max_discard_sectors;
|
||||
u32 granularity = 0, max_discard_sectors = 0;
|
||||
struct kstatfs sbuf;
|
||||
|
||||
/*
|
||||
* If the backing device is a block device, mirror its zeroing
|
||||
@ -775,29 +776,17 @@ static void loop_config_discard(struct loop_device *lo)
|
||||
* We use punch hole to reclaim the free space used by the
|
||||
* image a.k.a. discard.
|
||||
*/
|
||||
} else if (!file->f_op->fallocate) {
|
||||
max_discard_sectors = 0;
|
||||
granularity = 0;
|
||||
|
||||
} else {
|
||||
struct kstatfs sbuf;
|
||||
|
||||
} else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) {
|
||||
max_discard_sectors = UINT_MAX >> 9;
|
||||
if (!vfs_statfs(&file->f_path, &sbuf))
|
||||
granularity = sbuf.f_bsize;
|
||||
else
|
||||
max_discard_sectors = 0;
|
||||
granularity = sbuf.f_bsize;
|
||||
}
|
||||
|
||||
if (max_discard_sectors) {
|
||||
q->limits.discard_granularity = granularity;
|
||||
blk_queue_max_discard_sectors(q, max_discard_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
|
||||
} else {
|
||||
q->limits.discard_granularity = 0;
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
blk_queue_max_write_zeroes_sectors(q, 0);
|
||||
}
|
||||
lim->max_hw_discard_sectors = max_discard_sectors;
|
||||
lim->max_write_zeroes_sectors = max_discard_sectors;
|
||||
if (max_discard_sectors)
|
||||
lim->discard_granularity = granularity;
|
||||
else
|
||||
lim->discard_granularity = 0;
|
||||
}
|
||||
|
||||
struct loop_worker {
|
||||
@ -986,6 +975,20 @@ loop_set_status_from_info(struct loop_device *lo,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize,
|
||||
bool update_discard_settings)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
|
||||
lim = queue_limits_start_update(lo->lo_queue);
|
||||
lim.logical_block_size = bsize;
|
||||
lim.physical_block_size = bsize;
|
||||
lim.io_min = bsize;
|
||||
if (update_discard_settings)
|
||||
loop_config_discard(lo, &lim);
|
||||
return queue_limits_commit_update(lo->lo_queue, &lim);
|
||||
}
|
||||
|
||||
static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
struct block_device *bdev,
|
||||
const struct loop_config *config)
|
||||
@ -1083,11 +1086,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
else
|
||||
bsize = 512;
|
||||
|
||||
blk_queue_logical_block_size(lo->lo_queue, bsize);
|
||||
blk_queue_physical_block_size(lo->lo_queue, bsize);
|
||||
blk_queue_io_min(lo->lo_queue, bsize);
|
||||
error = loop_reconfigure_limits(lo, bsize, true);
|
||||
if (WARN_ON_ONCE(error))
|
||||
goto out_unlock;
|
||||
|
||||
loop_config_discard(lo);
|
||||
loop_update_rotational(lo);
|
||||
loop_update_dio(lo);
|
||||
loop_sysfs_init(lo);
|
||||
@ -1154,9 +1156,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
lo->lo_offset = 0;
|
||||
lo->lo_sizelimit = 0;
|
||||
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
|
||||
blk_queue_logical_block_size(lo->lo_queue, 512);
|
||||
blk_queue_physical_block_size(lo->lo_queue, 512);
|
||||
blk_queue_io_min(lo->lo_queue, 512);
|
||||
loop_reconfigure_limits(lo, 512, false);
|
||||
invalidate_disk(lo->lo_disk);
|
||||
loop_sysfs_exit(lo);
|
||||
/* let user-space know about this change */
|
||||
@ -1488,9 +1488,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
|
||||
invalidate_bdev(lo->lo_device);
|
||||
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
blk_queue_logical_block_size(lo->lo_queue, arg);
|
||||
blk_queue_physical_block_size(lo->lo_queue, arg);
|
||||
blk_queue_io_min(lo->lo_queue, arg);
|
||||
err = loop_reconfigure_limits(lo, arg, false);
|
||||
loop_update_dio(lo);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
@ -1982,6 +1980,12 @@ static const struct blk_mq_ops loop_mq_ops = {
|
||||
|
||||
static int loop_add(int i)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
/*
|
||||
* Random number picked from the historic block max_sectors cap.
|
||||
*/
|
||||
.max_hw_sectors = 2560u,
|
||||
};
|
||||
struct loop_device *lo;
|
||||
struct gendisk *disk;
|
||||
int err;
|
||||
@ -2025,16 +2029,13 @@ static int loop_add(int i)
|
||||
if (err)
|
||||
goto out_free_idr;
|
||||
|
||||
disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo);
|
||||
disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
lo->lo_queue = lo->lo_disk->queue;
|
||||
|
||||
/* random number picked from the history block max_sectors cap */
|
||||
blk_queue_max_hw_sectors(lo->lo_queue, 2560u);
|
||||
|
||||
/*
|
||||
* By default, we do buffer IO, so it doesn't make sense to enable
|
||||
* merge because the I/O submitted to backing file is handled page by
|
||||
|
@ -3401,6 +3401,12 @@ static const struct blk_mq_ops mtip_mq_ops = {
|
||||
*/
|
||||
static int mtip_block_initialize(struct driver_data *dd)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.physical_block_size = 4096,
|
||||
.max_hw_sectors = 0xffff,
|
||||
.max_segments = MTIP_MAX_SG,
|
||||
.max_segment_size = 0x400000,
|
||||
};
|
||||
int rv = 0, wait_for_rebuild = 0;
|
||||
sector_t capacity;
|
||||
unsigned int index = 0;
|
||||
@ -3431,7 +3437,7 @@ static int mtip_block_initialize(struct driver_data *dd)
|
||||
goto block_queue_alloc_tag_error;
|
||||
}
|
||||
|
||||
dd->disk = blk_mq_alloc_disk(&dd->tags, dd);
|
||||
dd->disk = blk_mq_alloc_disk(&dd->tags, &lim, dd);
|
||||
if (IS_ERR(dd->disk)) {
|
||||
dev_err(&dd->pdev->dev,
|
||||
"Unable to allocate request queue\n");
|
||||
@ -3481,12 +3487,7 @@ static int mtip_block_initialize(struct driver_data *dd)
|
||||
/* Set device limits. */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
|
||||
blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
|
||||
blk_queue_physical_block_size(dd->queue, 4096);
|
||||
blk_queue_max_hw_sectors(dd->queue, 0xffff);
|
||||
blk_queue_max_segment_size(dd->queue, 0x400000);
|
||||
dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
|
||||
blk_queue_io_min(dd->queue, 4096);
|
||||
|
||||
/* Set the capacity of the device in 512 byte sectors. */
|
||||
if (!(mtip_hw_get_capacity(dd, &capacity))) {
|
||||
|
@ -114,6 +114,10 @@ static const struct block_device_operations n64cart_fops = {
|
||||
*/
|
||||
static int __init n64cart_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.physical_block_size = 4096,
|
||||
.logical_block_size = 4096,
|
||||
};
|
||||
struct gendisk *disk;
|
||||
int err = -ENOMEM;
|
||||
|
||||
@ -131,9 +135,11 @@ static int __init n64cart_probe(struct platform_device *pdev)
|
||||
if (IS_ERR(reg_base))
|
||||
return PTR_ERR(reg_base);
|
||||
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out;
|
||||
}
|
||||
|
||||
disk->first_minor = 0;
|
||||
disk->flags = GENHD_FL_NO_PART;
|
||||
@ -145,8 +151,6 @@ static int __init n64cart_probe(struct platform_device *pdev)
|
||||
set_disk_ro(disk, 1);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_physical_block_size(disk->queue, 4096);
|
||||
blk_queue_logical_block_size(disk->queue, 4096);
|
||||
|
||||
err = add_disk(disk);
|
||||
if (err)
|
||||
|
@ -316,9 +316,12 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
|
||||
nsock->sent = 0;
|
||||
}
|
||||
|
||||
static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
loff_t blksize)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
int error;
|
||||
|
||||
if (!blksize)
|
||||
blksize = 1u << NBD_DEF_BLKSIZE_BITS;
|
||||
|
||||
@ -334,10 +337,16 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
if (!nbd->pid)
|
||||
return 0;
|
||||
|
||||
lim = queue_limits_start_update(nbd->disk->queue);
|
||||
if (nbd->config->flags & NBD_FLAG_SEND_TRIM)
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
|
||||
blk_queue_logical_block_size(nbd->disk->queue, blksize);
|
||||
blk_queue_physical_block_size(nbd->disk->queue, blksize);
|
||||
lim.max_hw_discard_sectors = UINT_MAX;
|
||||
else
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
lim.logical_block_size = blksize;
|
||||
lim.physical_block_size = blksize;
|
||||
error = queue_limits_commit_update(nbd->disk->queue, &lim);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (max_part)
|
||||
set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
|
||||
@ -346,6 +355,18 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
loff_t blksize)
|
||||
{
|
||||
int error;
|
||||
|
||||
blk_mq_freeze_queue(nbd->disk->queue);
|
||||
error = __nbd_set_size(nbd, bytesize, blksize);
|
||||
blk_mq_unfreeze_queue(nbd->disk->queue);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static void nbd_complete_rq(struct request *req)
|
||||
{
|
||||
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
|
||||
@ -1351,7 +1372,6 @@ static void nbd_config_put(struct nbd_device *nbd)
|
||||
nbd->config = NULL;
|
||||
|
||||
nbd->tag_set.timeout = 0;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, 0);
|
||||
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
nbd_put(nbd);
|
||||
@ -1783,6 +1803,12 @@ static const struct blk_mq_ops nbd_mq_ops = {
|
||||
|
||||
static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = 65536,
|
||||
.max_user_sectors = 256,
|
||||
.max_segments = USHRT_MAX,
|
||||
.max_segment_size = UINT_MAX,
|
||||
};
|
||||
struct nbd_device *nbd;
|
||||
struct gendisk *disk;
|
||||
int err = -ENOMEM;
|
||||
@ -1823,7 +1849,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
if (err < 0)
|
||||
goto out_free_tags;
|
||||
|
||||
disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
|
||||
disk = blk_mq_alloc_disk(&nbd->tag_set, &lim, NULL);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_free_idr;
|
||||
@ -1843,11 +1869,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
* Tell the block layer that we are not a rotational device
|
||||
*/
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_max_discard_sectors(disk->queue, 0);
|
||||
blk_queue_max_segment_size(disk->queue, UINT_MAX);
|
||||
blk_queue_max_segments(disk->queue, USHRT_MAX);
|
||||
blk_queue_max_hw_sectors(disk->queue, 65536);
|
||||
disk->queue->limits.max_sectors = 256;
|
||||
|
||||
mutex_init(&nbd->config_lock);
|
||||
refcount_set(&nbd->config_refs, 0);
|
||||
@ -2433,6 +2454,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
|
||||
}
|
||||
|
||||
dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
|
||||
if (!dev_list) {
|
||||
nlmsg_free(reply);
|
||||
ret = -EMSGSIZE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (index == -1) {
|
||||
ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
|
||||
if (ret) {
|
||||
|
@ -115,6 +115,18 @@ module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
|
||||
MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Historic queue modes.
|
||||
*
|
||||
* These days nothing but NULL_Q_MQ is actually supported, but we keep the
|
||||
* enum for error reporting.
|
||||
*/
|
||||
enum {
|
||||
NULL_Q_BIO = 0,
|
||||
NULL_Q_RQ = 1,
|
||||
NULL_Q_MQ = 2,
|
||||
};
|
||||
|
||||
static int g_queue_mode = NULL_Q_MQ;
|
||||
|
||||
static int null_param_store_val(const char *str, int *val, int min, int max)
|
||||
@ -165,8 +177,8 @@ static bool g_blocking;
|
||||
module_param_named(blocking, g_blocking, bool, 0444);
|
||||
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
|
||||
|
||||
static bool shared_tags;
|
||||
module_param(shared_tags, bool, 0444);
|
||||
static bool g_shared_tags;
|
||||
module_param_named(shared_tags, g_shared_tags, bool, 0444);
|
||||
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
|
||||
|
||||
static bool g_shared_tag_bitmap;
|
||||
@ -426,6 +438,7 @@ NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
|
||||
|
||||
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
|
||||
@ -571,6 +584,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
|
||||
&nullb_device_attr_zone_offline,
|
||||
&nullb_device_attr_virt_boundary,
|
||||
&nullb_device_attr_no_sched,
|
||||
&nullb_device_attr_shared_tags,
|
||||
&nullb_device_attr_shared_tag_bitmap,
|
||||
NULL,
|
||||
};
|
||||
@ -653,10 +667,11 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
|
||||
"badblocks,blocking,blocksize,cache_size,"
|
||||
"completion_nsec,discard,home_node,hw_queue_depth,"
|
||||
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
|
||||
"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
|
||||
"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
|
||||
"zone_capacity,zone_max_active,zone_max_open,"
|
||||
"zone_nr_conv,zone_offline,zone_readonly,zone_size\n");
|
||||
"poll_queues,power,queue_mode,shared_tag_bitmap,"
|
||||
"shared_tags,size,submit_queues,use_per_node_hctx,"
|
||||
"virt_boundary,zoned,zone_capacity,zone_max_active,"
|
||||
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
|
||||
"zone_size\n");
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR_RO(memb_group_, features);
|
||||
@ -738,6 +753,7 @@ static struct nullb_device *null_alloc_dev(void)
|
||||
dev->zone_max_active = g_zone_max_active;
|
||||
dev->virt_boundary = g_virt_boundary;
|
||||
dev->no_sched = g_no_sched;
|
||||
dev->shared_tags = g_shared_tags;
|
||||
dev->shared_tag_bitmap = g_shared_tag_bitmap;
|
||||
return dev;
|
||||
}
|
||||
@ -752,98 +768,11 @@ static void null_free_dev(struct nullb_device *dev)
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static void put_tag(struct nullb_queue *nq, unsigned int tag)
|
||||
{
|
||||
clear_bit_unlock(tag, nq->tag_map);
|
||||
|
||||
if (waitqueue_active(&nq->wait))
|
||||
wake_up(&nq->wait);
|
||||
}
|
||||
|
||||
static unsigned int get_tag(struct nullb_queue *nq)
|
||||
{
|
||||
unsigned int tag;
|
||||
|
||||
do {
|
||||
tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
|
||||
if (tag >= nq->queue_depth)
|
||||
return -1U;
|
||||
} while (test_and_set_bit_lock(tag, nq->tag_map));
|
||||
|
||||
return tag;
|
||||
}
|
||||
|
||||
static void free_cmd(struct nullb_cmd *cmd)
|
||||
{
|
||||
put_tag(cmd->nq, cmd->tag);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
|
||||
|
||||
static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
|
||||
{
|
||||
struct nullb_cmd *cmd;
|
||||
unsigned int tag;
|
||||
|
||||
tag = get_tag(nq);
|
||||
if (tag != -1U) {
|
||||
cmd = &nq->cmds[tag];
|
||||
cmd->tag = tag;
|
||||
cmd->error = BLK_STS_OK;
|
||||
cmd->nq = nq;
|
||||
if (nq->dev->irqmode == NULL_IRQ_TIMER) {
|
||||
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_REL);
|
||||
cmd->timer.function = null_cmd_timer_expired;
|
||||
}
|
||||
return cmd;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
|
||||
{
|
||||
struct nullb_cmd *cmd;
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
do {
|
||||
/*
|
||||
* This avoids multiple return statements, multiple calls to
|
||||
* __alloc_cmd() and a fast path call to prepare_to_wait().
|
||||
*/
|
||||
cmd = __alloc_cmd(nq);
|
||||
if (cmd) {
|
||||
cmd->bio = bio;
|
||||
return cmd;
|
||||
}
|
||||
prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
io_schedule();
|
||||
finish_wait(&nq->wait, &wait);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
static void end_cmd(struct nullb_cmd *cmd)
|
||||
{
|
||||
int queue_mode = cmd->nq->dev->queue_mode;
|
||||
|
||||
switch (queue_mode) {
|
||||
case NULL_Q_MQ:
|
||||
blk_mq_end_request(cmd->rq, cmd->error);
|
||||
return;
|
||||
case NULL_Q_BIO:
|
||||
cmd->bio->bi_status = cmd->error;
|
||||
bio_endio(cmd->bio);
|
||||
break;
|
||||
}
|
||||
|
||||
free_cmd(cmd);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
|
||||
{
|
||||
end_cmd(container_of(timer, struct nullb_cmd, timer));
|
||||
struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
|
||||
|
||||
blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
@ -856,7 +785,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
|
||||
|
||||
static void null_complete_rq(struct request *rq)
|
||||
{
|
||||
end_cmd(blk_mq_rq_to_pdu(rq));
|
||||
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
|
||||
|
||||
blk_mq_end_request(rq, cmd->error);
|
||||
}
|
||||
|
||||
static struct nullb_page *null_alloc_page(void)
|
||||
@ -1273,7 +1204,7 @@ static int null_transfer(struct nullb *nullb, struct page *page,
|
||||
|
||||
static int null_handle_rq(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct request *rq = cmd->rq;
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
struct nullb *nullb = cmd->nq->dev->nullb;
|
||||
int err;
|
||||
unsigned int len;
|
||||
@ -1298,63 +1229,21 @@ static int null_handle_rq(struct nullb_cmd *cmd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int null_handle_bio(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct bio *bio = cmd->bio;
|
||||
struct nullb *nullb = cmd->nq->dev->nullb;
|
||||
int err;
|
||||
unsigned int len;
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
struct bio_vec bvec;
|
||||
struct bvec_iter iter;
|
||||
|
||||
spin_lock_irq(&nullb->lock);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
len = bvec.bv_len;
|
||||
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
|
||||
op_is_write(bio_op(bio)), sector,
|
||||
bio->bi_opf & REQ_FUA);
|
||||
if (err) {
|
||||
spin_unlock_irq(&nullb->lock);
|
||||
return err;
|
||||
}
|
||||
sector += len >> SECTOR_SHIFT;
|
||||
}
|
||||
spin_unlock_irq(&nullb->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void null_stop_queue(struct nullb *nullb)
|
||||
{
|
||||
struct request_queue *q = nullb->q;
|
||||
|
||||
if (nullb->dev->queue_mode == NULL_Q_MQ)
|
||||
blk_mq_stop_hw_queues(q);
|
||||
}
|
||||
|
||||
static void null_restart_queue_async(struct nullb *nullb)
|
||||
{
|
||||
struct request_queue *q = nullb->q;
|
||||
|
||||
if (nullb->dev->queue_mode == NULL_Q_MQ)
|
||||
blk_mq_start_stopped_hw_queues(q, true);
|
||||
}
|
||||
|
||||
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
struct nullb *nullb = dev->nullb;
|
||||
blk_status_t sts = BLK_STS_OK;
|
||||
struct request *rq = cmd->rq;
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
|
||||
if (!hrtimer_active(&nullb->bw_timer))
|
||||
hrtimer_restart(&nullb->bw_timer);
|
||||
|
||||
if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
|
||||
null_stop_queue(nullb);
|
||||
blk_mq_stop_hw_queues(nullb->q);
|
||||
/* race with timer */
|
||||
if (atomic_long_read(&nullb->cur_bytes) > 0)
|
||||
null_restart_queue_async(nullb);
|
||||
blk_mq_start_stopped_hw_queues(nullb->q, true);
|
||||
/* requeue request */
|
||||
sts = BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
@ -1381,37 +1270,29 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
|
||||
sector_t nr_sectors)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
int err;
|
||||
|
||||
if (op == REQ_OP_DISCARD)
|
||||
return null_handle_discard(dev, sector, nr_sectors);
|
||||
return errno_to_blk_status(null_handle_rq(cmd));
|
||||
|
||||
if (dev->queue_mode == NULL_Q_BIO)
|
||||
err = null_handle_bio(cmd);
|
||||
else
|
||||
err = null_handle_rq(cmd);
|
||||
|
||||
return errno_to_blk_status(err);
|
||||
}
|
||||
|
||||
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
struct bio *bio;
|
||||
|
||||
if (dev->memory_backed)
|
||||
return;
|
||||
|
||||
if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
|
||||
zero_fill_bio(cmd->bio);
|
||||
} else if (req_op(cmd->rq) == REQ_OP_READ) {
|
||||
__rq_for_each_bio(bio, cmd->rq)
|
||||
if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
|
||||
__rq_for_each_bio(bio, rq)
|
||||
zero_fill_bio(bio);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
|
||||
/*
|
||||
* Since root privileges are required to configure the null_blk
|
||||
* driver, it is fine that this driver does not initialize the
|
||||
@ -1425,20 +1306,10 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
|
||||
/* Complete IO by inline, softirq or timer */
|
||||
switch (cmd->nq->dev->irqmode) {
|
||||
case NULL_IRQ_SOFTIRQ:
|
||||
switch (cmd->nq->dev->queue_mode) {
|
||||
case NULL_Q_MQ:
|
||||
blk_mq_complete_request(cmd->rq);
|
||||
break;
|
||||
case NULL_Q_BIO:
|
||||
/*
|
||||
* XXX: no proper submitting cpu information available.
|
||||
*/
|
||||
end_cmd(cmd);
|
||||
break;
|
||||
}
|
||||
blk_mq_complete_request(rq);
|
||||
break;
|
||||
case NULL_IRQ_NONE:
|
||||
end_cmd(cmd);
|
||||
blk_mq_end_request(rq, cmd->error);
|
||||
break;
|
||||
case NULL_IRQ_TIMER:
|
||||
null_cmd_end_timer(cmd);
|
||||
@ -1499,7 +1370,7 @@ static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
|
||||
return HRTIMER_NORESTART;
|
||||
|
||||
atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
|
||||
null_restart_queue_async(nullb);
|
||||
blk_mq_start_stopped_hw_queues(nullb->q, true);
|
||||
|
||||
hrtimer_forward_now(&nullb->bw_timer, timer_interval);
|
||||
|
||||
@ -1516,26 +1387,6 @@ static void nullb_setup_bwtimer(struct nullb *nullb)
|
||||
hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
|
||||
{
|
||||
int index = 0;
|
||||
|
||||
if (nullb->nr_queues != 1)
|
||||
index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
|
||||
|
||||
return &nullb->queues[index];
|
||||
}
|
||||
|
||||
static void null_submit_bio(struct bio *bio)
|
||||
{
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
sector_t nr_sectors = bio_sectors(bio);
|
||||
struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
|
||||
struct nullb_queue *nq = nullb_to_queue(nullb);
|
||||
|
||||
null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
|
||||
static bool should_timeout_request(struct request *rq)
|
||||
@ -1655,7 +1506,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
|
||||
blk_rq_sectors(req));
|
||||
if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
|
||||
blk_mq_end_request_batch))
|
||||
end_cmd(cmd);
|
||||
blk_mq_end_request(req, cmd->error);
|
||||
nr++;
|
||||
}
|
||||
|
||||
@ -1711,7 +1562,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
cmd->timer.function = null_cmd_timer_expired;
|
||||
}
|
||||
cmd->rq = rq;
|
||||
cmd->error = BLK_STS_OK;
|
||||
cmd->nq = nq;
|
||||
cmd->fake_timeout = should_timeout_request(rq) ||
|
||||
@ -1770,34 +1620,8 @@ static void null_queue_rqs(struct request **rqlist)
|
||||
*rqlist = requeue_list;
|
||||
}
|
||||
|
||||
static void cleanup_queue(struct nullb_queue *nq)
|
||||
{
|
||||
bitmap_free(nq->tag_map);
|
||||
kfree(nq->cmds);
|
||||
}
|
||||
|
||||
static void cleanup_queues(struct nullb *nullb)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nullb->nr_queues; i++)
|
||||
cleanup_queue(&nullb->queues[i]);
|
||||
|
||||
kfree(nullb->queues);
|
||||
}
|
||||
|
||||
static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
|
||||
{
|
||||
struct nullb_queue *nq = hctx->driver_data;
|
||||
struct nullb *nullb = nq->dev->nullb;
|
||||
|
||||
nullb->nr_queues--;
|
||||
}
|
||||
|
||||
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
|
||||
{
|
||||
init_waitqueue_head(&nq->wait);
|
||||
nq->queue_depth = nullb->queue_depth;
|
||||
nq->dev = nullb->dev;
|
||||
INIT_LIST_HEAD(&nq->poll_list);
|
||||
spin_lock_init(&nq->poll_lock);
|
||||
@ -1815,7 +1639,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
|
||||
nq = &nullb->queues[hctx_idx];
|
||||
hctx->driver_data = nq;
|
||||
null_init_queue(nullb, nq);
|
||||
nullb->nr_queues++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1828,7 +1651,6 @@ static const struct blk_mq_ops null_mq_ops = {
|
||||
.poll = null_poll,
|
||||
.map_queues = null_map_queues,
|
||||
.init_hctx = null_init_hctx,
|
||||
.exit_hctx = null_exit_hctx,
|
||||
};
|
||||
|
||||
static void null_del_dev(struct nullb *nullb)
|
||||
@ -1849,21 +1671,20 @@ static void null_del_dev(struct nullb *nullb)
|
||||
if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
|
||||
hrtimer_cancel(&nullb->bw_timer);
|
||||
atomic_long_set(&nullb->cur_bytes, LONG_MAX);
|
||||
null_restart_queue_async(nullb);
|
||||
blk_mq_start_stopped_hw_queues(nullb->q, true);
|
||||
}
|
||||
|
||||
put_disk(nullb->disk);
|
||||
if (dev->queue_mode == NULL_Q_MQ &&
|
||||
nullb->tag_set == &nullb->__tag_set)
|
||||
if (nullb->tag_set == &nullb->__tag_set)
|
||||
blk_mq_free_tag_set(nullb->tag_set);
|
||||
cleanup_queues(nullb);
|
||||
kfree(nullb->queues);
|
||||
if (null_cache_active(nullb))
|
||||
null_free_device_storage(nullb->dev, true);
|
||||
kfree(nullb);
|
||||
dev->nullb = NULL;
|
||||
}
|
||||
|
||||
static void null_config_discard(struct nullb *nullb)
|
||||
static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
|
||||
{
|
||||
if (nullb->dev->discard == false)
|
||||
return;
|
||||
@ -1880,43 +1701,14 @@ static void null_config_discard(struct nullb *nullb)
|
||||
return;
|
||||
}
|
||||
|
||||
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
|
||||
lim->max_hw_discard_sectors = UINT_MAX >> 9;
|
||||
}
|
||||
|
||||
static const struct block_device_operations null_bio_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.submit_bio = null_submit_bio,
|
||||
.report_zones = null_report_zones,
|
||||
};
|
||||
|
||||
static const struct block_device_operations null_rq_ops = {
|
||||
static const struct block_device_operations null_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.report_zones = null_report_zones,
|
||||
};
|
||||
|
||||
static int setup_commands(struct nullb_queue *nq)
|
||||
{
|
||||
struct nullb_cmd *cmd;
|
||||
int i;
|
||||
|
||||
nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
|
||||
if (!nq->cmds)
|
||||
return -ENOMEM;
|
||||
|
||||
nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
|
||||
if (!nq->tag_map) {
|
||||
kfree(nq->cmds);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < nq->queue_depth; i++) {
|
||||
cmd = &nq->cmds[i];
|
||||
cmd->tag = -1U;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_queues(struct nullb *nullb)
|
||||
{
|
||||
int nqueues = nr_cpu_ids;
|
||||
@ -1929,101 +1721,66 @@ static int setup_queues(struct nullb *nullb)
|
||||
if (!nullb->queues)
|
||||
return -ENOMEM;
|
||||
|
||||
nullb->queue_depth = nullb->dev->hw_queue_depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int init_driver_queues(struct nullb *nullb)
|
||||
static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
|
||||
{
|
||||
struct nullb_queue *nq;
|
||||
int i, ret = 0;
|
||||
|
||||
for (i = 0; i < nullb->dev->submit_queues; i++) {
|
||||
nq = &nullb->queues[i];
|
||||
|
||||
null_init_queue(nullb, nq);
|
||||
|
||||
ret = setup_commands(nq);
|
||||
if (ret)
|
||||
return ret;
|
||||
nullb->nr_queues++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int null_gendisk_register(struct nullb *nullb)
|
||||
{
|
||||
sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
|
||||
struct gendisk *disk = nullb->disk;
|
||||
|
||||
set_capacity(disk, size);
|
||||
|
||||
disk->major = null_major;
|
||||
disk->first_minor = nullb->index;
|
||||
disk->minors = 1;
|
||||
if (queue_is_mq(nullb->q))
|
||||
disk->fops = &null_rq_ops;
|
||||
else
|
||||
disk->fops = &null_bio_ops;
|
||||
disk->private_data = nullb;
|
||||
strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
|
||||
|
||||
if (nullb->dev->zoned) {
|
||||
int ret = null_register_zoned_dev(nullb);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return add_disk(disk);
|
||||
}
|
||||
|
||||
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
|
||||
{
|
||||
unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
int hw_queues, numa_node;
|
||||
unsigned int queue_depth;
|
||||
int poll_queues;
|
||||
|
||||
if (nullb) {
|
||||
hw_queues = nullb->dev->submit_queues;
|
||||
poll_queues = nullb->dev->poll_queues;
|
||||
queue_depth = nullb->dev->hw_queue_depth;
|
||||
numa_node = nullb->dev->home_node;
|
||||
if (nullb->dev->no_sched)
|
||||
flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (nullb->dev->shared_tag_bitmap)
|
||||
flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
if (nullb->dev->blocking)
|
||||
flags |= BLK_MQ_F_BLOCKING;
|
||||
} else {
|
||||
hw_queues = g_submit_queues;
|
||||
poll_queues = g_poll_queues;
|
||||
queue_depth = g_hw_queue_depth;
|
||||
numa_node = g_home_node;
|
||||
if (g_no_sched)
|
||||
flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (g_shared_tag_bitmap)
|
||||
flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
if (g_blocking)
|
||||
flags |= BLK_MQ_F_BLOCKING;
|
||||
}
|
||||
|
||||
set->ops = &null_mq_ops;
|
||||
set->cmd_size = sizeof(struct nullb_cmd);
|
||||
set->flags = flags;
|
||||
set->driver_data = nullb;
|
||||
set->nr_hw_queues = hw_queues;
|
||||
set->queue_depth = queue_depth;
|
||||
set->numa_node = numa_node;
|
||||
set->cmd_size = sizeof(struct nullb_cmd);
|
||||
set->timeout = 5 * HZ;
|
||||
set->nr_maps = 1;
|
||||
if (poll_queues) {
|
||||
set->nr_hw_queues += poll_queues;
|
||||
set->nr_maps = 3;
|
||||
} else {
|
||||
set->nr_maps = 1;
|
||||
set->nr_maps += 2;
|
||||
}
|
||||
return blk_mq_alloc_tag_set(set);
|
||||
}
|
||||
|
||||
static int null_init_global_tag_set(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (tag_set.ops)
|
||||
return 0;
|
||||
|
||||
tag_set.nr_hw_queues = g_submit_queues;
|
||||
tag_set.queue_depth = g_hw_queue_depth;
|
||||
tag_set.numa_node = g_home_node;
|
||||
tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
if (g_no_sched)
|
||||
tag_set.flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (g_shared_tag_bitmap)
|
||||
tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
if (g_blocking)
|
||||
tag_set.flags |= BLK_MQ_F_BLOCKING;
|
||||
|
||||
error = null_init_tag_set(&tag_set, g_poll_queues);
|
||||
if (error)
|
||||
tag_set.ops = NULL;
|
||||
return error;
|
||||
}
|
||||
|
||||
static int null_setup_tagset(struct nullb *nullb)
|
||||
{
|
||||
if (nullb->dev->shared_tags) {
|
||||
nullb->tag_set = &tag_set;
|
||||
return null_init_global_tag_set();
|
||||
}
|
||||
|
||||
return blk_mq_alloc_tag_set(set);
|
||||
nullb->tag_set = &nullb->__tag_set;
|
||||
nullb->tag_set->driver_data = nullb;
|
||||
nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
|
||||
nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
|
||||
nullb->tag_set->numa_node = nullb->dev->home_node;
|
||||
nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
if (nullb->dev->no_sched)
|
||||
nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED;
|
||||
if (nullb->dev->shared_tag_bitmap)
|
||||
nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
if (nullb->dev->blocking)
|
||||
nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
|
||||
return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
|
||||
}
|
||||
|
||||
static int null_validate_conf(struct nullb_device *dev)
|
||||
@ -2032,11 +1789,15 @@ static int null_validate_conf(struct nullb_device *dev)
|
||||
pr_err("legacy IO path is no longer available\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (dev->queue_mode == NULL_Q_BIO) {
|
||||
pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
|
||||
dev->queue_mode = NULL_Q_MQ;
|
||||
}
|
||||
|
||||
dev->blocksize = round_down(dev->blocksize, 512);
|
||||
dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
|
||||
|
||||
if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
|
||||
if (dev->use_per_node_hctx) {
|
||||
if (dev->submit_queues != nr_online_nodes)
|
||||
dev->submit_queues = nr_online_nodes;
|
||||
} else if (dev->submit_queues > nr_cpu_ids)
|
||||
@ -2048,8 +1809,6 @@ static int null_validate_conf(struct nullb_device *dev)
|
||||
if (dev->poll_queues > g_poll_queues)
|
||||
dev->poll_queues = g_poll_queues;
|
||||
dev->prev_poll_queues = dev->poll_queues;
|
||||
|
||||
dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
|
||||
dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
|
||||
|
||||
/* Do memory allocation, so set blocking */
|
||||
@ -2060,9 +1819,6 @@ static int null_validate_conf(struct nullb_device *dev)
|
||||
dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
|
||||
dev->cache_size);
|
||||
dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
|
||||
/* can not stop a queue */
|
||||
if (dev->queue_mode == NULL_Q_BIO)
|
||||
dev->mbps = 0;
|
||||
|
||||
if (dev->zoned &&
|
||||
(!dev->zone_size || !is_power_of_2(dev->zone_size))) {
|
||||
@ -2102,6 +1858,12 @@ static bool null_setup_fault(void)
|
||||
|
||||
static int null_add_dev(struct nullb_device *dev)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = dev->blocksize,
|
||||
.physical_block_size = dev->blocksize,
|
||||
.max_hw_sectors = dev->max_sectors,
|
||||
};
|
||||
|
||||
struct nullb *nullb;
|
||||
int rv;
|
||||
|
||||
@ -2123,37 +1885,26 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
if (rv)
|
||||
goto out_free_nullb;
|
||||
|
||||
if (dev->queue_mode == NULL_Q_MQ) {
|
||||
if (shared_tags) {
|
||||
nullb->tag_set = &tag_set;
|
||||
rv = 0;
|
||||
} else {
|
||||
nullb->tag_set = &nullb->__tag_set;
|
||||
rv = null_init_tag_set(nullb, nullb->tag_set);
|
||||
}
|
||||
rv = null_setup_tagset(nullb);
|
||||
if (rv)
|
||||
goto out_cleanup_queues;
|
||||
|
||||
if (dev->virt_boundary)
|
||||
lim.virt_boundary_mask = PAGE_SIZE - 1;
|
||||
null_config_discard(nullb, &lim);
|
||||
if (dev->zoned) {
|
||||
rv = null_init_zoned_dev(dev, &lim);
|
||||
if (rv)
|
||||
goto out_cleanup_queues;
|
||||
|
||||
nullb->tag_set->timeout = 5 * HZ;
|
||||
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
|
||||
if (IS_ERR(nullb->disk)) {
|
||||
rv = PTR_ERR(nullb->disk);
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
nullb->q = nullb->disk->queue;
|
||||
} else if (dev->queue_mode == NULL_Q_BIO) {
|
||||
rv = -ENOMEM;
|
||||
nullb->disk = blk_alloc_disk(nullb->dev->home_node);
|
||||
if (!nullb->disk)
|
||||
goto out_cleanup_queues;
|
||||
|
||||
nullb->q = nullb->disk->queue;
|
||||
rv = init_driver_queues(nullb);
|
||||
if (rv)
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
|
||||
if (IS_ERR(nullb->disk)) {
|
||||
rv = PTR_ERR(nullb->disk);
|
||||
goto out_cleanup_zone;
|
||||
}
|
||||
nullb->q = nullb->disk->queue;
|
||||
|
||||
if (dev->mbps) {
|
||||
set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
|
||||
nullb_setup_bwtimer(nullb);
|
||||
@ -2164,12 +1915,6 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
blk_queue_write_cache(nullb->q, true, true);
|
||||
}
|
||||
|
||||
if (dev->zoned) {
|
||||
rv = null_init_zoned_dev(dev, nullb->q);
|
||||
if (rv)
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
nullb->q->queuedata = nullb;
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
|
||||
|
||||
@ -2177,22 +1922,12 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
|
||||
if (rv < 0) {
|
||||
mutex_unlock(&lock);
|
||||
goto out_cleanup_zone;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
nullb->index = rv;
|
||||
dev->index = rv;
|
||||
mutex_unlock(&lock);
|
||||
|
||||
blk_queue_logical_block_size(nullb->q, dev->blocksize);
|
||||
blk_queue_physical_block_size(nullb->q, dev->blocksize);
|
||||
if (dev->max_sectors)
|
||||
blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
|
||||
|
||||
if (dev->virt_boundary)
|
||||
blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
|
||||
|
||||
null_config_discard(nullb);
|
||||
|
||||
if (config_item_name(&dev->group.cg_item)) {
|
||||
/* Use configfs dir name as the device name */
|
||||
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
|
||||
@ -2201,7 +1936,22 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
sprintf(nullb->disk_name, "nullb%d", nullb->index);
|
||||
}
|
||||
|
||||
rv = null_gendisk_register(nullb);
|
||||
set_capacity(nullb->disk,
|
||||
((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT);
|
||||
nullb->disk->major = null_major;
|
||||
nullb->disk->first_minor = nullb->index;
|
||||
nullb->disk->minors = 1;
|
||||
nullb->disk->fops = &null_ops;
|
||||
nullb->disk->private_data = nullb;
|
||||
strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
|
||||
|
||||
if (nullb->dev->zoned) {
|
||||
rv = null_register_zoned_dev(nullb);
|
||||
if (rv)
|
||||
goto out_ida_free;
|
||||
}
|
||||
|
||||
rv = add_disk(nullb->disk);
|
||||
if (rv)
|
||||
goto out_ida_free;
|
||||
|
||||
@ -2220,10 +1970,10 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
out_cleanup_disk:
|
||||
put_disk(nullb->disk);
|
||||
out_cleanup_tags:
|
||||
if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
|
||||
if (nullb->tag_set == &nullb->__tag_set)
|
||||
blk_mq_free_tag_set(nullb->tag_set);
|
||||
out_cleanup_queues:
|
||||
cleanup_queues(nullb);
|
||||
kfree(nullb->queues);
|
||||
out_free_nullb:
|
||||
kfree(nullb);
|
||||
dev->nullb = NULL;
|
||||
@ -2299,7 +2049,7 @@ static int __init null_init(void)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
|
||||
if (g_use_per_node_hctx) {
|
||||
if (g_submit_queues != nr_online_nodes) {
|
||||
pr_warn("submit_queues param is set to %u.\n",
|
||||
nr_online_nodes);
|
||||
@ -2311,18 +2061,12 @@ static int __init null_init(void)
|
||||
g_submit_queues = 1;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
|
||||
ret = null_init_tag_set(NULL, &tag_set);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
config_group_init(&nullb_subsys.su_group);
|
||||
mutex_init(&nullb_subsys.su_mutex);
|
||||
|
||||
ret = configfs_register_subsystem(&nullb_subsys);
|
||||
if (ret)
|
||||
goto err_tagset;
|
||||
return ret;
|
||||
|
||||
mutex_init(&lock);
|
||||
|
||||
@ -2349,9 +2093,6 @@ static int __init null_init(void)
|
||||
unregister_blkdev(null_major, "nullb");
|
||||
err_conf:
|
||||
configfs_unregister_subsystem(&nullb_subsys);
|
||||
err_tagset:
|
||||
if (g_queue_mode == NULL_Q_MQ && shared_tags)
|
||||
blk_mq_free_tag_set(&tag_set);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2370,7 +2111,7 @@ static void __exit null_exit(void)
|
||||
}
|
||||
mutex_unlock(&lock);
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && shared_tags)
|
||||
if (tag_set.ops)
|
||||
blk_mq_free_tag_set(&tag_set);
|
||||
}
|
||||
|
||||
|
@ -16,11 +16,6 @@
|
||||
#include <linux/mutex.h>
|
||||
|
||||
struct nullb_cmd {
|
||||
union {
|
||||
struct request *rq;
|
||||
struct bio *bio;
|
||||
};
|
||||
unsigned int tag;
|
||||
blk_status_t error;
|
||||
bool fake_timeout;
|
||||
struct nullb_queue *nq;
|
||||
@ -28,16 +23,11 @@ struct nullb_cmd {
|
||||
};
|
||||
|
||||
struct nullb_queue {
|
||||
unsigned long *tag_map;
|
||||
wait_queue_head_t wait;
|
||||
unsigned int queue_depth;
|
||||
struct nullb_device *dev;
|
||||
unsigned int requeue_selection;
|
||||
|
||||
struct list_head poll_list;
|
||||
spinlock_t poll_lock;
|
||||
|
||||
struct nullb_cmd *cmds;
|
||||
};
|
||||
|
||||
struct nullb_zone {
|
||||
@ -60,13 +50,6 @@ struct nullb_zone {
|
||||
unsigned int capacity;
|
||||
};
|
||||
|
||||
/* Queue modes */
|
||||
enum {
|
||||
NULL_Q_BIO = 0,
|
||||
NULL_Q_RQ = 1,
|
||||
NULL_Q_MQ = 2,
|
||||
};
|
||||
|
||||
struct nullb_device {
|
||||
struct nullb *nullb;
|
||||
struct config_group group;
|
||||
@ -119,6 +102,7 @@ struct nullb_device {
|
||||
bool zoned; /* if device is zoned */
|
||||
bool virt_boundary; /* virtual boundary on/off for the device */
|
||||
bool no_sched; /* no IO scheduler for the device */
|
||||
bool shared_tags; /* share tag set between devices for blk-mq */
|
||||
bool shared_tag_bitmap; /* use hostwide shared tags */
|
||||
};
|
||||
|
||||
@ -130,14 +114,12 @@ struct nullb {
|
||||
struct gendisk *disk;
|
||||
struct blk_mq_tag_set *tag_set;
|
||||
struct blk_mq_tag_set __tag_set;
|
||||
unsigned int queue_depth;
|
||||
atomic_long_t cur_bytes;
|
||||
struct hrtimer bw_timer;
|
||||
unsigned long cache_flush_pos;
|
||||
spinlock_t lock;
|
||||
|
||||
struct nullb_queue *queues;
|
||||
unsigned int nr_queues;
|
||||
char disk_name[DISK_NAME_LEN];
|
||||
};
|
||||
|
||||
@ -147,7 +129,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
|
||||
sector_t sector, unsigned int nr_sectors);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
|
||||
int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
|
||||
int null_register_zoned_dev(struct nullb *nullb);
|
||||
void null_free_zoned_dev(struct nullb_device *dev);
|
||||
int null_report_zones(struct gendisk *disk, sector_t sector,
|
||||
@ -160,7 +142,7 @@ ssize_t zone_cond_store(struct nullb_device *dev, const char *page,
|
||||
size_t count, enum blk_zone_cond cond);
|
||||
#else
|
||||
static inline int null_init_zoned_dev(struct nullb_device *dev,
|
||||
struct request_queue *q)
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
|
||||
return -EINVAL;
|
||||
|
@ -41,10 +41,11 @@ TRACE_EVENT(nullb_zone_op,
|
||||
__field(unsigned int, zone_cond)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->op = req_op(cmd->rq);
|
||||
__entry->op = req_op(blk_mq_rq_from_pdu(cmd));
|
||||
__entry->zone_no = zone_no;
|
||||
__entry->zone_cond = zone_cond;
|
||||
__assign_disk_name(__entry->disk, cmd->rq->q->disk);
|
||||
__assign_disk_name(__entry->disk,
|
||||
blk_mq_rq_from_pdu(cmd)->q->disk);
|
||||
),
|
||||
TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
|
||||
__print_disk_name(__entry->disk),
|
||||
|
@ -58,7 +58,8 @@ static inline void null_unlock_zone(struct nullb_device *dev,
|
||||
mutex_unlock(&zone->mutex);
|
||||
}
|
||||
|
||||
int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
int null_init_zoned_dev(struct nullb_device *dev,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
sector_t dev_capacity_sects, zone_capacity_sects;
|
||||
struct nullb_zone *zone;
|
||||
@ -151,27 +152,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
sector += dev->zone_size_sects;
|
||||
}
|
||||
|
||||
lim->zoned = true;
|
||||
lim->chunk_sectors = dev->zone_size_sects;
|
||||
lim->max_zone_append_sectors = dev->zone_size_sects;
|
||||
lim->max_open_zones = dev->zone_max_open;
|
||||
lim->max_active_zones = dev->zone_max_active;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int null_register_zoned_dev(struct nullb *nullb)
|
||||
{
|
||||
struct nullb_device *dev = nullb->dev;
|
||||
struct request_queue *q = nullb->q;
|
||||
|
||||
disk_set_zoned(nullb->disk);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
blk_queue_chunk_sectors(q, dev->zone_size_sects);
|
||||
nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
|
||||
blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
|
||||
disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
|
||||
disk_set_max_active_zones(nullb->disk, dev->zone_max_active);
|
||||
|
||||
if (queue_is_mq(q))
|
||||
return blk_revalidate_disk_zones(nullb->disk, NULL);
|
||||
|
||||
return 0;
|
||||
return blk_revalidate_disk_zones(nullb->disk, NULL);
|
||||
}
|
||||
|
||||
void null_free_zoned_dev(struct nullb_device *dev)
|
||||
@ -394,10 +390,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
*/
|
||||
if (append) {
|
||||
sector = zone->wp;
|
||||
if (dev->queue_mode == NULL_Q_MQ)
|
||||
cmd->rq->__sector = sector;
|
||||
else
|
||||
cmd->bio->bi_iter.bi_sector = sector;
|
||||
blk_mq_rq_from_pdu(cmd)->__sector = sector;
|
||||
} else if (sector != zone->wp) {
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock;
|
||||
|
@ -828,6 +828,12 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
|
||||
*/
|
||||
static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* Some CDRW drives can not handle writes larger than one packet,
|
||||
* even if the size is a multiple of the packet size.
|
||||
*/
|
||||
bio->bi_opf |= REQ_NOMERGE;
|
||||
|
||||
spin_lock(&pd->iosched.lock);
|
||||
if (bio_data_dir(bio) == READ)
|
||||
bio_list_add(&pd->iosched.read_queue, bio);
|
||||
@ -2191,11 +2197,6 @@ static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
|
||||
ret = pkt_open_write(pd);
|
||||
if (ret)
|
||||
goto out_putdev;
|
||||
/*
|
||||
* Some CDRW drives can not handle writes larger than one packet,
|
||||
* even if the size is a multiple of the packet size.
|
||||
*/
|
||||
blk_queue_max_hw_sectors(q, pd->settings.size);
|
||||
set_bit(PACKET_WRITABLE, &pd->flags);
|
||||
} else {
|
||||
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
|
||||
@ -2338,9 +2339,9 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
|
||||
pkt_queue_bio(pd, cloned_bio);
|
||||
}
|
||||
|
||||
static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
|
||||
static void pkt_make_request_write(struct bio *bio)
|
||||
{
|
||||
struct pktcdvd_device *pd = q->queuedata;
|
||||
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data;
|
||||
sector_t zone;
|
||||
struct packet_data *pkt;
|
||||
int was_empty, blocked_bio;
|
||||
@ -2432,7 +2433,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
|
||||
|
||||
static void pkt_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
|
||||
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->private_data;
|
||||
struct device *ddev = disk_to_dev(pd->disk);
|
||||
struct bio *split;
|
||||
|
||||
@ -2476,7 +2477,7 @@ static void pkt_submit_bio(struct bio *bio)
|
||||
split = bio;
|
||||
}
|
||||
|
||||
pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
|
||||
pkt_make_request_write(split);
|
||||
} while (split != bio);
|
||||
|
||||
return;
|
||||
@ -2484,15 +2485,6 @@ static void pkt_submit_bio(struct bio *bio)
|
||||
bio_io_error(bio);
|
||||
}
|
||||
|
||||
static void pkt_init_queue(struct pktcdvd_device *pd)
|
||||
{
|
||||
struct request_queue *q = pd->disk->queue;
|
||||
|
||||
blk_queue_logical_block_size(q, CD_FRAMESIZE);
|
||||
blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
|
||||
q->queuedata = pd;
|
||||
}
|
||||
|
||||
static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
|
||||
{
|
||||
struct device *ddev = disk_to_dev(pd->disk);
|
||||
@ -2536,8 +2528,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
|
||||
pd->bdev_file = bdev_file;
|
||||
set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE);
|
||||
|
||||
pkt_init_queue(pd);
|
||||
|
||||
atomic_set(&pd->cdrw.pending_bios, 0);
|
||||
pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name);
|
||||
if (IS_ERR(pd->cdrw.thread)) {
|
||||
@ -2634,6 +2624,10 @@ static const struct block_device_operations pktcdvd_ops = {
|
||||
*/
|
||||
static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = PACKET_MAX_SECTORS,
|
||||
.logical_block_size = CD_FRAMESIZE,
|
||||
};
|
||||
int idx;
|
||||
int ret = -ENOMEM;
|
||||
struct pktcdvd_device *pd;
|
||||
@ -2673,10 +2667,11 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
pd->write_congestion_on = write_congestion_on;
|
||||
pd->write_congestion_off = write_congestion_off;
|
||||
|
||||
ret = -ENOMEM;
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(disk)) {
|
||||
ret = PTR_ERR(disk);
|
||||
goto out_mem;
|
||||
}
|
||||
pd->disk = disk;
|
||||
disk->major = pktdev_major;
|
||||
disk->first_minor = idx;
|
||||
|
@ -382,6 +382,14 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
struct ps3disk_private *priv;
|
||||
int error;
|
||||
unsigned int devidx;
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = dev->blk_size,
|
||||
.max_hw_sectors = dev->bounce_size >> 9,
|
||||
.max_segments = -1,
|
||||
.max_segment_size = dev->bounce_size,
|
||||
.dma_alignment = dev->blk_size - 1,
|
||||
};
|
||||
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gendisk;
|
||||
|
||||
@ -431,7 +439,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
if (error)
|
||||
goto fail_teardown;
|
||||
|
||||
gendisk = blk_mq_alloc_disk(&priv->tag_set, dev);
|
||||
gendisk = blk_mq_alloc_disk(&priv->tag_set, &lim, dev);
|
||||
if (IS_ERR(gendisk)) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n",
|
||||
__func__, __LINE__);
|
||||
@ -441,15 +449,8 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
|
||||
queue = gendisk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
|
||||
blk_queue_dma_alignment(queue, dev->blk_size-1);
|
||||
blk_queue_logical_block_size(queue, dev->blk_size);
|
||||
|
||||
blk_queue_write_cache(queue, true, false);
|
||||
|
||||
blk_queue_max_segments(queue, -1);
|
||||
blk_queue_max_segment_size(queue, dev->bounce_size);
|
||||
|
||||
priv->gendisk = gendisk;
|
||||
gendisk->major = ps3disk_major;
|
||||
gendisk->first_minor = devidx * PS3DISK_MINORS;
|
||||
|
@ -730,10 +730,10 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
|
||||
ps3vram_proc_init(dev);
|
||||
|
||||
gendisk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!gendisk) {
|
||||
gendisk = blk_alloc_disk(NULL, NUMA_NO_NODE);
|
||||
if (IS_ERR(gendisk)) {
|
||||
dev_err(&dev->core, "blk_alloc_disk failed\n");
|
||||
error = -ENOMEM;
|
||||
error = PTR_ERR(gendisk);
|
||||
goto out_cache_cleanup;
|
||||
}
|
||||
|
||||
|
@ -575,7 +575,7 @@ static const struct attribute_group rbd_bus_group = {
|
||||
};
|
||||
__ATTRIBUTE_GROUPS(rbd_bus);
|
||||
|
||||
static struct bus_type rbd_bus_type = {
|
||||
static const struct bus_type rbd_bus_type = {
|
||||
.name = "rbd",
|
||||
.bus_groups = rbd_bus_groups,
|
||||
};
|
||||
@ -4952,6 +4952,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
struct request_queue *q;
|
||||
unsigned int objset_bytes =
|
||||
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = objset_bytes >> SECTOR_SHIFT,
|
||||
.max_user_sectors = objset_bytes >> SECTOR_SHIFT,
|
||||
.io_min = rbd_dev->opts->alloc_size,
|
||||
.io_opt = rbd_dev->opts->alloc_size,
|
||||
.max_segments = USHRT_MAX,
|
||||
.max_segment_size = UINT_MAX,
|
||||
};
|
||||
int err;
|
||||
|
||||
memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
|
||||
@ -4966,7 +4974,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev);
|
||||
if (rbd_dev->opts->trim) {
|
||||
lim.discard_granularity = rbd_dev->opts->alloc_size;
|
||||
lim.max_hw_discard_sectors = objset_bytes >> SECTOR_SHIFT;
|
||||
lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_tag_set;
|
||||
@ -4987,19 +5001,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
|
||||
|
||||
blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
|
||||
q->limits.max_sectors = queue_max_hw_sectors(q);
|
||||
blk_queue_max_segments(q, USHRT_MAX);
|
||||
blk_queue_max_segment_size(q, UINT_MAX);
|
||||
blk_queue_io_min(q, rbd_dev->opts->alloc_size);
|
||||
blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
|
||||
|
||||
if (rbd_dev->opts->trim) {
|
||||
q->limits.discard_granularity = rbd_dev->opts->alloc_size;
|
||||
blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
|
||||
blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
|
||||
|
@ -1329,43 +1329,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_request_queue(struct rnbd_clt_dev *dev,
|
||||
struct rnbd_msg_open_rsp *rsp)
|
||||
{
|
||||
blk_queue_logical_block_size(dev->queue,
|
||||
le16_to_cpu(rsp->logical_block_size));
|
||||
blk_queue_physical_block_size(dev->queue,
|
||||
le16_to_cpu(rsp->physical_block_size));
|
||||
blk_queue_max_hw_sectors(dev->queue,
|
||||
dev->sess->max_io_size / SECTOR_SIZE);
|
||||
|
||||
/*
|
||||
* we don't support discards to "discontiguous" segments
|
||||
* in on request
|
||||
*/
|
||||
blk_queue_max_discard_segments(dev->queue, 1);
|
||||
|
||||
blk_queue_max_discard_sectors(dev->queue,
|
||||
le32_to_cpu(rsp->max_discard_sectors));
|
||||
dev->queue->limits.discard_granularity =
|
||||
le32_to_cpu(rsp->discard_granularity);
|
||||
dev->queue->limits.discard_alignment =
|
||||
le32_to_cpu(rsp->discard_alignment);
|
||||
if (le16_to_cpu(rsp->secure_discard))
|
||||
blk_queue_max_secure_erase_sectors(dev->queue,
|
||||
le32_to_cpu(rsp->max_discard_sectors));
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
|
||||
blk_queue_max_segments(dev->queue, dev->sess->max_segments);
|
||||
blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
|
||||
blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
|
||||
blk_queue_write_cache(dev->queue,
|
||||
!!(rsp->cache_policy & RNBD_WRITEBACK),
|
||||
!!(rsp->cache_policy & RNBD_FUA));
|
||||
blk_queue_max_write_zeroes_sectors(dev->queue,
|
||||
le32_to_cpu(rsp->max_write_zeroes_sectors));
|
||||
}
|
||||
|
||||
static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
|
||||
struct rnbd_msg_open_rsp *rsp, int idx)
|
||||
{
|
||||
@ -1403,18 +1366,41 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
|
||||
static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
|
||||
struct rnbd_msg_open_rsp *rsp)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = le16_to_cpu(rsp->logical_block_size),
|
||||
.physical_block_size = le16_to_cpu(rsp->physical_block_size),
|
||||
.io_opt = dev->sess->max_io_size,
|
||||
.max_hw_sectors = dev->sess->max_io_size / SECTOR_SIZE,
|
||||
.max_hw_discard_sectors = le32_to_cpu(rsp->max_discard_sectors),
|
||||
.discard_granularity = le32_to_cpu(rsp->discard_granularity),
|
||||
.discard_alignment = le32_to_cpu(rsp->discard_alignment),
|
||||
.max_segments = dev->sess->max_segments,
|
||||
.virt_boundary_mask = SZ_4K - 1,
|
||||
.max_write_zeroes_sectors =
|
||||
le32_to_cpu(rsp->max_write_zeroes_sectors),
|
||||
};
|
||||
int idx = dev->clt_device_id;
|
||||
|
||||
dev->size = le64_to_cpu(rsp->nsectors) *
|
||||
le16_to_cpu(rsp->logical_block_size);
|
||||
|
||||
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
|
||||
if (rsp->secure_discard) {
|
||||
lim.max_secure_erase_sectors =
|
||||
le32_to_cpu(rsp->max_discard_sectors);
|
||||
}
|
||||
|
||||
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev);
|
||||
if (IS_ERR(dev->gd))
|
||||
return PTR_ERR(dev->gd);
|
||||
dev->queue = dev->gd->queue;
|
||||
rnbd_init_mq_hw_queues(dev);
|
||||
|
||||
setup_request_queue(dev, rsp);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
|
||||
blk_queue_write_cache(dev->queue,
|
||||
!!(rsp->cache_policy & RNBD_WRITEBACK),
|
||||
!!(rsp->cache_policy & RNBD_FUA));
|
||||
|
||||
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
|
||||
}
|
||||
|
||||
|
@ -784,6 +784,14 @@ static const struct blk_mq_ops vdc_mq_ops = {
|
||||
|
||||
static int probe_disk(struct vdc_port *port)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.physical_block_size = port->vdisk_phys_blksz,
|
||||
.max_hw_sectors = port->max_xfer_size,
|
||||
/* Each segment in a request is up to an aligned page in size. */
|
||||
.seg_boundary_mask = PAGE_SIZE - 1,
|
||||
.max_segment_size = PAGE_SIZE,
|
||||
.max_segments = port->ring_cookies,
|
||||
};
|
||||
struct request_queue *q;
|
||||
struct gendisk *g;
|
||||
int err;
|
||||
@ -824,7 +832,7 @@ static int probe_disk(struct vdc_port *port)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
g = blk_mq_alloc_disk(&port->tag_set, port);
|
||||
g = blk_mq_alloc_disk(&port->tag_set, &lim, port);
|
||||
if (IS_ERR(g)) {
|
||||
printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
|
||||
port->vio.name);
|
||||
@ -835,12 +843,6 @@ static int probe_disk(struct vdc_port *port)
|
||||
port->disk = g;
|
||||
q = g->queue;
|
||||
|
||||
/* Each segment in a request is up to an aligned page in size. */
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
||||
blk_queue_max_segment_size(q, PAGE_SIZE);
|
||||
|
||||
blk_queue_max_segments(q, port->ring_cookies);
|
||||
blk_queue_max_hw_sectors(q, port->max_xfer_size);
|
||||
g->major = vdc_major;
|
||||
g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
|
||||
g->minors = 1 << PARTITION_SHIFT;
|
||||
@ -872,8 +874,6 @@ static int probe_disk(struct vdc_port *port)
|
||||
}
|
||||
}
|
||||
|
||||
blk_queue_physical_block_size(q, port->vdisk_phys_blksz);
|
||||
|
||||
pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
|
||||
g->disk_name,
|
||||
port->vdisk_size, (port->vdisk_size >> (20 - 9)),
|
||||
|
@ -820,7 +820,7 @@ static int swim_floppy_init(struct swim_priv *swd)
|
||||
goto exit_put_disks;
|
||||
|
||||
swd->unit[drive].disk =
|
||||
blk_mq_alloc_disk(&swd->unit[drive].tag_set,
|
||||
blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL,
|
||||
&swd->unit[drive]);
|
||||
if (IS_ERR(swd->unit[drive].disk)) {
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
@ -916,7 +916,7 @@ static int swim_probe(struct platform_device *dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int swim_remove(struct platform_device *dev)
|
||||
static void swim_remove(struct platform_device *dev)
|
||||
{
|
||||
struct swim_priv *swd = platform_get_drvdata(dev);
|
||||
int drive;
|
||||
@ -937,13 +937,11 @@ static int swim_remove(struct platform_device *dev)
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
|
||||
kfree(swd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct platform_driver swim_driver = {
|
||||
.probe = swim_probe,
|
||||
.remove = swim_remove,
|
||||
.remove_new = swim_remove,
|
||||
.driver = {
|
||||
.name = CARDNAME,
|
||||
},
|
||||
|
@ -1210,7 +1210,7 @@ static int swim3_attach(struct macio_dev *mdev,
|
||||
if (rc)
|
||||
goto out_unregister;
|
||||
|
||||
disk = blk_mq_alloc_disk(&fs->tag_set, fs);
|
||||
disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs);
|
||||
if (IS_ERR(disk)) {
|
||||
rc = PTR_ERR(disk);
|
||||
goto out_free_tag_set;
|
||||
|
@ -246,21 +246,12 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
{
|
||||
const struct ublk_param_zoned *p = &ub->params.zoned;
|
||||
|
||||
disk_set_zoned(ub->ub_disk);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
|
||||
blk_queue_required_elevator_features(ub->ub_disk->queue,
|
||||
ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
disk_set_max_active_zones(ub->ub_disk, p->max_active_zones);
|
||||
disk_set_max_open_zones(ub->ub_disk, p->max_open_zones);
|
||||
blk_queue_max_zone_append_sectors(ub->ub_disk->queue, p->max_zone_append_sectors);
|
||||
|
||||
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Based on virtblk_alloc_report_buffer */
|
||||
@ -432,9 +423,8 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int ublk_revalidate_disk_zones(struct ublk_device *ub)
|
||||
@ -498,11 +488,6 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
|
||||
struct request_queue *q = ub->ub_disk->queue;
|
||||
const struct ublk_param_basic *p = &ub->params.basic;
|
||||
|
||||
blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
|
||||
blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
|
||||
blk_queue_io_min(q, 1 << p->io_min_shift);
|
||||
blk_queue_io_opt(q, 1 << p->io_opt_shift);
|
||||
|
||||
blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
|
||||
p->attrs & UBLK_ATTR_FUA);
|
||||
if (p->attrs & UBLK_ATTR_ROTATIONAL)
|
||||
@ -510,29 +495,12 @@ static void ublk_dev_param_basic_apply(struct ublk_device *ub)
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
blk_queue_max_hw_sectors(q, p->max_sectors);
|
||||
blk_queue_chunk_sectors(q, p->chunk_sectors);
|
||||
blk_queue_virt_boundary(q, p->virt_boundary_mask);
|
||||
|
||||
if (p->attrs & UBLK_ATTR_READ_ONLY)
|
||||
set_disk_ro(ub->ub_disk, true);
|
||||
|
||||
set_capacity(ub->ub_disk, p->dev_sectors);
|
||||
}
|
||||
|
||||
static void ublk_dev_param_discard_apply(struct ublk_device *ub)
|
||||
{
|
||||
struct request_queue *q = ub->ub_disk->queue;
|
||||
const struct ublk_param_discard *p = &ub->params.discard;
|
||||
|
||||
q->limits.discard_alignment = p->discard_alignment;
|
||||
q->limits.discard_granularity = p->discard_granularity;
|
||||
blk_queue_max_discard_sectors(q, p->max_discard_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(q,
|
||||
p->max_write_zeroes_sectors);
|
||||
blk_queue_max_discard_segments(q, p->max_discard_segments);
|
||||
}
|
||||
|
||||
static int ublk_validate_params(const struct ublk_device *ub)
|
||||
{
|
||||
/* basic param is the only one which must be set */
|
||||
@ -576,20 +544,12 @@ static int ublk_validate_params(const struct ublk_device *ub)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_apply_params(struct ublk_device *ub)
|
||||
static void ublk_apply_params(struct ublk_device *ub)
|
||||
{
|
||||
if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
|
||||
return -EINVAL;
|
||||
|
||||
ublk_dev_param_basic_apply(ub);
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
|
||||
ublk_dev_param_discard_apply(ub);
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_ZONED)
|
||||
return ublk_dev_param_zoned_apply(ub);
|
||||
|
||||
return 0;
|
||||
ublk_dev_param_zoned_apply(ub);
|
||||
}
|
||||
|
||||
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
|
||||
@ -645,14 +605,16 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
|
||||
return ubq->flags & UBLK_F_NEED_GET_DATA;
|
||||
}
|
||||
|
||||
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
|
||||
/* Called in slow path only, keep it noinline for trace purpose */
|
||||
static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub)
|
||||
{
|
||||
if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
|
||||
return ub;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void ublk_put_device(struct ublk_device *ub)
|
||||
/* Called in slow path only, keep it noinline for trace purpose */
|
||||
static noinline void ublk_put_device(struct ublk_device *ub)
|
||||
{
|
||||
put_device(&ub->cdev_dev);
|
||||
}
|
||||
@ -711,7 +673,7 @@ static void ublk_free_disk(struct gendisk *disk)
|
||||
struct ublk_device *ub = disk->private_data;
|
||||
|
||||
clear_bit(UB_STATE_USED, &ub->state);
|
||||
put_device(&ub->cdev_dev);
|
||||
ublk_put_device(ub);
|
||||
}
|
||||
|
||||
static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
|
||||
@ -2182,7 +2144,7 @@ static void ublk_remove(struct ublk_device *ub)
|
||||
cancel_work_sync(&ub->stop_work);
|
||||
cancel_work_sync(&ub->quiesce_work);
|
||||
cdev_device_del(&ub->cdev, &ub->cdev_dev);
|
||||
put_device(&ub->cdev_dev);
|
||||
ublk_put_device(ub);
|
||||
ublks_added--;
|
||||
}
|
||||
|
||||
@ -2205,12 +2167,47 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
|
||||
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
{
|
||||
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
|
||||
const struct ublk_param_basic *p = &ub->params.basic;
|
||||
int ublksrv_pid = (int)header->data[0];
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = 1 << p->logical_bs_shift,
|
||||
.physical_block_size = 1 << p->physical_bs_shift,
|
||||
.io_min = 1 << p->io_min_shift,
|
||||
.io_opt = 1 << p->io_opt_shift,
|
||||
.max_hw_sectors = p->max_sectors,
|
||||
.chunk_sectors = p->chunk_sectors,
|
||||
.virt_boundary_mask = p->virt_boundary_mask,
|
||||
|
||||
};
|
||||
struct gendisk *disk;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (ublksrv_pid <= 0)
|
||||
return -EINVAL;
|
||||
if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
|
||||
return -EINVAL;
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
|
||||
const struct ublk_param_discard *pd = &ub->params.discard;
|
||||
|
||||
lim.discard_alignment = pd->discard_alignment;
|
||||
lim.discard_granularity = pd->discard_granularity;
|
||||
lim.max_hw_discard_sectors = pd->max_discard_sectors;
|
||||
lim.max_write_zeroes_sectors = pd->max_write_zeroes_sectors;
|
||||
lim.max_discard_segments = pd->max_discard_segments;
|
||||
}
|
||||
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_ZONED) {
|
||||
const struct ublk_param_zoned *p = &ub->params.zoned;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
lim.zoned = true;
|
||||
lim.max_active_zones = p->max_active_zones;
|
||||
lim.max_open_zones = p->max_open_zones;
|
||||
lim.max_zone_append_sectors = p->max_zone_append_sectors;
|
||||
}
|
||||
|
||||
if (wait_for_completion_interruptible(&ub->completion) != 0)
|
||||
return -EINTR;
|
||||
@ -2222,7 +2219,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
disk = blk_mq_alloc_disk(&ub->tag_set, NULL);
|
||||
disk = blk_mq_alloc_disk(&ub->tag_set, &lim, NULL);
|
||||
if (IS_ERR(disk)) {
|
||||
ret = PTR_ERR(disk);
|
||||
goto out_unlock;
|
||||
@ -2234,15 +2231,13 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
ub->dev_info.ublksrv_pid = ublksrv_pid;
|
||||
ub->ub_disk = disk;
|
||||
|
||||
ret = ublk_apply_params(ub);
|
||||
if (ret)
|
||||
goto out_put_disk;
|
||||
ublk_apply_params(ub);
|
||||
|
||||
/* don't probe partitions if any one ubq daemon is un-trusted */
|
||||
if (ub->nr_privileged_daemon != ub->nr_queues_ready)
|
||||
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
|
||||
|
||||
get_device(&ub->cdev_dev);
|
||||
ublk_get_device(ub);
|
||||
ub->dev_info.state = UBLK_S_DEV_LIVE;
|
||||
|
||||
if (ublk_dev_is_zoned(ub)) {
|
||||
@ -2262,7 +2257,6 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
ub->dev_info.state = UBLK_S_DEV_DEAD;
|
||||
ublk_put_device(ub);
|
||||
}
|
||||
out_put_disk:
|
||||
if (ret)
|
||||
put_disk(disk);
|
||||
out_unlock:
|
||||
@ -2474,7 +2468,7 @@ static inline bool ublk_idr_freed(int id)
|
||||
return ptr == NULL;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
|
||||
static int ublk_ctrl_del_dev(struct ublk_device **p_ub, bool wait)
|
||||
{
|
||||
struct ublk_device *ub = *p_ub;
|
||||
int idx = ub->ub_number;
|
||||
@ -2508,7 +2502,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
|
||||
* - the device number is freed already, we will not find this
|
||||
* device via ublk_get_device_from_id()
|
||||
*/
|
||||
if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
|
||||
if (wait && wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
|
||||
return -EINTR;
|
||||
return 0;
|
||||
}
|
||||
@ -2907,7 +2901,10 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
ret = ublk_ctrl_add_dev(cmd);
|
||||
break;
|
||||
case UBLK_CMD_DEL_DEV:
|
||||
ret = ublk_ctrl_del_dev(&ub);
|
||||
ret = ublk_ctrl_del_dev(&ub, true);
|
||||
break;
|
||||
case UBLK_U_CMD_DEL_DEV_ASYNC:
|
||||
ret = ublk_ctrl_del_dev(&ub, false);
|
||||
break;
|
||||
case UBLK_CMD_GET_QUEUE_AFFINITY:
|
||||
ret = ublk_ctrl_get_queue_affinity(ub, cmd);
|
||||
|
@ -720,25 +720,24 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int virtblk_probe_zoned_device(struct virtio_device *vdev,
|
||||
struct virtio_blk *vblk,
|
||||
struct request_queue *q)
|
||||
static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct virtio_device *vdev = vblk->vdev;
|
||||
u32 v, wg;
|
||||
|
||||
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
|
||||
|
||||
disk_set_zoned(vblk->disk);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
lim->zoned = true;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
zoned.max_open_zones, &v);
|
||||
disk_set_max_open_zones(vblk->disk, v);
|
||||
lim->max_open_zones = v;
|
||||
dev_dbg(&vdev->dev, "max open zones = %u\n", v);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
zoned.max_active_zones, &v);
|
||||
disk_set_max_active_zones(vblk->disk, v);
|
||||
lim->max_active_zones = v;
|
||||
dev_dbg(&vdev->dev, "max active zones = %u\n", v);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
@ -747,8 +746,8 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
|
||||
dev_warn(&vdev->dev, "zero write granularity reported\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
blk_queue_physical_block_size(q, wg);
|
||||
blk_queue_io_min(q, wg);
|
||||
lim->physical_block_size = wg;
|
||||
lim->io_min = wg;
|
||||
|
||||
dev_dbg(&vdev->dev, "write granularity = %u\n", wg);
|
||||
|
||||
@ -764,13 +763,13 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
|
||||
vblk->zone_sectors);
|
||||
return -ENODEV;
|
||||
}
|
||||
blk_queue_chunk_sectors(q, vblk->zone_sectors);
|
||||
lim->chunk_sectors = vblk->zone_sectors;
|
||||
dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
|
||||
dev_warn(&vblk->vdev->dev,
|
||||
"ignoring negotiated F_DISCARD for zoned device\n");
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
}
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
@ -785,25 +784,21 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
|
||||
wg, v);
|
||||
return -ENODEV;
|
||||
}
|
||||
blk_queue_max_zone_append_sectors(q, v);
|
||||
lim->max_zone_append_sectors = v;
|
||||
dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
|
||||
|
||||
return blk_revalidate_disk_zones(vblk->disk, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* Zoned block device support is not configured in this kernel.
|
||||
* Host-managed zoned devices can't be supported, but others are
|
||||
* good to go as regular block devices.
|
||||
* Zoned block device support is not configured in this kernel, host-managed
|
||||
* zoned devices can't be supported.
|
||||
*/
|
||||
#define virtblk_report_zones NULL
|
||||
|
||||
static inline int virtblk_probe_zoned_device(struct virtio_device *vdev,
|
||||
struct virtio_blk *vblk, struct request_queue *q)
|
||||
static inline int virtblk_read_zoned_limits(struct virtio_blk *vblk,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
dev_err(&vdev->dev,
|
||||
dev_err(&vblk->vdev->dev,
|
||||
"virtio_blk: zoned devices are not supported");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
@ -1248,31 +1243,17 @@ static const struct blk_mq_ops virtio_mq_ops = {
|
||||
static unsigned int virtblk_queue_depth;
|
||||
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
|
||||
|
||||
static int virtblk_probe(struct virtio_device *vdev)
|
||||
static int virtblk_read_limits(struct virtio_blk *vblk,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct virtio_blk *vblk;
|
||||
struct request_queue *q;
|
||||
int err, index;
|
||||
|
||||
struct virtio_device *vdev = vblk->vdev;
|
||||
u32 v, blk_size, max_size, sg_elems, opt_io_size;
|
||||
u32 max_discard_segs = 0;
|
||||
u32 discard_granularity = 0;
|
||||
u16 min_io_size;
|
||||
u8 physical_block_exp, alignment_offset;
|
||||
unsigned int queue_depth;
|
||||
size_t max_dma_size;
|
||||
|
||||
if (!vdev->config->get) {
|
||||
dev_err(&vdev->dev, "%s failure: config access disabled\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = ida_alloc_range(&vd_index_ida, 0,
|
||||
minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
index = err;
|
||||
int err;
|
||||
|
||||
/* We need to know how many segments before we allocate. */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
|
||||
@ -1286,6 +1267,203 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
/* Prevent integer overflows and honor max vq size */
|
||||
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
|
||||
|
||||
/* We can handle whatever the host told us to handle. */
|
||||
lim->max_segments = sg_elems;
|
||||
|
||||
/* No real sector limit. */
|
||||
lim->max_hw_sectors = UINT_MAX;
|
||||
|
||||
max_dma_size = virtio_max_dma_size(vdev);
|
||||
max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size;
|
||||
|
||||
/* Host can optionally specify maximum segment size and number of
|
||||
* segments. */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
|
||||
struct virtio_blk_config, size_max, &v);
|
||||
if (!err)
|
||||
max_size = min(max_size, v);
|
||||
|
||||
lim->max_segment_size = max_size;
|
||||
|
||||
/* Host can optionally specify the block size of the device */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
|
||||
struct virtio_blk_config, blk_size,
|
||||
&blk_size);
|
||||
if (!err) {
|
||||
err = blk_validate_block_size(blk_size);
|
||||
if (err) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: invalid block size: 0x%x\n",
|
||||
blk_size);
|
||||
return err;
|
||||
}
|
||||
|
||||
lim->logical_block_size = blk_size;
|
||||
} else
|
||||
blk_size = lim->logical_block_size;
|
||||
|
||||
/* Use topology information if available */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, physical_block_exp,
|
||||
&physical_block_exp);
|
||||
if (!err && physical_block_exp)
|
||||
lim->physical_block_size = blk_size * (1 << physical_block_exp);
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, alignment_offset,
|
||||
&alignment_offset);
|
||||
if (!err && alignment_offset)
|
||||
lim->alignment_offset = blk_size * alignment_offset;
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, min_io_size,
|
||||
&min_io_size);
|
||||
if (!err && min_io_size)
|
||||
lim->io_min = blk_size * min_io_size;
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, opt_io_size,
|
||||
&opt_io_size);
|
||||
if (!err && opt_io_size)
|
||||
lim->io_opt = blk_size * opt_io_size;
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
discard_sector_alignment, &discard_granularity);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_discard_sectors, &v);
|
||||
lim->max_hw_discard_sectors = v ? v : UINT_MAX;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
|
||||
&max_discard_segs);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_write_zeroes_sectors, &v);
|
||||
lim->max_write_zeroes_sectors = v ? v : UINT_MAX;
|
||||
}
|
||||
|
||||
/* The discard and secure erase limits are combined since the Linux
|
||||
* block layer uses the same limit for both commands.
|
||||
*
|
||||
* If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
|
||||
* are negotiated, we will use the minimum between the limits.
|
||||
*
|
||||
* discard sector alignment is set to the minimum between discard_sector_alignment
|
||||
* and secure_erase_sector_alignment.
|
||||
*
|
||||
* max discard sectors is set to the minimum between max_discard_seg and
|
||||
* max_secure_erase_seg.
|
||||
*/
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
secure_erase_sector_alignment, &v);
|
||||
|
||||
/* secure_erase_sector_alignment should not be zero, the device should set a
|
||||
* valid number of sectors.
|
||||
*/
|
||||
if (!v) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: secure_erase_sector_alignment can't be 0\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
discard_granularity = min_not_zero(discard_granularity, v);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_secure_erase_sectors, &v);
|
||||
|
||||
/* max_secure_erase_sectors should not be zero, the device should set a
|
||||
* valid number of sectors.
|
||||
*/
|
||||
if (!v) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: max_secure_erase_sectors can't be 0\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
lim->max_secure_erase_sectors = v;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_secure_erase_seg, &v);
|
||||
|
||||
/* max_secure_erase_seg should not be zero, the device should set a
|
||||
* valid number of segments
|
||||
*/
|
||||
if (!v) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: max_secure_erase_seg can't be 0\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
max_discard_segs = min_not_zero(max_discard_segs, v);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
|
||||
virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
|
||||
/* max_discard_seg and discard_granularity will be 0 only
|
||||
* if max_discard_seg and discard_sector_alignment fields in the virtio
|
||||
* config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated.
|
||||
* In this case, we use default values.
|
||||
*/
|
||||
if (!max_discard_segs)
|
||||
max_discard_segs = sg_elems;
|
||||
|
||||
lim->max_discard_segments =
|
||||
min(max_discard_segs, MAX_DISCARD_SEGMENTS);
|
||||
|
||||
if (discard_granularity)
|
||||
lim->discard_granularity =
|
||||
discard_granularity << SECTOR_SHIFT;
|
||||
else
|
||||
lim->discard_granularity = blk_size;
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
|
||||
u8 model;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model);
|
||||
switch (model) {
|
||||
case VIRTIO_BLK_Z_NONE:
|
||||
case VIRTIO_BLK_Z_HA:
|
||||
/* treat host-aware devices as non-zoned */
|
||||
return 0;
|
||||
case VIRTIO_BLK_Z_HM:
|
||||
err = virtblk_read_zoned_limits(vblk, lim);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
default:
|
||||
dev_err(&vdev->dev, "unsupported zone model %d\n", model);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int virtblk_probe(struct virtio_device *vdev)
|
||||
{
|
||||
struct virtio_blk *vblk;
|
||||
struct queue_limits lim = { };
|
||||
int err, index;
|
||||
unsigned int queue_depth;
|
||||
|
||||
if (!vdev->config->get) {
|
||||
dev_err(&vdev->dev, "%s failure: config access disabled\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = ida_alloc_range(&vd_index_ida, 0,
|
||||
minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
index = err;
|
||||
|
||||
vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
|
||||
if (!vblk) {
|
||||
err = -ENOMEM;
|
||||
@ -1330,12 +1508,15 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
if (err)
|
||||
goto out_free_vq;
|
||||
|
||||
vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
|
||||
err = virtblk_read_limits(vblk, &lim);
|
||||
if (err)
|
||||
goto out_free_tags;
|
||||
|
||||
vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk);
|
||||
if (IS_ERR(vblk->disk)) {
|
||||
err = PTR_ERR(vblk->disk);
|
||||
goto out_free_tags;
|
||||
}
|
||||
q = vblk->disk->queue;
|
||||
|
||||
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
|
||||
|
||||
@ -1353,164 +1534,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
|
||||
set_disk_ro(vblk->disk, 1);
|
||||
|
||||
/* We can handle whatever the host told us to handle. */
|
||||
blk_queue_max_segments(q, sg_elems);
|
||||
|
||||
/* No real sector limit. */
|
||||
blk_queue_max_hw_sectors(q, UINT_MAX);
|
||||
|
||||
max_dma_size = virtio_max_dma_size(vdev);
|
||||
max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size;
|
||||
|
||||
/* Host can optionally specify maximum segment size and number of
|
||||
* segments. */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
|
||||
struct virtio_blk_config, size_max, &v);
|
||||
if (!err)
|
||||
max_size = min(max_size, v);
|
||||
|
||||
blk_queue_max_segment_size(q, max_size);
|
||||
|
||||
/* Host can optionally specify the block size of the device */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
|
||||
struct virtio_blk_config, blk_size,
|
||||
&blk_size);
|
||||
if (!err) {
|
||||
err = blk_validate_block_size(blk_size);
|
||||
if (err) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: invalid block size: 0x%x\n",
|
||||
blk_size);
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
blk_queue_logical_block_size(q, blk_size);
|
||||
} else
|
||||
blk_size = queue_logical_block_size(q);
|
||||
|
||||
/* Use topology information if available */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, physical_block_exp,
|
||||
&physical_block_exp);
|
||||
if (!err && physical_block_exp)
|
||||
blk_queue_physical_block_size(q,
|
||||
blk_size * (1 << physical_block_exp));
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, alignment_offset,
|
||||
&alignment_offset);
|
||||
if (!err && alignment_offset)
|
||||
blk_queue_alignment_offset(q, blk_size * alignment_offset);
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, min_io_size,
|
||||
&min_io_size);
|
||||
if (!err && min_io_size)
|
||||
blk_queue_io_min(q, blk_size * min_io_size);
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, opt_io_size,
|
||||
&opt_io_size);
|
||||
if (!err && opt_io_size)
|
||||
blk_queue_io_opt(q, blk_size * opt_io_size);
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
discard_sector_alignment, &discard_granularity);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_discard_sectors, &v);
|
||||
blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
|
||||
&max_discard_segs);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_write_zeroes_sectors, &v);
|
||||
blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
|
||||
}
|
||||
|
||||
/* The discard and secure erase limits are combined since the Linux
|
||||
* block layer uses the same limit for both commands.
|
||||
*
|
||||
* If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
|
||||
* are negotiated, we will use the minimum between the limits.
|
||||
*
|
||||
* discard sector alignment is set to the minimum between discard_sector_alignment
|
||||
* and secure_erase_sector_alignment.
|
||||
*
|
||||
* max discard sectors is set to the minimum between max_discard_seg and
|
||||
* max_secure_erase_seg.
|
||||
*/
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
secure_erase_sector_alignment, &v);
|
||||
|
||||
/* secure_erase_sector_alignment should not be zero, the device should set a
|
||||
* valid number of sectors.
|
||||
*/
|
||||
if (!v) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: secure_erase_sector_alignment can't be 0\n");
|
||||
err = -EINVAL;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
discard_granularity = min_not_zero(discard_granularity, v);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_secure_erase_sectors, &v);
|
||||
|
||||
/* max_secure_erase_sectors should not be zero, the device should set a
|
||||
* valid number of sectors.
|
||||
*/
|
||||
if (!v) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: max_secure_erase_sectors can't be 0\n");
|
||||
err = -EINVAL;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
blk_queue_max_secure_erase_sectors(q, v);
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_secure_erase_seg, &v);
|
||||
|
||||
/* max_secure_erase_seg should not be zero, the device should set a
|
||||
* valid number of segments
|
||||
*/
|
||||
if (!v) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: max_secure_erase_seg can't be 0\n");
|
||||
err = -EINVAL;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
max_discard_segs = min_not_zero(max_discard_segs, v);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
|
||||
virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
|
||||
/* max_discard_seg and discard_granularity will be 0 only
|
||||
* if max_discard_seg and discard_sector_alignment fields in the virtio
|
||||
* config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated.
|
||||
* In this case, we use default values.
|
||||
*/
|
||||
if (!max_discard_segs)
|
||||
max_discard_segs = sg_elems;
|
||||
|
||||
blk_queue_max_discard_segments(q,
|
||||
min(max_discard_segs, MAX_DISCARD_SEGMENTS));
|
||||
|
||||
if (discard_granularity)
|
||||
q->limits.discard_granularity = discard_granularity << SECTOR_SHIFT;
|
||||
else
|
||||
q->limits.discard_granularity = blk_size;
|
||||
}
|
||||
|
||||
virtblk_update_capacity(vblk, false);
|
||||
virtio_device_ready(vdev);
|
||||
|
||||
@ -1518,27 +1541,11 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
* All steps that follow use the VQs therefore they need to be
|
||||
* placed after the virtio_device_ready() call above.
|
||||
*/
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
|
||||
u8 model;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config, zoned.model,
|
||||
&model);
|
||||
switch (model) {
|
||||
case VIRTIO_BLK_Z_NONE:
|
||||
case VIRTIO_BLK_Z_HA:
|
||||
/* Present the host-aware device as non-zoned */
|
||||
break;
|
||||
case VIRTIO_BLK_Z_HM:
|
||||
err = virtblk_probe_zoned_device(vdev, vblk, q);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
break;
|
||||
default:
|
||||
dev_err(&vdev->dev, "unsupported zone model %d\n",
|
||||
model);
|
||||
err = -EINVAL;
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
|
||||
err = blk_revalidate_disk_zones(vblk->disk, NULL);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
}
|
||||
|
||||
err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
|
||||
|
@ -941,39 +941,35 @@ static const struct blk_mq_ops blkfront_mq_ops = {
|
||||
.complete = blkif_complete_rq,
|
||||
};
|
||||
|
||||
static void blkif_set_queue_limits(struct blkfront_info *info)
|
||||
static void blkif_set_queue_limits(const struct blkfront_info *info,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct request_queue *rq = info->rq;
|
||||
struct gendisk *gd = info->gd;
|
||||
unsigned int segments = info->max_indirect_segments ? :
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
|
||||
|
||||
if (info->feature_discard) {
|
||||
blk_queue_max_discard_sectors(rq, get_capacity(gd));
|
||||
rq->limits.discard_granularity = info->discard_granularity ?:
|
||||
info->physical_sector_size;
|
||||
rq->limits.discard_alignment = info->discard_alignment;
|
||||
lim->max_hw_discard_sectors = UINT_MAX;
|
||||
if (info->discard_granularity)
|
||||
lim->discard_granularity = info->discard_granularity;
|
||||
lim->discard_alignment = info->discard_alignment;
|
||||
if (info->feature_secdiscard)
|
||||
blk_queue_max_secure_erase_sectors(rq,
|
||||
get_capacity(gd));
|
||||
lim->max_secure_erase_sectors = UINT_MAX;
|
||||
}
|
||||
|
||||
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
||||
blk_queue_logical_block_size(rq, info->sector_size);
|
||||
blk_queue_physical_block_size(rq, info->physical_sector_size);
|
||||
blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
|
||||
lim->logical_block_size = info->sector_size;
|
||||
lim->physical_block_size = info->physical_sector_size;
|
||||
lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512;
|
||||
|
||||
/* Each segment in a request is up to an aligned page in size. */
|
||||
blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
|
||||
blk_queue_max_segment_size(rq, PAGE_SIZE);
|
||||
lim->seg_boundary_mask = PAGE_SIZE - 1;
|
||||
lim->max_segment_size = PAGE_SIZE;
|
||||
|
||||
/* Ensure a merged request will fit in a single I/O ring slot. */
|
||||
blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
|
||||
lim->max_segments = segments / GRANTS_PER_PSEG;
|
||||
|
||||
/* Make sure buffer addresses are sector-aligned. */
|
||||
blk_queue_dma_alignment(rq, 511);
|
||||
lim->dma_alignment = 511;
|
||||
}
|
||||
|
||||
static const char *flush_info(struct blkfront_info *info)
|
||||
@ -1070,6 +1066,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
struct blkfront_info *info, u16 sector_size,
|
||||
unsigned int physical_sector_size)
|
||||
{
|
||||
struct queue_limits lim = {};
|
||||
struct gendisk *gd;
|
||||
int nr_minors = 1;
|
||||
int err;
|
||||
@ -1136,11 +1133,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
if (err)
|
||||
goto out_release_minors;
|
||||
|
||||
gd = blk_mq_alloc_disk(&info->tag_set, info);
|
||||
blkif_set_queue_limits(info, &lim);
|
||||
gd = blk_mq_alloc_disk(&info->tag_set, &lim, info);
|
||||
if (IS_ERR(gd)) {
|
||||
err = PTR_ERR(gd);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue);
|
||||
|
||||
strcpy(gd->disk_name, DEV_NAME);
|
||||
ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
|
||||
@ -1162,7 +1161,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
info->gd = gd;
|
||||
info->sector_size = sector_size;
|
||||
info->physical_sector_size = physical_sector_size;
|
||||
blkif_set_queue_limits(info);
|
||||
|
||||
xlvbd_flush(info);
|
||||
|
||||
@ -2006,18 +2004,19 @@ static int blkfront_probe(struct xenbus_device *dev,
|
||||
|
||||
static int blkif_recover(struct blkfront_info *info)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
unsigned int r_index;
|
||||
struct request *req, *n;
|
||||
int rc;
|
||||
struct bio *bio;
|
||||
unsigned int segs;
|
||||
struct blkfront_ring_info *rinfo;
|
||||
|
||||
lim = queue_limits_start_update(info->rq);
|
||||
blkfront_gather_backend_features(info);
|
||||
/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
|
||||
blkif_set_queue_limits(info);
|
||||
segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
|
||||
blkif_set_queue_limits(info, &lim);
|
||||
rc = queue_limits_commit_update(info->rq, &lim);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
for_each_rinfo(info, rinfo, r_index) {
|
||||
rc = blkfront_setup_indirect(rinfo);
|
||||
@ -2037,7 +2036,9 @@ static int blkif_recover(struct blkfront_info *info)
|
||||
list_for_each_entry_safe(req, n, &info->requests, queuelist) {
|
||||
/* Requeue pending requests (flush or discard) */
|
||||
list_del_init(&req->queuelist);
|
||||
BUG_ON(req->nr_phys_segments > segs);
|
||||
BUG_ON(req->nr_phys_segments >
|
||||
(info->max_indirect_segments ? :
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST));
|
||||
blk_mq_requeue_request(req, false);
|
||||
}
|
||||
blk_mq_start_stopped_hw_queues(info->rq, true);
|
||||
|
@ -318,7 +318,7 @@ static int z2ram_register_disk(int minor)
|
||||
struct gendisk *disk;
|
||||
int err;
|
||||
|
||||
disk = blk_mq_alloc_disk(&tag_set, NULL);
|
||||
disk = blk_mq_alloc_disk(&tag_set, NULL, NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
|
@ -2177,6 +2177,28 @@ ATTRIBUTE_GROUPS(zram_disk);
|
||||
*/
|
||||
static int zram_add(void)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
|
||||
/*
|
||||
* To ensure that we always get PAGE_SIZE aligned and
|
||||
* n*PAGE_SIZED sized I/O requests.
|
||||
*/
|
||||
.physical_block_size = PAGE_SIZE,
|
||||
.io_min = PAGE_SIZE,
|
||||
.io_opt = PAGE_SIZE,
|
||||
.max_hw_discard_sectors = UINT_MAX,
|
||||
/*
|
||||
* zram_bio_discard() will clear all logical blocks if logical
|
||||
* block size is identical with physical block size(PAGE_SIZE).
|
||||
* But if it is different, we will skip discarding some parts of
|
||||
* logical blocks in the part of the request range which isn't
|
||||
* aligned to physical block size. So we can't ensure that all
|
||||
* discarded logical blocks are zeroed.
|
||||
*/
|
||||
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
|
||||
.max_write_zeroes_sectors = UINT_MAX,
|
||||
#endif
|
||||
};
|
||||
struct zram *zram;
|
||||
int ret, device_id;
|
||||
|
||||
@ -2195,11 +2217,11 @@ static int zram_add(void)
|
||||
#endif
|
||||
|
||||
/* gendisk structure */
|
||||
zram->disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!zram->disk) {
|
||||
zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(zram->disk)) {
|
||||
pr_err("Error allocating disk structure for device %d\n",
|
||||
device_id);
|
||||
ret = -ENOMEM;
|
||||
ret = PTR_ERR(zram->disk);
|
||||
goto out_free_idr;
|
||||
}
|
||||
|
||||
@ -2216,29 +2238,6 @@ static int zram_add(void)
|
||||
/* zram devices sort of resembles non-rotational disks */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
|
||||
|
||||
/*
|
||||
* To ensure that we always get PAGE_SIZE aligned
|
||||
* and n*PAGE_SIZED sized I/O requests.
|
||||
*/
|
||||
blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
|
||||
blk_queue_logical_block_size(zram->disk->queue,
|
||||
ZRAM_LOGICAL_BLOCK_SIZE);
|
||||
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
|
||||
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
|
||||
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
|
||||
|
||||
/*
|
||||
* zram_bio_discard() will clear all logical blocks if logical block
|
||||
* size is identical with physical block size(PAGE_SIZE). But if it is
|
||||
* different, we will skip discarding some parts of logical blocks in
|
||||
* the part of the request range which isn't aligned to physical block
|
||||
* size. So we can't ensure that all discarded logical blocks are
|
||||
* zeroed.
|
||||
*/
|
||||
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
|
||||
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
|
||||
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
|
||||
if (ret)
|
||||
|
@ -724,11 +724,6 @@ static void probe_gdrom_setupdisk(void)
|
||||
|
||||
static int probe_gdrom_setupqueue(void)
|
||||
{
|
||||
blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
|
||||
/* using DMA so memory will need to be contiguous */
|
||||
blk_queue_max_segments(gd.gdrom_rq, 1);
|
||||
/* set a large max size to get most from DMA */
|
||||
blk_queue_max_segment_size(gd.gdrom_rq, 0x40000);
|
||||
gd.disk->queue = gd.gdrom_rq;
|
||||
return gdrom_init_dma_mode();
|
||||
}
|
||||
@ -743,6 +738,13 @@ static const struct blk_mq_ops gdrom_mq_ops = {
|
||||
*/
|
||||
static int probe_gdrom(struct platform_device *devptr)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = GDROM_HARD_SECTOR,
|
||||
/* using DMA so memory will need to be contiguous */
|
||||
.max_segments = 1,
|
||||
/* set a large max size to get most from DMA */
|
||||
.max_segment_size = 0x40000,
|
||||
};
|
||||
int err;
|
||||
|
||||
/*
|
||||
@ -778,7 +780,7 @@ static int probe_gdrom(struct platform_device *devptr)
|
||||
if (err)
|
||||
goto probe_fail_free_cd_info;
|
||||
|
||||
gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL);
|
||||
gd.disk = blk_mq_alloc_disk(&gd.tag_set, &lim, NULL);
|
||||
if (IS_ERR(gd.disk)) {
|
||||
err = PTR_ERR(gd.disk);
|
||||
goto probe_fail_free_tag_set;
|
||||
@ -829,7 +831,7 @@ static int probe_gdrom(struct platform_device *devptr)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int remove_gdrom(struct platform_device *devptr)
|
||||
static void remove_gdrom(struct platform_device *devptr)
|
||||
{
|
||||
blk_mq_free_tag_set(&gd.tag_set);
|
||||
free_irq(HW_EVENT_GDROM_CMD, &gd);
|
||||
@ -840,13 +842,11 @@ static int remove_gdrom(struct platform_device *devptr)
|
||||
unregister_cdrom(gd.cd_info);
|
||||
kfree(gd.cd_info);
|
||||
kfree(gd.toc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct platform_driver gdrom_driver = {
|
||||
.probe = probe_gdrom,
|
||||
.remove = remove_gdrom,
|
||||
.remove_new = remove_gdrom,
|
||||
.driver = {
|
||||
.name = GDROM_DEV_NAME,
|
||||
},
|
||||
|
@ -900,9 +900,23 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
struct request_queue *q;
|
||||
const size_t max_stripes = min_t(size_t, INT_MAX,
|
||||
SIZE_MAX / sizeof(atomic_t));
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = UINT_MAX,
|
||||
.max_sectors = UINT_MAX,
|
||||
.max_segment_size = UINT_MAX,
|
||||
.max_segments = BIO_MAX_VECS,
|
||||
.max_hw_discard_sectors = UINT_MAX,
|
||||
.io_min = block_size,
|
||||
.logical_block_size = block_size,
|
||||
.physical_block_size = block_size,
|
||||
};
|
||||
uint64_t n;
|
||||
int idx;
|
||||
|
||||
if (cached_bdev) {
|
||||
d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT;
|
||||
lim.io_opt = umax(block_size, bdev_io_opt(cached_bdev));
|
||||
}
|
||||
if (!d->stripe_size)
|
||||
d->stripe_size = 1 << 31;
|
||||
else if (d->stripe_size < BCH_MIN_STRIPE_SZ)
|
||||
@ -935,8 +949,21 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
|
||||
goto out_ida_remove;
|
||||
|
||||
d->disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!d->disk)
|
||||
if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
|
||||
/*
|
||||
* This should only happen with BCACHE_SB_VERSION_BDEV.
|
||||
* Block/page size is checked for BCACHE_SB_VERSION_CDEV.
|
||||
*/
|
||||
pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
|
||||
idx, lim.logical_block_size,
|
||||
PAGE_SIZE, bdev_logical_block_size(cached_bdev));
|
||||
|
||||
/* This also adjusts physical block size/min io size if needed */
|
||||
lim.logical_block_size = bdev_logical_block_size(cached_bdev);
|
||||
}
|
||||
|
||||
d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(d->disk))
|
||||
goto out_bioset_exit;
|
||||
|
||||
set_capacity(d->disk, sectors);
|
||||
@ -949,27 +976,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
d->disk->private_data = d;
|
||||
|
||||
q = d->disk->queue;
|
||||
q->limits.max_hw_sectors = UINT_MAX;
|
||||
q->limits.max_sectors = UINT_MAX;
|
||||
q->limits.max_segment_size = UINT_MAX;
|
||||
q->limits.max_segments = BIO_MAX_VECS;
|
||||
blk_queue_max_discard_sectors(q, UINT_MAX);
|
||||
q->limits.io_min = block_size;
|
||||
q->limits.logical_block_size = block_size;
|
||||
q->limits.physical_block_size = block_size;
|
||||
|
||||
if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
|
||||
/*
|
||||
* This should only happen with BCACHE_SB_VERSION_BDEV.
|
||||
* Block/page size is checked for BCACHE_SB_VERSION_CDEV.
|
||||
*/
|
||||
pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
|
||||
d->disk->disk_name, q->limits.logical_block_size,
|
||||
PAGE_SIZE, bdev_logical_block_size(cached_bdev));
|
||||
|
||||
/* This also adjusts physical block size/min io size if needed */
|
||||
blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
|
||||
}
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
|
||||
|
||||
@ -1416,9 +1422,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
|
||||
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
|
||||
}
|
||||
|
||||
dc->disk.stripe_size = q->limits.io_opt >> 9;
|
||||
|
||||
if (dc->disk.stripe_size)
|
||||
if (bdev_io_opt(dc->bdev))
|
||||
dc->partial_stripes_expensive =
|
||||
q->limits.raid_partial_stripes_expensive;
|
||||
|
||||
@ -1428,9 +1432,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
blk_queue_io_opt(dc->disk.disk->queue,
|
||||
max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
|
||||
|
||||
atomic_set(&dc->io_errors, 0);
|
||||
dc->io_disable = false;
|
||||
dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
|
||||
|
@ -213,6 +213,7 @@ struct raid_dev {
|
||||
#define RT_FLAG_RS_IN_SYNC 6
|
||||
#define RT_FLAG_RS_RESYNCING 7
|
||||
#define RT_FLAG_RS_GROW 8
|
||||
#define RT_FLAG_RS_FROZEN 9
|
||||
|
||||
/* Array elements of 64 bit needed for rebuild/failed disk bits */
|
||||
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
|
||||
@ -3240,11 +3241,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
rs->md.ro = 1;
|
||||
rs->md.in_sync = 1;
|
||||
|
||||
/* Keep array frozen until resume. */
|
||||
set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
|
||||
|
||||
/* Has to be held on running the array */
|
||||
mddev_suspend_and_lock_nointr(&rs->md);
|
||||
|
||||
/* Keep array frozen until resume. */
|
||||
md_frozen_sync_thread(&rs->md);
|
||||
|
||||
r = md_run(&rs->md);
|
||||
rs->md.in_sync = 0; /* Assume already marked dirty */
|
||||
if (r) {
|
||||
@ -3339,7 +3341,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
|
||||
if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
|
||||
return DM_MAPIO_REQUEUE;
|
||||
|
||||
md_handle_request(mddev, bio);
|
||||
if (unlikely(!md_handle_request(mddev, bio)))
|
||||
return DM_MAPIO_REQUEUE;
|
||||
|
||||
return DM_MAPIO_SUBMITTED;
|
||||
}
|
||||
@ -3718,21 +3721,33 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
struct mddev *mddev = &rs->md;
|
||||
int ret = 0;
|
||||
|
||||
if (!mddev->pers || !mddev->pers->sync_request)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcasecmp(argv[0], "frozen"))
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
else
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) ||
|
||||
test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags))
|
||||
return -EBUSY;
|
||||
|
||||
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
|
||||
if (!strcasecmp(argv[0], "frozen")) {
|
||||
ret = mddev_lock(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
md_frozen_sync_thread(mddev);
|
||||
mddev_unlock(mddev);
|
||||
} else if (!strcasecmp(argv[0], "idle")) {
|
||||
ret = mddev_lock(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
md_idle_sync_thread(mddev);
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
|
||||
return -EBUSY;
|
||||
else if (!strcasecmp(argv[0], "resync"))
|
||||
; /* MD_RECOVERY_NEEDED set below */
|
||||
@ -3791,15 +3806,46 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
|
||||
}
|
||||
|
||||
static void raid_presuspend(struct dm_target *ti)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
struct mddev *mddev = &rs->md;
|
||||
|
||||
/*
|
||||
* From now on, disallow raid_message() to change sync_thread until
|
||||
* resume, raid_postsuspend() is too late.
|
||||
*/
|
||||
set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
|
||||
|
||||
if (!reshape_interrupted(mddev))
|
||||
return;
|
||||
|
||||
/*
|
||||
* For raid456, if reshape is interrupted, IO across reshape position
|
||||
* will never make progress, while caller will wait for IO to be done.
|
||||
* Inform raid456 to handle those IO to prevent deadlock.
|
||||
*/
|
||||
if (mddev->pers && mddev->pers->prepare_suspend)
|
||||
mddev->pers->prepare_suspend(mddev);
|
||||
}
|
||||
|
||||
static void raid_presuspend_undo(struct dm_target *ti)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
|
||||
clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
|
||||
}
|
||||
|
||||
static void raid_postsuspend(struct dm_target *ti)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
|
||||
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
|
||||
/* Writes have to be stopped before suspending to avoid deadlocks. */
|
||||
if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
|
||||
md_stop_writes(&rs->md);
|
||||
|
||||
/*
|
||||
* sync_thread must be stopped during suspend, and writes have
|
||||
* to be stopped before suspending to avoid deadlocks.
|
||||
*/
|
||||
md_stop_writes(&rs->md);
|
||||
mddev_suspend(&rs->md, false);
|
||||
}
|
||||
}
|
||||
@ -4012,8 +4058,6 @@ static int raid_preresume(struct dm_target *ti)
|
||||
}
|
||||
|
||||
/* Check for any resize/reshape on @rs and adjust/initiate */
|
||||
/* Be prepared for mddev_resume() in raid_resume() */
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
|
||||
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
mddev->resync_min = mddev->recovery_cp;
|
||||
@ -4047,7 +4091,9 @@ static void raid_resume(struct dm_target *ti)
|
||||
* Take this opportunity to check whether any failed
|
||||
* devices are reachable again.
|
||||
*/
|
||||
mddev_lock_nointr(mddev);
|
||||
attempt_restore_of_faulty_devices(rs);
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
|
||||
@ -4055,10 +4101,13 @@ static void raid_resume(struct dm_target *ti)
|
||||
if (mddev->delta_disks < 0)
|
||||
rs_set_capacity(rs);
|
||||
|
||||
WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
|
||||
WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
|
||||
clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
|
||||
mddev_lock_nointr(mddev);
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
mddev->ro = 0;
|
||||
mddev->in_sync = 0;
|
||||
md_unfrozen_sync_thread(mddev);
|
||||
mddev_unlock_and_resume(mddev);
|
||||
}
|
||||
}
|
||||
@ -4074,6 +4123,8 @@ static struct target_type raid_target = {
|
||||
.message = raid_message,
|
||||
.iterate_devices = raid_iterate_devices,
|
||||
.io_hints = raid_io_hints,
|
||||
.presuspend = raid_presuspend,
|
||||
.presuspend_undo = raid_presuspend_undo,
|
||||
.postsuspend = raid_postsuspend,
|
||||
.preresume = raid_preresume,
|
||||
.resume = raid_resume,
|
||||
|
@ -1963,26 +1963,27 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
bool wc = false, fua = false;
|
||||
int r;
|
||||
|
||||
/*
|
||||
* Copy table's limits to the DM device's request_queue
|
||||
*/
|
||||
q->limits = *limits;
|
||||
|
||||
if (dm_table_supports_nowait(t))
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
|
||||
|
||||
if (!dm_table_supports_discards(t)) {
|
||||
q->limits.max_discard_sectors = 0;
|
||||
q->limits.max_hw_discard_sectors = 0;
|
||||
q->limits.discard_granularity = 0;
|
||||
q->limits.discard_alignment = 0;
|
||||
q->limits.discard_misaligned = 0;
|
||||
limits->max_hw_discard_sectors = 0;
|
||||
limits->discard_granularity = 0;
|
||||
limits->discard_alignment = 0;
|
||||
limits->discard_misaligned = 0;
|
||||
}
|
||||
|
||||
if (!dm_table_supports_write_zeroes(t))
|
||||
limits->max_write_zeroes_sectors = 0;
|
||||
|
||||
if (!dm_table_supports_secure_erase(t))
|
||||
q->limits.max_secure_erase_sectors = 0;
|
||||
limits->max_secure_erase_sectors = 0;
|
||||
|
||||
r = queue_limits_set(q, limits);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
|
||||
wc = true;
|
||||
@ -2007,9 +2008,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
if (!dm_table_supports_write_zeroes(t))
|
||||
q->limits.max_write_zeroes_sectors = 0;
|
||||
|
||||
dm_table_verify_integrity(t);
|
||||
|
||||
/*
|
||||
@ -2047,7 +2045,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
}
|
||||
|
||||
dm_update_crypto_profile(q, t);
|
||||
disk_update_readahead(t->md->disk);
|
||||
|
||||
/*
|
||||
* Check for request-based device is left to
|
||||
|
@ -1655,10 +1655,13 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
|
||||
|
||||
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
|
||||
struct dmz_dev *dev = zone->dev;
|
||||
unsigned int noio_flag;
|
||||
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
|
||||
dmz_start_sect(zmd, zone),
|
||||
zmd->zone_nr_sectors, GFP_NOIO);
|
||||
zmd->zone_nr_sectors);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
if (ret) {
|
||||
dmz_dev_err(dev, "Reset zone %u failed %d",
|
||||
zone->id, ret);
|
||||
|
@ -2101,8 +2101,8 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
* established. If request-based table is loaded: blk-mq will
|
||||
* override accordingly.
|
||||
*/
|
||||
md->disk = blk_alloc_disk(md->numa_node_id);
|
||||
if (!md->disk)
|
||||
md->disk = blk_alloc_disk(NULL, md->numa_node_id);
|
||||
if (IS_ERR(md->disk))
|
||||
goto bad;
|
||||
md->queue = md->disk->queue;
|
||||
|
||||
|
@ -234,7 +234,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
|
||||
sector_t doff;
|
||||
|
||||
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
|
||||
if (pg_index == store->file_pages - 1) {
|
||||
/* we compare length (page numbers), not page offset. */
|
||||
if ((pg_index - store->sb_index) == store->file_pages - 1) {
|
||||
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
|
||||
|
||||
if (last_page_size == 0)
|
||||
@ -438,8 +439,8 @@ static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
|
||||
struct page *page = store->filemap[pg_index];
|
||||
|
||||
if (mddev_is_clustered(bitmap->mddev)) {
|
||||
pg_index += bitmap->cluster_slot *
|
||||
DIV_ROUND_UP(store->bytes, PAGE_SIZE);
|
||||
/* go to node bitmap area starting point */
|
||||
pg_index += store->sb_index;
|
||||
}
|
||||
|
||||
if (store->file)
|
||||
@ -952,6 +953,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||
unsigned long index = file_page_index(store, chunk);
|
||||
unsigned long node_offset = 0;
|
||||
|
||||
index += store->sb_index;
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
node_offset = bitmap->cluster_slot * store->file_pages;
|
||||
|
||||
@ -982,6 +984,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||
unsigned long index = file_page_index(store, chunk);
|
||||
unsigned long node_offset = 0;
|
||||
|
||||
index += store->sb_index;
|
||||
if (mddev_is_clustered(bitmap->mddev))
|
||||
node_offset = bitmap->cluster_slot * store->file_pages;
|
||||
|
||||
@ -1043,9 +1046,8 @@ void md_bitmap_unplug(struct bitmap *bitmap)
|
||||
if (dirty || need_write) {
|
||||
if (!writing) {
|
||||
md_bitmap_wait_writes(bitmap);
|
||||
if (bitmap->mddev->queue)
|
||||
blk_add_trace_msg(bitmap->mddev->queue,
|
||||
"md bitmap_unplug");
|
||||
mddev_add_trace_msg(bitmap->mddev,
|
||||
"md bitmap_unplug");
|
||||
}
|
||||
clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
|
||||
filemap_write_page(bitmap, i, false);
|
||||
@ -1316,9 +1318,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
}
|
||||
bitmap->allclean = 1;
|
||||
|
||||
if (bitmap->mddev->queue)
|
||||
blk_add_trace_msg(bitmap->mddev->queue,
|
||||
"md bitmap_daemon_work");
|
||||
mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");
|
||||
|
||||
/* Any file-page which is PENDING now needs to be written.
|
||||
* So set NEEDWRITE now, then after we make any last-minute changes
|
||||
|
@ -1,17 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINEAR_H
|
||||
#define _LINEAR_H
|
||||
|
||||
struct dev_info {
|
||||
struct md_rdev *rdev;
|
||||
sector_t end_sector;
|
||||
};
|
||||
|
||||
struct linear_conf
|
||||
{
|
||||
struct rcu_head rcu;
|
||||
sector_t array_sectors;
|
||||
int raid_disks; /* a copy of mddev->raid_disks */
|
||||
struct dev_info disks[] __counted_by(raid_disks);
|
||||
};
|
||||
#endif
|
@ -1,32 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _MULTIPATH_H
|
||||
#define _MULTIPATH_H
|
||||
|
||||
struct multipath_info {
|
||||
struct md_rdev *rdev;
|
||||
};
|
||||
|
||||
struct mpconf {
|
||||
struct mddev *mddev;
|
||||
struct multipath_info *multipaths;
|
||||
int raid_disks;
|
||||
spinlock_t device_lock;
|
||||
struct list_head retry_list;
|
||||
|
||||
mempool_t pool;
|
||||
};
|
||||
|
||||
/*
|
||||
* this is our 'private' 'collective' MULTIPATH buffer head.
|
||||
* it contains information about what kind of IO operations were started
|
||||
* for this MULTIPATH operation, and about their status:
|
||||
*/
|
||||
|
||||
struct multipath_bh {
|
||||
struct mddev *mddev;
|
||||
struct bio *master_bio;
|
||||
struct bio bio;
|
||||
int path;
|
||||
struct list_head retry_list;
|
||||
};
|
||||
#endif
|
400
drivers/md/md.c
400
drivers/md/md.c
@ -65,7 +65,6 @@
|
||||
#include <linux/percpu-refcount.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
#include "md.h"
|
||||
#include "md-bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
@ -99,18 +98,6 @@ static void mddev_detach(struct mddev *mddev);
|
||||
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
|
||||
static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
|
||||
|
||||
enum md_ro_state {
|
||||
MD_RDWR,
|
||||
MD_RDONLY,
|
||||
MD_AUTO_READ,
|
||||
MD_MAX_STATE
|
||||
};
|
||||
|
||||
static bool md_is_rdwr(struct mddev *mddev)
|
||||
{
|
||||
return (mddev->ro == MD_RDWR);
|
||||
}
|
||||
|
||||
/*
|
||||
* Default number of read corrections we'll attempt on an rdev
|
||||
* before ejecting it from the array. We divide the read error
|
||||
@ -378,7 +365,7 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
|
||||
return true;
|
||||
}
|
||||
|
||||
void md_handle_request(struct mddev *mddev, struct bio *bio)
|
||||
bool md_handle_request(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
check_suspended:
|
||||
if (is_suspended(mddev, bio)) {
|
||||
@ -386,7 +373,7 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
|
||||
/* Bail out if REQ_NOWAIT is set for the bio */
|
||||
if (bio->bi_opf & REQ_NOWAIT) {
|
||||
bio_wouldblock_error(bio);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
for (;;) {
|
||||
prepare_to_wait(&mddev->sb_wait, &__wait,
|
||||
@ -402,10 +389,13 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
|
||||
|
||||
if (!mddev->pers->make_request(mddev, bio)) {
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
if (!mddev->gendisk && mddev->pers->prepare_suspend)
|
||||
return false;
|
||||
goto check_suspended;
|
||||
}
|
||||
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(md_handle_request);
|
||||
|
||||
@ -529,6 +519,24 @@ void mddev_resume(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_resume);
|
||||
|
||||
/* sync bdev before setting device to readonly or stopping raid*/
|
||||
static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_num)
|
||||
{
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
if (mddev->pers && atomic_read(&mddev->openers) > opener_num) {
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
|
||||
sync_blockdev(mddev->gendisk->part0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generic flush handling for md
|
||||
*/
|
||||
@ -2406,7 +2414,7 @@ int md_integrity_register(struct mddev *mddev)
|
||||
|
||||
if (list_empty(&mddev->disks))
|
||||
return 0; /* nothing to do */
|
||||
if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
|
||||
if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
|
||||
return 0; /* shouldn't register, or already is */
|
||||
rdev_for_each(rdev, mddev) {
|
||||
/* skip spares and non-functional disks */
|
||||
@ -2459,7 +2467,7 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
{
|
||||
struct blk_integrity *bi_mddev;
|
||||
|
||||
if (!mddev->gendisk)
|
||||
if (mddev_is_dm(mddev))
|
||||
return 0;
|
||||
|
||||
bi_mddev = blk_get_integrity(mddev->gendisk);
|
||||
@ -2566,6 +2574,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
|
||||
fail:
|
||||
pr_warn("md: failed to register dev-%s for %s\n",
|
||||
b, mdname(mddev));
|
||||
mddev_destroy_serial_pool(mddev, rdev);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -2595,7 +2604,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
|
||||
list_del_rcu(&rdev->same_set);
|
||||
pr_debug("md: unbind<%pg>\n", rdev->bdev);
|
||||
mddev_destroy_serial_pool(rdev->mddev, rdev);
|
||||
rdev->mddev = NULL;
|
||||
WRITE_ONCE(rdev->mddev, NULL);
|
||||
sysfs_remove_link(&rdev->kobj, "block");
|
||||
sysfs_put(rdev->sysfs_state);
|
||||
sysfs_put(rdev->sysfs_unack_badblocks);
|
||||
@ -2851,8 +2860,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
|
||||
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
|
||||
mdname(mddev), mddev->in_sync);
|
||||
|
||||
if (mddev->queue)
|
||||
blk_add_trace_msg(mddev->queue, "md md_update_sb");
|
||||
mddev_add_trace_msg(mddev, "md md_update_sb");
|
||||
rewrite:
|
||||
md_bitmap_update_sb(mddev->bitmap);
|
||||
rdev_for_each(rdev, mddev) {
|
||||
@ -2933,7 +2941,6 @@ static int add_bound_rdev(struct md_rdev *rdev)
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_new_event();
|
||||
md_wakeup_thread(mddev->thread);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3048,10 +3055,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
|
||||
if (err == 0) {
|
||||
md_kick_rdev_from_array(rdev);
|
||||
if (mddev->pers) {
|
||||
if (mddev->pers)
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
md_new_event();
|
||||
}
|
||||
}
|
||||
@ -3081,7 +3086,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
clear_bit(BlockedBadBlocks, &rdev->flags);
|
||||
wake_up(&rdev->blocked_wait);
|
||||
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
|
||||
md_wakeup_thread(rdev->mddev->thread);
|
||||
|
||||
err = 0;
|
||||
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
|
||||
@ -3119,7 +3123,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
!test_bit(Replacement, &rdev->flags))
|
||||
set_bit(WantReplacement, &rdev->flags);
|
||||
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
|
||||
md_wakeup_thread(rdev->mddev->thread);
|
||||
err = 0;
|
||||
} else if (cmd_match(buf, "-want_replacement")) {
|
||||
/* Clearing 'want_replacement' is always allowed.
|
||||
@ -3249,7 +3252,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
if (rdev->raid_disk >= 0)
|
||||
return -EBUSY;
|
||||
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
|
||||
md_wakeup_thread(rdev->mddev->thread);
|
||||
} else if (rdev->mddev->pers) {
|
||||
/* Activating a spare .. or possibly reactivating
|
||||
* if we ever get bitmaps working here.
|
||||
@ -3343,8 +3345,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
|
||||
if (kstrtoull(buf, 10, &new_offset) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (mddev->sync_thread ||
|
||||
test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return -EBUSY;
|
||||
if (new_offset == rdev->data_offset)
|
||||
/* reset is always permitted */
|
||||
@ -3675,7 +3676,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
struct kernfs_node *kn = NULL;
|
||||
bool suspend = false;
|
||||
ssize_t rv;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
struct mddev *mddev = READ_ONCE(rdev->mddev);
|
||||
|
||||
if (!entry->store)
|
||||
return -EIO;
|
||||
@ -4017,8 +4018,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
*/
|
||||
|
||||
rv = -EBUSY;
|
||||
if (mddev->sync_thread ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
mddev->reshape_position != MaxSector ||
|
||||
mddev->sysfs_active)
|
||||
goto out_unlock;
|
||||
@ -4168,7 +4168,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
mddev->in_sync = 1;
|
||||
del_timer_sync(&mddev->safemode_timer);
|
||||
}
|
||||
blk_set_stacking_limits(&mddev->queue->limits);
|
||||
pers->run(mddev);
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
if (!mddev->thread)
|
||||
@ -4475,8 +4474,8 @@ array_state_show(struct mddev *mddev, char *page)
|
||||
return sprintf(page, "%s\n", array_states[st]);
|
||||
}
|
||||
|
||||
static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
|
||||
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
|
||||
static int do_md_stop(struct mddev *mddev, int ro);
|
||||
static int md_set_readonly(struct mddev *mddev);
|
||||
static int restart_array(struct mddev *mddev);
|
||||
|
||||
static ssize_t
|
||||
@ -4493,6 +4492,17 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
case broken: /* cannot be set */
|
||||
case bad_word:
|
||||
return -EINVAL;
|
||||
case clear:
|
||||
case readonly:
|
||||
case inactive:
|
||||
case read_auto:
|
||||
if (!mddev->pers || !md_is_rdwr(mddev))
|
||||
break;
|
||||
/* write sysfs will not open mddev and opener should be 0 */
|
||||
err = mddev_set_closing_and_sync_blockdev(mddev, 0);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -4526,14 +4536,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
case inactive:
|
||||
/* stop an active array, return 0 otherwise */
|
||||
if (mddev->pers)
|
||||
err = do_md_stop(mddev, 2, NULL);
|
||||
err = do_md_stop(mddev, 2);
|
||||
break;
|
||||
case clear:
|
||||
err = do_md_stop(mddev, 0, NULL);
|
||||
err = do_md_stop(mddev, 0);
|
||||
break;
|
||||
case readonly:
|
||||
if (mddev->pers)
|
||||
err = md_set_readonly(mddev, NULL);
|
||||
err = md_set_readonly(mddev);
|
||||
else {
|
||||
mddev->ro = MD_RDONLY;
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
@ -4543,7 +4553,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
case read_auto:
|
||||
if (mddev->pers) {
|
||||
if (md_is_rdwr(mddev))
|
||||
err = md_set_readonly(mddev, NULL);
|
||||
err = md_set_readonly(mddev);
|
||||
else if (mddev->ro == MD_RDONLY)
|
||||
err = restart_array(mddev);
|
||||
if (err == 0) {
|
||||
@ -4592,6 +4602,11 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
}
|
||||
mddev_unlock(mddev);
|
||||
|
||||
if (st == readonly || st == read_auto || st == inactive ||
|
||||
(err && st == clear))
|
||||
clear_bit(MD_CLOSING, &mddev->flags);
|
||||
|
||||
return err ?: len;
|
||||
}
|
||||
static struct md_sysfs_entry md_array_state =
|
||||
@ -4919,6 +4934,35 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
|
||||
mddev_lock_nointr(mddev);
|
||||
}
|
||||
|
||||
void md_idle_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev, true, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_idle_sync_thread);
|
||||
|
||||
void md_frozen_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev, true, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
|
||||
|
||||
void md_unfrozen_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
|
||||
|
||||
static void idle_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
@ -5710,6 +5754,51 @@ static const struct kobj_type md_ktype = {
|
||||
|
||||
int mdp_major = 0;
|
||||
|
||||
/* stack the limit for all rdevs into lim */
|
||||
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
|
||||
mddev->gendisk->disk_name);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
|
||||
|
||||
/* apply the extra stacking limits from a new rdev into mddev */
|
||||
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
|
||||
if (mddev_is_dm(mddev))
|
||||
return 0;
|
||||
|
||||
lim = queue_limits_start_update(mddev->gendisk->queue);
|
||||
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
|
||||
mddev->gendisk->disk_name);
|
||||
return queue_limits_commit_update(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);
|
||||
|
||||
/* update the optimal I/O size after a reshape */
|
||||
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
|
||||
if (mddev_is_dm(mddev))
|
||||
return;
|
||||
|
||||
/* don't bother updating io_opt if we can't suspend the array */
|
||||
if (mddev_suspend(mddev, false) < 0)
|
||||
return;
|
||||
lim = queue_limits_start_update(mddev->gendisk->queue);
|
||||
lim.io_opt = lim.io_min * nr_stripes;
|
||||
queue_limits_commit_update(mddev->gendisk->queue, &lim);
|
||||
mddev_resume(mddev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_update_io_opt);
|
||||
|
||||
static void mddev_delayed_delete(struct work_struct *ws)
|
||||
{
|
||||
struct mddev *mddev = container_of(ws, struct mddev, del_work);
|
||||
@ -5774,10 +5863,11 @@ struct mddev *md_alloc(dev_t dev, char *name)
|
||||
*/
|
||||
mddev->hold_active = UNTIL_STOP;
|
||||
|
||||
error = -ENOMEM;
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
disk = blk_alloc_disk(NULL, NUMA_NO_NODE);
|
||||
if (IS_ERR(disk)) {
|
||||
error = PTR_ERR(disk);
|
||||
goto out_free_mddev;
|
||||
}
|
||||
|
||||
disk->major = MAJOR(mddev->unit);
|
||||
disk->first_minor = unit << shift;
|
||||
@ -5791,9 +5881,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
|
||||
disk->fops = &md_fops;
|
||||
disk->private_data = mddev;
|
||||
|
||||
mddev->queue = disk->queue;
|
||||
blk_set_stacking_limits(&mddev->queue->limits);
|
||||
blk_queue_write_cache(mddev->queue, true, true);
|
||||
blk_queue_write_cache(disk->queue, true, true);
|
||||
disk->events |= DISK_EVENT_MEDIA_CHANGE;
|
||||
mddev->gendisk = disk;
|
||||
error = add_disk(disk);
|
||||
@ -5935,7 +6023,7 @@ int md_run(struct mddev *mddev)
|
||||
invalidate_bdev(rdev->bdev);
|
||||
if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
|
||||
mddev->ro = MD_RDONLY;
|
||||
if (mddev->gendisk)
|
||||
if (!mddev_is_dm(mddev))
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
}
|
||||
|
||||
@ -6038,7 +6126,10 @@ int md_run(struct mddev *mddev)
|
||||
pr_warn("True protection against single-disk failure might be compromised.\n");
|
||||
}
|
||||
|
||||
mddev->recovery = 0;
|
||||
/* dm-raid expect sync_thread to be frozen until resume */
|
||||
if (mddev->gendisk)
|
||||
mddev->recovery = 0;
|
||||
|
||||
/* may be over-ridden by personality */
|
||||
mddev->resync_max_sectors = mddev->dev_sectors;
|
||||
|
||||
@ -6094,7 +6185,8 @@ int md_run(struct mddev *mddev)
|
||||
}
|
||||
}
|
||||
|
||||
if (mddev->queue) {
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
struct request_queue *q = mddev->gendisk->queue;
|
||||
bool nonrot = true;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
@ -6106,14 +6198,14 @@ int md_run(struct mddev *mddev)
|
||||
if (mddev->degraded)
|
||||
nonrot = false;
|
||||
if (nonrot)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
|
||||
|
||||
/* Set the NOWAIT flags if all underlying devices support it */
|
||||
if (nowait)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
|
||||
}
|
||||
if (pers->sync_request) {
|
||||
if (mddev->kobj.sd &&
|
||||
@ -6192,7 +6284,6 @@ int do_md_run(struct mddev *mddev)
|
||||
/* run start up tasks that require md_thread */
|
||||
md_start(mddev);
|
||||
|
||||
md_wakeup_thread(mddev->thread);
|
||||
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
|
||||
|
||||
set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
|
||||
@ -6213,7 +6304,6 @@ int md_start(struct mddev *mddev)
|
||||
|
||||
if (mddev->pers->start) {
|
||||
set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
ret = mddev->pers->start(mddev);
|
||||
clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->sync_thread);
|
||||
@ -6258,7 +6348,6 @@ static int restart_array(struct mddev *mddev)
|
||||
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
|
||||
/* Kick recovery or resync if necessary */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
md_wakeup_thread(mddev->sync_thread);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
return 0;
|
||||
@ -6278,7 +6367,15 @@ static void md_clean(struct mddev *mddev)
|
||||
mddev->persistent = 0;
|
||||
mddev->level = LEVEL_NONE;
|
||||
mddev->clevel[0] = 0;
|
||||
mddev->flags = 0;
|
||||
/*
|
||||
* Don't clear MD_CLOSING, or mddev can be opened again.
|
||||
* 'hold_active != 0' means mddev is still in the creation
|
||||
* process and will be used later.
|
||||
*/
|
||||
if (mddev->hold_active)
|
||||
mddev->flags = 0;
|
||||
else
|
||||
mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
|
||||
mddev->sb_flags = 0;
|
||||
mddev->ro = MD_RDWR;
|
||||
mddev->metadata_type[0] = 0;
|
||||
@ -6315,7 +6412,6 @@ static void md_clean(struct mddev *mddev)
|
||||
|
||||
static void __md_stop_writes(struct mddev *mddev)
|
||||
{
|
||||
stop_sync_thread(mddev, true, false);
|
||||
del_timer_sync(&mddev->safemode_timer);
|
||||
|
||||
if (mddev->pers && mddev->pers->quiesce) {
|
||||
@ -6340,6 +6436,8 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||
void md_stop_writes(struct mddev *mddev)
|
||||
{
|
||||
mddev_lock_nointr(mddev);
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev, true, false);
|
||||
__md_stop_writes(mddev);
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
@ -6353,8 +6451,10 @@ static void mddev_detach(struct mddev *mddev)
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
md_unregister_thread(mddev, &mddev->thread);
|
||||
if (mddev->queue)
|
||||
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
|
||||
|
||||
/* the unplug fn references 'conf' */
|
||||
if (!mddev_is_dm(mddev))
|
||||
blk_sync_queue(mddev->gendisk->queue);
|
||||
}
|
||||
|
||||
static void __md_stop(struct mddev *mddev)
|
||||
@ -6391,7 +6491,8 @@ void md_stop(struct mddev *mddev)
|
||||
|
||||
EXPORT_SYMBOL_GPL(md_stop);
|
||||
|
||||
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
|
||||
/* ensure 'mddev->pers' exist before calling md_set_readonly() */
|
||||
static int md_set_readonly(struct mddev *mddev)
|
||||
{
|
||||
int err = 0;
|
||||
int did_freeze = 0;
|
||||
@ -6402,7 +6503,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
|
||||
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
|
||||
did_freeze = 1;
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, false, false);
|
||||
@ -6410,36 +6510,29 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
||||
mddev_lock_nointr(mddev);
|
||||
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
|
||||
mddev->sync_thread ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
pr_warn("md: %s still in use.\n",mdname(mddev));
|
||||
err = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (mddev->pers) {
|
||||
__md_stop_writes(mddev);
|
||||
__md_stop_writes(mddev);
|
||||
|
||||
if (mddev->ro == MD_RDONLY) {
|
||||
err = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mddev->ro = MD_RDONLY;
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
if (mddev->ro == MD_RDONLY) {
|
||||
err = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mddev->ro = MD_RDONLY;
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
|
||||
out:
|
||||
if ((mddev->pers && !err) || did_freeze) {
|
||||
if (!err || did_freeze) {
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
}
|
||||
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -6447,8 +6540,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
|
||||
* 0 - completely stop and dis-assemble array
|
||||
* 2 - stop but do not disassemble array
|
||||
*/
|
||||
static int do_md_stop(struct mddev *mddev, int mode,
|
||||
struct block_device *bdev)
|
||||
static int do_md_stop(struct mddev *mddev, int mode)
|
||||
{
|
||||
struct gendisk *disk = mddev->gendisk;
|
||||
struct md_rdev *rdev;
|
||||
@ -6457,22 +6549,16 @@ static int do_md_stop(struct mddev *mddev, int mode,
|
||||
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
|
||||
did_freeze = 1;
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, true, false);
|
||||
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
|
||||
mddev->sysfs_active ||
|
||||
mddev->sync_thread ||
|
||||
if (mddev->sysfs_active ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
pr_warn("md: %s still in use.\n",mdname(mddev));
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
if (did_freeze) {
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
return -EBUSY;
|
||||
}
|
||||
@ -6491,13 +6577,11 @@ static int do_md_stop(struct mddev *mddev, int mode,
|
||||
sysfs_unlink_rdev(mddev, rdev);
|
||||
|
||||
set_capacity_and_notify(disk, 0);
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
mddev->changed = 1;
|
||||
|
||||
if (!md_is_rdwr(mddev))
|
||||
mddev->ro = MD_RDWR;
|
||||
} else
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
}
|
||||
/*
|
||||
* Free resources if final stop
|
||||
*/
|
||||
@ -6543,7 +6627,7 @@ static void autorun_array(struct mddev *mddev)
|
||||
err = do_md_run(mddev);
|
||||
if (err) {
|
||||
pr_warn("md: do_md_run() returned %d\n", err);
|
||||
do_md_stop(mddev, 0, NULL);
|
||||
do_md_stop(mddev, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7013,9 +7097,7 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
|
||||
|
||||
md_kick_rdev_from_array(rdev);
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
if (mddev->thread)
|
||||
md_wakeup_thread(mddev->thread);
|
||||
else
|
||||
if (!mddev->thread)
|
||||
md_update_sb(mddev, 1);
|
||||
md_new_event();
|
||||
|
||||
@ -7090,14 +7172,13 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
|
||||
if (!bdev_nowait(rdev->bdev)) {
|
||||
pr_info("%s: Disabling nowait because %pg does not support nowait\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
|
||||
}
|
||||
/*
|
||||
* Kick recovery, maybe this spare has to be added to the
|
||||
* array immediately.
|
||||
*/
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
md_new_event();
|
||||
return 0;
|
||||
|
||||
@ -7311,8 +7392,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
|
||||
* of each device. If num_sectors is zero, we find the largest size
|
||||
* that fits.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
mddev->sync_thread)
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return -EBUSY;
|
||||
if (!md_is_rdwr(mddev))
|
||||
return -EROFS;
|
||||
@ -7329,10 +7409,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
|
||||
if (!rv) {
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->update_size(mddev, old_dev_sectors);
|
||||
else if (mddev->queue) {
|
||||
else if (!mddev_is_dm(mddev))
|
||||
set_capacity_and_notify(mddev->gendisk,
|
||||
mddev->array_sectors);
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
@ -7349,8 +7428,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
|
||||
if (raid_disks <= 0 ||
|
||||
(mddev->max_disks && raid_disks >= mddev->max_disks))
|
||||
return -EINVAL;
|
||||
if (mddev->sync_thread ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
|
||||
mddev->reshape_position != MaxSector)
|
||||
return -EBUSY;
|
||||
@ -7546,16 +7624,17 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool md_ioctl_valid(unsigned int cmd)
|
||||
static inline int md_ioctl_valid(unsigned int cmd)
|
||||
{
|
||||
switch (cmd) {
|
||||
case ADD_NEW_DISK:
|
||||
case GET_ARRAY_INFO:
|
||||
case GET_BITMAP_FILE:
|
||||
case GET_DISK_INFO:
|
||||
case RAID_VERSION:
|
||||
return 0;
|
||||
case ADD_NEW_DISK:
|
||||
case GET_BITMAP_FILE:
|
||||
case HOT_ADD_DISK:
|
||||
case HOT_REMOVE_DISK:
|
||||
case RAID_VERSION:
|
||||
case RESTART_ARRAY_RW:
|
||||
case RUN_ARRAY:
|
||||
case SET_ARRAY_INFO:
|
||||
@ -7564,9 +7643,11 @@ static inline bool md_ioctl_valid(unsigned int cmd)
|
||||
case STOP_ARRAY:
|
||||
case STOP_ARRAY_RO:
|
||||
case CLUSTERED_DISK_NACK:
|
||||
return true;
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
return 0;
|
||||
default:
|
||||
return false;
|
||||
return -ENOTTY;
|
||||
}
|
||||
}
|
||||
|
||||
@ -7624,31 +7705,17 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
int err = 0;
|
||||
void __user *argp = (void __user *)arg;
|
||||
struct mddev *mddev = NULL;
|
||||
bool did_set_md_closing = false;
|
||||
|
||||
if (!md_ioctl_valid(cmd))
|
||||
return -ENOTTY;
|
||||
|
||||
switch (cmd) {
|
||||
case RAID_VERSION:
|
||||
case GET_ARRAY_INFO:
|
||||
case GET_DISK_INFO:
|
||||
break;
|
||||
default:
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
}
|
||||
err = md_ioctl_valid(cmd);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* Commands dealing with the RAID driver but not any
|
||||
* particular array:
|
||||
*/
|
||||
switch (cmd) {
|
||||
case RAID_VERSION:
|
||||
err = get_version(argp);
|
||||
goto out;
|
||||
default:;
|
||||
}
|
||||
if (cmd == RAID_VERSION)
|
||||
return get_version(argp);
|
||||
|
||||
/*
|
||||
* Commands creating/starting a new array:
|
||||
@ -7656,35 +7723,23 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
|
||||
mddev = bdev->bd_disk->private_data;
|
||||
|
||||
if (!mddev) {
|
||||
BUG();
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Some actions do not require the mutex */
|
||||
switch (cmd) {
|
||||
case GET_ARRAY_INFO:
|
||||
if (!mddev->raid_disks && !mddev->external)
|
||||
err = -ENODEV;
|
||||
else
|
||||
err = get_array_info(mddev, argp);
|
||||
goto out;
|
||||
return -ENODEV;
|
||||
return get_array_info(mddev, argp);
|
||||
|
||||
case GET_DISK_INFO:
|
||||
if (!mddev->raid_disks && !mddev->external)
|
||||
err = -ENODEV;
|
||||
else
|
||||
err = get_disk_info(mddev, argp);
|
||||
goto out;
|
||||
return -ENODEV;
|
||||
return get_disk_info(mddev, argp);
|
||||
|
||||
case SET_DISK_FAULTY:
|
||||
err = set_disk_faulty(mddev, new_decode_dev(arg));
|
||||
goto out;
|
||||
return set_disk_faulty(mddev, new_decode_dev(arg));
|
||||
|
||||
case GET_BITMAP_FILE:
|
||||
err = get_bitmap_file(mddev, argp);
|
||||
goto out;
|
||||
|
||||
return get_bitmap_file(mddev, argp);
|
||||
}
|
||||
|
||||
if (cmd == HOT_REMOVE_DISK)
|
||||
@ -7697,20 +7752,9 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
/* Need to flush page cache, and ensure no-one else opens
|
||||
* and writes
|
||||
*/
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
if (mddev->pers && atomic_read(&mddev->openers) > 1) {
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
err = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
err = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
did_set_md_closing = true;
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
sync_blockdev(bdev);
|
||||
err = mddev_set_closing_and_sync_blockdev(mddev, 1);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!md_is_rdwr(mddev))
|
||||
@ -7751,11 +7795,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
goto unlock;
|
||||
|
||||
case STOP_ARRAY:
|
||||
err = do_md_stop(mddev, 0, bdev);
|
||||
err = do_md_stop(mddev, 0);
|
||||
goto unlock;
|
||||
|
||||
case STOP_ARRAY_RO:
|
||||
err = md_set_readonly(mddev, bdev);
|
||||
if (mddev->pers)
|
||||
err = md_set_readonly(mddev);
|
||||
goto unlock;
|
||||
|
||||
case HOT_REMOVE_DISK:
|
||||
@ -7850,7 +7895,7 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
mddev_unlock(mddev);
|
||||
|
||||
out:
|
||||
if(did_set_md_closing)
|
||||
if (cmd == STOP_ARRAY_RO || (err && cmd == STOP_ARRAY))
|
||||
clear_bit(MD_CLOSING, &mddev->flags);
|
||||
return err;
|
||||
}
|
||||
@ -8687,10 +8732,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
|
||||
bio_chain(discard_bio, bio);
|
||||
bio_clone_blkg_association(discard_bio, bio);
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(discard_bio,
|
||||
disk_devt(mddev->gendisk),
|
||||
bio->bi_iter.bi_sector);
|
||||
mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector);
|
||||
submit_bio_noacct(discard_bio);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
|
||||
@ -8737,6 +8779,23 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_account_bio);
|
||||
|
||||
void md_free_cloned_bio(struct bio *bio)
|
||||
{
|
||||
struct md_io_clone *md_io_clone = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_clone->orig_bio;
|
||||
struct mddev *mddev = md_io_clone->mddev;
|
||||
|
||||
if (bio->bi_status && !orig_bio->bi_status)
|
||||
orig_bio->bi_status = bio->bi_status;
|
||||
|
||||
if (md_io_clone->start_time)
|
||||
bio_end_io_acct(orig_bio, md_io_clone->start_time);
|
||||
|
||||
bio_put(bio);
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_free_cloned_bio);
|
||||
|
||||
/* md_allow_write(mddev)
|
||||
* Calling this ensures that the array is marked 'active' so that writes
|
||||
* may proceed without blocking. It is important to call this before
|
||||
@ -9170,7 +9229,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
mddev->delta_disks > 0 &&
|
||||
mddev->pers->finish_reshape &&
|
||||
mddev->pers->size &&
|
||||
mddev->queue) {
|
||||
!mddev_is_dm(mddev)) {
|
||||
mddev_lock_nointr(mddev);
|
||||
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
|
||||
mddev_unlock(mddev);
|
||||
@ -9270,9 +9329,14 @@ static bool md_spares_need_change(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (rdev_removeable(rdev) || rdev_addable(rdev))
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev) {
|
||||
if (rdev_removeable(rdev) || rdev_addable(rdev)) {
|
||||
rcu_read_unlock();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/timer.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <trace/events/block.h>
|
||||
#include "md-cluster.h"
|
||||
|
||||
#define MaxSector (~(sector_t)0)
|
||||
@ -207,6 +208,7 @@ enum flag_bits {
|
||||
* check if there is collision between raid1
|
||||
* serial bios.
|
||||
*/
|
||||
Nonrot, /* non-rotational device (SSD) */
|
||||
};
|
||||
|
||||
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
@ -222,6 +224,16 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper around is_badblock() for callers that only need its
 * return value: the first-bad-sector and bad-sector-count outputs are
 * collected in throwaway locals and discarded.
 */
static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s,
				    int sectors)
{
	sector_t unused_first_bad;
	int unused_bad_sectors;

	return is_badblock(rdev, s, sectors, &unused_first_bad,
			   &unused_bad_sectors);
}
|
||||
|
||||
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new);
|
||||
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
@ -468,7 +480,6 @@ struct mddev {
|
||||
struct timer_list safemode_timer;
|
||||
struct percpu_ref writes_pending;
|
||||
int sync_checkers; /* # of threads checking writes_pending */
|
||||
struct request_queue *queue; /* for plugging ... */
|
||||
|
||||
struct bitmap *bitmap; /* the bitmap for the device */
|
||||
struct {
|
||||
@ -558,6 +569,37 @@ enum recovery_flags {
|
||||
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
|
||||
};
|
||||
|
||||
/* Values held in mddev->ro. */
enum md_ro_state {
	MD_RDWR,	/* the value md_is_rdwr() compares against */
	MD_RDONLY,
	MD_AUTO_READ,	/* NOTE(review): presumably "auto read-only" - confirm */
	MD_MAX_STATE	/* last enumerator; presumably a count, not a state */
};
|
||||
|
||||
static inline bool md_is_rdwr(struct mddev *mddev)
|
||||
{
|
||||
return (mddev->ro == MD_RDWR);
|
||||
}
|
||||
|
||||
static inline bool reshape_interrupted(struct mddev *mddev)
|
||||
{
|
||||
/* reshape never start */
|
||||
if (mddev->reshape_position == MaxSector)
|
||||
return false;
|
||||
|
||||
/* interrupted */
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
return true;
|
||||
|
||||
/* running reshape will be interrupted soon. */
|
||||
if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int __must_check mddev_lock(struct mddev *mddev)
|
||||
{
|
||||
return mutex_lock_interruptible(&mddev->reconfig_mutex);
|
||||
@ -617,6 +659,7 @@ struct md_personality
|
||||
int (*start_reshape) (struct mddev *mddev);
|
||||
void (*finish_reshape) (struct mddev *mddev);
|
||||
void (*update_reshape_pos) (struct mddev *mddev);
|
||||
void (*prepare_suspend) (struct mddev *mddev);
|
||||
/* quiesce suspends or resumes internal processing.
|
||||
* 1 - stop new actions and wait for action io to complete
|
||||
* 0 - return to normal behaviour
|
||||
@ -750,6 +793,7 @@ extern void md_finish_reshape(struct mddev *mddev);
|
||||
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
|
||||
struct bio *bio, sector_t start, sector_t size);
|
||||
void md_account_bio(struct mddev *mddev, struct bio **bio);
|
||||
void md_free_cloned_bio(struct bio *bio);
|
||||
|
||||
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
|
||||
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
|
||||
@ -778,9 +822,12 @@ extern void md_stop_writes(struct mddev *mddev);
|
||||
extern int md_rdev_init(struct md_rdev *rdev);
|
||||
extern void md_rdev_clear(struct md_rdev *rdev);
|
||||
|
||||
extern void md_handle_request(struct mddev *mddev, struct bio *bio);
|
||||
extern bool md_handle_request(struct mddev *mddev, struct bio *bio);
|
||||
extern int mddev_suspend(struct mddev *mddev, bool interruptible);
|
||||
extern void mddev_resume(struct mddev *mddev);
|
||||
extern void md_idle_sync_thread(struct mddev *mddev);
|
||||
extern void md_frozen_sync_thread(struct mddev *mddev);
|
||||
extern void md_unfrozen_sync_thread(struct mddev *mddev);
|
||||
|
||||
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
|
||||
extern void md_update_sb(struct mddev *mddev, int force);
|
||||
@ -821,7 +868,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
|
||||
{
|
||||
if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
|
||||
!bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
|
||||
mddev->queue->limits.max_write_zeroes_sectors = 0;
|
||||
mddev->gendisk->queue->limits.max_write_zeroes_sectors = 0;
|
||||
}
|
||||
|
||||
static inline int mddev_suspend_and_lock(struct mddev *mddev)
|
||||
@ -860,7 +907,31 @@ void md_autostart_arrays(int part);
|
||||
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
|
||||
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
|
||||
int do_md_run(struct mddev *mddev);
|
||||
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
|
||||
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
|
||||
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);
|
||||
|
||||
extern const struct block_device_operations md_fops;
|
||||
|
||||
/*
|
||||
* MD devices can be used undeneath by DM, in which case ->gendisk is NULL.
|
||||
*/
|
||||
static inline bool mddev_is_dm(struct mddev *mddev)
|
||||
{
|
||||
return !mddev->gendisk;
|
||||
}
|
||||
|
||||
static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
|
||||
sector_t sector)
|
||||
{
|
||||
if (!mddev_is_dm(mddev))
|
||||
trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
|
||||
}
|
||||
|
||||
/*
 * Pass a formatted message to blk_add_trace_msg() on the queue of the
 * array's gendisk.  Nothing is emitted when mddev_is_dm() is true for
 * @mddev.  Note that @mddev is expanded twice, so the argument should be
 * free of side effects.
 */
#define mddev_add_trace_msg(mddev, fmt, args...)			\
do {									\
	if (!mddev_is_dm(mddev))					\
		blk_add_trace_msg((mddev)->gendisk->queue, fmt, ##args); \
} while (0)
|
||||
|
||||
#endif /* _MD_MD_H */
|
||||
|
@ -379,6 +379,19 @@ static void raid0_free(struct mddev *mddev, void *priv)
|
||||
free_conf(mddev, conf);
|
||||
}
|
||||
|
||||
static int raid0_set_limits(struct mddev *mddev)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.max_hw_sectors = mddev->chunk_sectors;
|
||||
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * mddev->raid_disks;
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
static int raid0_run(struct mddev *mddev)
|
||||
{
|
||||
struct r0conf *conf;
|
||||
@ -399,20 +412,10 @@ static int raid0_run(struct mddev *mddev)
|
||||
mddev->private = conf;
|
||||
}
|
||||
conf = mddev->private;
|
||||
if (mddev->queue) {
|
||||
struct md_rdev *rdev;
|
||||
|
||||
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
|
||||
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||
blk_queue_io_opt(mddev->queue,
|
||||
(mddev->chunk_sectors << 9) * mddev->raid_disks);
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
}
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = raid0_set_limits(mddev);
|
||||
if (ret)
|
||||
goto out_free_conf;
|
||||
}
|
||||
|
||||
/* calculate array device size */
|
||||
@ -426,8 +429,10 @@ static int raid0_run(struct mddev *mddev)
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret)
|
||||
free_conf(mddev, conf);
|
||||
|
||||
goto out_free_conf;
|
||||
return 0;
|
||||
out_free_conf:
|
||||
free_conf(mddev, conf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -578,10 +583,7 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
|
||||
bio_set_dev(bio, tmp_dev->bdev);
|
||||
bio->bi_iter.bi_sector = sector + zone->dev_start +
|
||||
tmp_dev->data_offset;
|
||||
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
|
||||
bio_sector);
|
||||
mddev_trace_remap(mddev, bio, bio_sector);
|
||||
mddev_check_write_zeroes(mddev, bio);
|
||||
submit_bio_noacct(bio);
|
||||
}
|
||||
|
@ -227,3 +227,72 @@ static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* raid1_check_read_range() - check a given read range for bad blocks,
|
||||
* available read length is returned;
|
||||
* @rdev: the rdev to read;
|
||||
* @this_sector: read position;
|
||||
* @len: read length;
|
||||
*
|
||||
* helper function for read_balance()
|
||||
*
|
||||
* 1) If there are no bad blocks in the range, @len is returned;
|
||||
* 2) If the range are all bad blocks, 0 is returned;
|
||||
* 3) If there are partial bad blocks:
|
||||
* - If the bad block range starts after @this_sector, the length of first
|
||||
* good region is returned;
|
||||
* - If the bad block range starts before @this_sector, 0 is returned and
|
||||
* the @len is updated to the offset into the region before we get to the
|
||||
* good blocks;
|
||||
*/
|
||||
static inline int raid1_check_read_range(struct md_rdev *rdev,
|
||||
sector_t this_sector, int *len)
|
||||
{
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
/* no bad block overlap */
|
||||
if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors))
|
||||
return *len;
|
||||
|
||||
/*
|
||||
* bad block range starts offset into our range so we can return the
|
||||
* number of sectors before the bad blocks start.
|
||||
*/
|
||||
if (first_bad > this_sector)
|
||||
return first_bad - this_sector;
|
||||
|
||||
/* read range is fully consumed by bad blocks. */
|
||||
if (this_sector + *len <= first_bad + bad_sectors)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* final case, bad block range starts before or at the start of our
|
||||
* range but does not cover our entire range so we still return 0 but
|
||||
* update the length with the number of sectors before we get to the
|
||||
* good ones.
|
||||
*/
|
||||
*len = first_bad + bad_sectors - this_sector;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if read should choose the first rdev.
|
||||
*
|
||||
* Balance on the whole device if no resync is going on (recovery is ok) or
|
||||
* below the resync window. Otherwise, take the first readable disk.
|
||||
*/
|
||||
static inline bool raid1_should_read_first(struct mddev *mddev,
|
||||
sector_t this_sector, int len)
|
||||
{
|
||||
if ((mddev->recovery_cp < this_sector + len))
|
||||
return true;
|
||||
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, READ, this_sector,
|
||||
this_sector + len))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -46,9 +46,6 @@
|
||||
static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
|
||||
#define raid1_log(md, fmt, args...) \
|
||||
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
|
||||
|
||||
#define RAID_1_10_NAME "raid1"
|
||||
#include "raid1-10.c"
|
||||
|
||||
@ -498,9 +495,6 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
* to user-side. So if something waits for IO, then it
|
||||
* will wait for the 'master' bio.
|
||||
*/
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
r1_bio->bios[mirror] = NULL;
|
||||
to_put = bio;
|
||||
/*
|
||||
@ -516,8 +510,8 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||
|
||||
/* Maybe we can clear some bad blocks. */
|
||||
if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
|
||||
&first_bad, &bad_sectors) && !discard_error) {
|
||||
if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
|
||||
!discard_error) {
|
||||
r1_bio->bios[mirror] = IO_MADE_GOOD;
|
||||
set_bit(R1BIO_MadeGood, &r1_bio->state);
|
||||
}
|
||||
@ -582,211 +576,312 @@ static sector_t align_to_barrier_unit_end(sector_t start_sector,
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine returns the disk from which the requested read should
|
||||
* be done. There is a per-array 'next expected sequential IO' sector
|
||||
* number - if this matches on the next IO then we use the last disk.
|
||||
* There is also a per-disk 'last know head position' sector that is
|
||||
* maintained from IRQ contexts, both the normal and the resync IO
|
||||
* completion handlers update this position correctly. If there is no
|
||||
* perfect sequential match then we pick the disk whose head is closest.
|
||||
*
|
||||
* If there are 2 mirrors in the same 2 devices, performance degrades
|
||||
* because position is mirror, not device based.
|
||||
*
|
||||
* The rdev for the device selected will have nr_pending incremented.
|
||||
*/
|
||||
static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
|
||||
static void update_read_sectors(struct r1conf *conf, int disk,
|
||||
sector_t this_sector, int len)
|
||||
{
|
||||
const sector_t this_sector = r1_bio->sector;
|
||||
int sectors;
|
||||
int best_good_sectors;
|
||||
int best_disk, best_dist_disk, best_pending_disk;
|
||||
int has_nonrot_disk;
|
||||
struct raid1_info *info = &conf->mirrors[disk];
|
||||
|
||||
atomic_inc(&info->rdev->nr_pending);
|
||||
if (info->next_seq_sect != this_sector)
|
||||
info->seq_start = this_sector;
|
||||
info->next_seq_sect = this_sector + len;
|
||||
}
|
||||
|
||||
static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
|
||||
int *max_sectors)
|
||||
{
|
||||
sector_t this_sector = r1_bio->sector;
|
||||
int len = r1_bio->sectors;
|
||||
int disk;
|
||||
sector_t best_dist;
|
||||
unsigned int min_pending;
|
||||
struct md_rdev *rdev;
|
||||
int choose_first;
|
||||
int choose_next_idle;
|
||||
|
||||
/*
|
||||
* Check if we can balance. We can balance on the whole
|
||||
* device if no resync is going on, or below the resync window.
|
||||
* We take the first readable disk when above the resync window.
|
||||
*/
|
||||
retry:
|
||||
sectors = r1_bio->sectors;
|
||||
best_disk = -1;
|
||||
best_dist_disk = -1;
|
||||
best_dist = MaxSector;
|
||||
best_pending_disk = -1;
|
||||
min_pending = UINT_MAX;
|
||||
best_good_sectors = 0;
|
||||
has_nonrot_disk = 0;
|
||||
choose_next_idle = 0;
|
||||
clear_bit(R1BIO_FailFast, &r1_bio->state);
|
||||
|
||||
if ((conf->mddev->recovery_cp < this_sector + sectors) ||
|
||||
(mddev_is_clustered(conf->mddev) &&
|
||||
md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
|
||||
this_sector + sectors)))
|
||||
choose_first = 1;
|
||||
else
|
||||
choose_first = 0;
|
||||
|
||||
for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
|
||||
sector_t dist;
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
unsigned int pending;
|
||||
bool nonrot;
|
||||
struct md_rdev *rdev;
|
||||
int read_len;
|
||||
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED)
|
||||
continue;
|
||||
|
||||
rdev = conf->mirrors[disk].rdev;
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED
|
||||
|| rdev == NULL
|
||||
|| test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
if (!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < this_sector + sectors)
|
||||
continue;
|
||||
if (test_bit(WriteMostly, &rdev->flags)) {
|
||||
/* Don't balance among write-mostly, just
|
||||
* use the first as a last resort */
|
||||
if (best_dist_disk < 0) {
|
||||
if (is_badblock(rdev, this_sector, sectors,
|
||||
&first_bad, &bad_sectors)) {
|
||||
if (first_bad <= this_sector)
|
||||
/* Cannot use this */
|
||||
continue;
|
||||
best_good_sectors = first_bad - this_sector;
|
||||
} else
|
||||
best_good_sectors = sectors;
|
||||
best_dist_disk = disk;
|
||||
best_pending_disk = disk;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* This is a reasonable device to use. It might
|
||||
* even be best.
|
||||
*/
|
||||
if (is_badblock(rdev, this_sector, sectors,
|
||||
&first_bad, &bad_sectors)) {
|
||||
if (best_dist < MaxSector)
|
||||
/* already have a better device */
|
||||
continue;
|
||||
if (first_bad <= this_sector) {
|
||||
/* cannot read here. If this is the 'primary'
|
||||
* device, then we must not read beyond
|
||||
* bad_sectors from another device..
|
||||
*/
|
||||
bad_sectors -= (this_sector - first_bad);
|
||||
if (choose_first && sectors > bad_sectors)
|
||||
sectors = bad_sectors;
|
||||
if (best_good_sectors > sectors)
|
||||
best_good_sectors = sectors;
|
||||
|
||||
} else {
|
||||
sector_t good_sectors = first_bad - this_sector;
|
||||
if (good_sectors > best_good_sectors) {
|
||||
best_good_sectors = good_sectors;
|
||||
best_disk = disk;
|
||||
}
|
||||
if (choose_first)
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
if ((sectors > best_good_sectors) && (best_disk >= 0))
|
||||
best_disk = -1;
|
||||
best_good_sectors = sectors;
|
||||
}
|
||||
|
||||
if (best_disk >= 0)
|
||||
/* At least two disks to choose from so failfast is OK */
|
||||
set_bit(R1BIO_FailFast, &r1_bio->state);
|
||||
|
||||
nonrot = bdev_nonrot(rdev->bdev);
|
||||
has_nonrot_disk |= nonrot;
|
||||
pending = atomic_read(&rdev->nr_pending);
|
||||
dist = abs(this_sector - conf->mirrors[disk].head_position);
|
||||
if (choose_first) {
|
||||
best_disk = disk;
|
||||
break;
|
||||
}
|
||||
/* Don't change to another disk for sequential reads */
|
||||
if (conf->mirrors[disk].next_seq_sect == this_sector
|
||||
|| dist == 0) {
|
||||
int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
|
||||
struct raid1_info *mirror = &conf->mirrors[disk];
|
||||
|
||||
best_disk = disk;
|
||||
/*
|
||||
* If buffered sequential IO size exceeds optimal
|
||||
* iosize, check if there is idle disk. If yes, choose
|
||||
* the idle disk. read_balance could already choose an
|
||||
* idle disk before noticing it's a sequential IO in
|
||||
* this disk. This doesn't matter because this disk
|
||||
* will idle, next time it will be utilized after the
|
||||
* first disk has IO size exceeds optimal iosize. In
|
||||
* this way, iosize of the first disk will be optimal
|
||||
* iosize at least. iosize of the second disk might be
|
||||
* small, but not a big deal since when the second disk
|
||||
* starts IO, the first disk is likely still busy.
|
||||
*/
|
||||
if (nonrot && opt_iosize > 0 &&
|
||||
mirror->seq_start != MaxSector &&
|
||||
mirror->next_seq_sect > opt_iosize &&
|
||||
mirror->next_seq_sect - opt_iosize >=
|
||||
mirror->seq_start) {
|
||||
choose_next_idle = 1;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (choose_next_idle)
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
|
||||
if (min_pending > pending) {
|
||||
min_pending = pending;
|
||||
best_pending_disk = disk;
|
||||
}
|
||||
|
||||
if (dist < best_dist) {
|
||||
best_dist = dist;
|
||||
best_dist_disk = disk;
|
||||
/* choose the first disk even if it has some bad blocks. */
|
||||
read_len = raid1_check_read_range(rdev, this_sector, &len);
|
||||
if (read_len > 0) {
|
||||
update_read_sectors(conf, disk, this_sector, read_len);
|
||||
*max_sectors = read_len;
|
||||
return disk;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
|
||||
int *max_sectors)
|
||||
{
|
||||
sector_t this_sector = r1_bio->sector;
|
||||
int best_disk = -1;
|
||||
int best_len = 0;
|
||||
int disk;
|
||||
|
||||
for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
|
||||
struct md_rdev *rdev;
|
||||
int len;
|
||||
int read_len;
|
||||
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED)
|
||||
continue;
|
||||
|
||||
rdev = conf->mirrors[disk].rdev;
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
||||
test_bit(WriteMostly, &rdev->flags))
|
||||
continue;
|
||||
|
||||
/* keep track of the disk with the most readable sectors. */
|
||||
len = r1_bio->sectors;
|
||||
read_len = raid1_check_read_range(rdev, this_sector, &len);
|
||||
if (read_len > best_len) {
|
||||
best_disk = disk;
|
||||
best_len = read_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (best_disk != -1) {
|
||||
*max_sectors = best_len;
|
||||
update_read_sectors(conf, best_disk, this_sector, best_len);
|
||||
}
|
||||
|
||||
return best_disk;
|
||||
}
|
||||
|
||||
static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
|
||||
int *max_sectors)
|
||||
{
|
||||
sector_t this_sector = r1_bio->sector;
|
||||
int bb_disk = -1;
|
||||
int bb_read_len = 0;
|
||||
int disk;
|
||||
|
||||
for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
|
||||
struct md_rdev *rdev;
|
||||
int len;
|
||||
int read_len;
|
||||
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED)
|
||||
continue;
|
||||
|
||||
rdev = conf->mirrors[disk].rdev;
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags) ||
|
||||
!test_bit(WriteMostly, &rdev->flags))
|
||||
continue;
|
||||
|
||||
/* there are no bad blocks, we can use this disk */
|
||||
len = r1_bio->sectors;
|
||||
read_len = raid1_check_read_range(rdev, this_sector, &len);
|
||||
if (read_len == r1_bio->sectors) {
|
||||
update_read_sectors(conf, disk, this_sector, read_len);
|
||||
return disk;
|
||||
}
|
||||
|
||||
/*
|
||||
* there are partial bad blocks, choose the rdev with largest
|
||||
* read length.
|
||||
*/
|
||||
if (read_len > bb_read_len) {
|
||||
bb_disk = disk;
|
||||
bb_read_len = read_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (bb_disk != -1) {
|
||||
*max_sectors = bb_read_len;
|
||||
update_read_sectors(conf, bb_disk, this_sector, bb_read_len);
|
||||
}
|
||||
|
||||
return bb_disk;
|
||||
}
|
||||
|
||||
static bool is_sequential(struct r1conf *conf, int disk, struct r1bio *r1_bio)
|
||||
{
|
||||
/* TODO: address issues with this check and concurrency. */
|
||||
return conf->mirrors[disk].next_seq_sect == r1_bio->sector ||
|
||||
conf->mirrors[disk].head_position == r1_bio->sector;
|
||||
}
|
||||
|
||||
/*
|
||||
* If buffered sequential IO size exceeds optimal iosize, check if there is idle
|
||||
* disk. If yes, choose the idle disk.
|
||||
*/
|
||||
static bool should_choose_next(struct r1conf *conf, int disk)
|
||||
{
|
||||
struct raid1_info *mirror = &conf->mirrors[disk];
|
||||
int opt_iosize;
|
||||
|
||||
if (!test_bit(Nonrot, &mirror->rdev->flags))
|
||||
return false;
|
||||
|
||||
opt_iosize = bdev_io_opt(mirror->rdev->bdev) >> 9;
|
||||
return opt_iosize > 0 && mirror->seq_start != MaxSector &&
|
||||
mirror->next_seq_sect > opt_iosize &&
|
||||
mirror->next_seq_sect - opt_iosize >= mirror->seq_start;
|
||||
}
|
||||
|
||||
static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio)
|
||||
{
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||
return false;
|
||||
|
||||
/* still in recovery */
|
||||
if (!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < r1_bio->sector + r1_bio->sectors)
|
||||
return false;
|
||||
|
||||
/* don't read from slow disk unless have to */
|
||||
if (test_bit(WriteMostly, &rdev->flags))
|
||||
return false;
|
||||
|
||||
/* don't split IO for bad blocks unless have to */
|
||||
if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct read_balance_ctl {
|
||||
sector_t closest_dist;
|
||||
int closest_dist_disk;
|
||||
int min_pending;
|
||||
int min_pending_disk;
|
||||
int sequential_disk;
|
||||
int readable_disks;
|
||||
};
|
||||
|
||||
static int choose_best_rdev(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
{
|
||||
int disk;
|
||||
struct read_balance_ctl ctl = {
|
||||
.closest_dist_disk = -1,
|
||||
.closest_dist = MaxSector,
|
||||
.min_pending_disk = -1,
|
||||
.min_pending = UINT_MAX,
|
||||
.sequential_disk = -1,
|
||||
};
|
||||
|
||||
for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
|
||||
struct md_rdev *rdev;
|
||||
sector_t dist;
|
||||
unsigned int pending;
|
||||
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED)
|
||||
continue;
|
||||
|
||||
rdev = conf->mirrors[disk].rdev;
|
||||
if (!rdev_readable(rdev, r1_bio))
|
||||
continue;
|
||||
|
||||
/* At least two disks to choose from so failfast is OK */
|
||||
if (ctl.readable_disks++ == 1)
|
||||
set_bit(R1BIO_FailFast, &r1_bio->state);
|
||||
|
||||
pending = atomic_read(&rdev->nr_pending);
|
||||
dist = abs(r1_bio->sector - conf->mirrors[disk].head_position);
|
||||
|
||||
/* Don't change to another disk for sequential reads */
|
||||
if (is_sequential(conf, disk, r1_bio)) {
|
||||
if (!should_choose_next(conf, disk))
|
||||
return disk;
|
||||
|
||||
/*
|
||||
* Add 'pending' to avoid choosing this disk if
|
||||
* there is other idle disk.
|
||||
*/
|
||||
pending++;
|
||||
/*
|
||||
* If there is no other idle disk, this disk
|
||||
* will be chosen.
|
||||
*/
|
||||
ctl.sequential_disk = disk;
|
||||
}
|
||||
|
||||
if (ctl.min_pending > pending) {
|
||||
ctl.min_pending = pending;
|
||||
ctl.min_pending_disk = disk;
|
||||
}
|
||||
|
||||
if (ctl.closest_dist > dist) {
|
||||
ctl.closest_dist = dist;
|
||||
ctl.closest_dist_disk = disk;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* sequential IO size exceeds optimal iosize, however, there is no other
|
||||
* idle disk, so choose the sequential disk.
|
||||
*/
|
||||
if (ctl.sequential_disk != -1 && ctl.min_pending != 0)
|
||||
return ctl.sequential_disk;
|
||||
|
||||
/*
|
||||
* If all disks are rotational, choose the closest disk. If any disk is
|
||||
* non-rotational, choose the disk with less pending request even the
|
||||
* disk is rotational, which might/might not be optimal for raids with
|
||||
* mixed rotational/non-rotational disks depending on workload.
|
||||
*/
|
||||
if (best_disk == -1) {
|
||||
if (has_nonrot_disk || min_pending == 0)
|
||||
best_disk = best_pending_disk;
|
||||
else
|
||||
best_disk = best_dist_disk;
|
||||
if (ctl.min_pending_disk != -1 &&
|
||||
(READ_ONCE(conf->nonrot_disks) || ctl.min_pending == 0))
|
||||
return ctl.min_pending_disk;
|
||||
else
|
||||
return ctl.closest_dist_disk;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine returns the disk from which the requested read should be done.
|
||||
*
|
||||
* 1) If resync is in progress, find the first usable disk and use it even if it
|
||||
* has some bad blocks.
|
||||
*
|
||||
* 2) Now that there is no resync, loop through all disks and skipping slow
|
||||
* disks and disks with bad blocks for now. Only pay attention to key disk
|
||||
* choice.
|
||||
*
|
||||
* 3) If we've made it this far, now look for disks with bad blocks and choose
|
||||
* the one with most number of sectors.
|
||||
*
|
||||
* 4) If we are all the way at the end, we have no choice but to use a disk even
|
||||
* if it is write mostly.
|
||||
*
|
||||
* The rdev for the device selected will have nr_pending incremented.
|
||||
*/
|
||||
static int read_balance(struct r1conf *conf, struct r1bio *r1_bio,
|
||||
int *max_sectors)
|
||||
{
|
||||
int disk;
|
||||
|
||||
clear_bit(R1BIO_FailFast, &r1_bio->state);
|
||||
|
||||
if (raid1_should_read_first(conf->mddev, r1_bio->sector,
|
||||
r1_bio->sectors))
|
||||
return choose_first_rdev(conf, r1_bio, max_sectors);
|
||||
|
||||
disk = choose_best_rdev(conf, r1_bio);
|
||||
if (disk >= 0) {
|
||||
*max_sectors = r1_bio->sectors;
|
||||
update_read_sectors(conf, disk, r1_bio->sector,
|
||||
r1_bio->sectors);
|
||||
return disk;
|
||||
}
|
||||
|
||||
if (best_disk >= 0) {
|
||||
rdev = conf->mirrors[best_disk].rdev;
|
||||
if (!rdev)
|
||||
goto retry;
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
sectors = best_good_sectors;
|
||||
/*
|
||||
* If we are here it means we didn't find a perfectly good disk so
|
||||
* now spend a bit more time trying to find one with the most good
|
||||
* sectors.
|
||||
*/
|
||||
disk = choose_bb_rdev(conf, r1_bio, max_sectors);
|
||||
if (disk >= 0)
|
||||
return disk;
|
||||
|
||||
if (conf->mirrors[best_disk].next_seq_sect != this_sector)
|
||||
conf->mirrors[best_disk].seq_start = this_sector;
|
||||
|
||||
conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
|
||||
}
|
||||
*max_sectors = sectors;
|
||||
|
||||
return best_disk;
|
||||
return choose_slow_rdev(conf, r1_bio, max_sectors);
|
||||
}
|
||||
|
||||
static void wake_up_barrier(struct r1conf *conf)
|
||||
@ -1098,7 +1193,7 @@ static void freeze_array(struct r1conf *conf, int extra)
|
||||
*/
|
||||
spin_lock_irq(&conf->resync_lock);
|
||||
conf->array_frozen = 1;
|
||||
raid1_log(conf->mddev, "wait freeze");
|
||||
mddev_add_trace_msg(conf->mddev, "raid1 wait freeze");
|
||||
wait_event_lock_irq_cmd(
|
||||
conf->wait_barrier,
|
||||
get_unqueued_pending(conf) == extra,
|
||||
@ -1287,7 +1382,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
* Reading from a write-mostly device must take care not to
|
||||
* over-take any writes that are 'behind'
|
||||
*/
|
||||
raid1_log(mddev, "wait behind writes");
|
||||
mddev_add_trace_msg(mddev, "raid1 wait behind writes");
|
||||
wait_event(bitmap->behind_wait,
|
||||
atomic_read(&bitmap->behind_writes) == 0);
|
||||
}
|
||||
@ -1320,11 +1415,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
test_bit(R1BIO_FailFast, &r1_bio->state))
|
||||
read_bio->bi_opf |= MD_FAILFAST;
|
||||
read_bio->bi_private = r1_bio;
|
||||
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
|
||||
r1_bio->sector);
|
||||
|
||||
mddev_trace_remap(mddev, read_bio, r1_bio->sector);
|
||||
submit_bio_noacct(read_bio);
|
||||
}
|
||||
|
||||
@ -1474,7 +1565,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
bio_wouldblock_error(bio);
|
||||
return;
|
||||
}
|
||||
raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
|
||||
mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
|
||||
blocked_rdev->raid_disk);
|
||||
md_wait_for_blocked_rdev(blocked_rdev, mddev);
|
||||
wait_barrier(conf, bio->bi_iter.bi_sector, false);
|
||||
goto retry_write;
|
||||
@ -1557,10 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
mbio->bi_private = r1_bio;
|
||||
|
||||
atomic_inc(&r1_bio->remaining);
|
||||
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(mbio, disk_devt(mddev->gendisk),
|
||||
r1_bio->sector);
|
||||
mddev_trace_remap(mddev, mbio, r1_bio->sector);
|
||||
/* flush_pending_writes() needs access to the rdev so...*/
|
||||
mbio->bi_bdev = (void *)rdev;
|
||||
if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) {
|
||||
@ -1760,6 +1849,52 @@ static int raid1_spare_active(struct mddev *mddev)
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
 * Install @rdev into the mirror table of @conf.
 *
 * @disk selects the slot; when @replacement is true the matching slot in the
 * second half of the table (offset by conf->raid_disks) is used instead.
 * If the device is non-rotational it is flagged Nonrot and counted in
 * conf->nonrot_disks.  The slot's head position and sequential-read start are
 * reset before the rdev pointer is published with WRITE_ONCE().
 *
 * Returns false, changing nothing, if the slot is already occupied.
 */
static bool raid1_add_conf(struct r1conf *conf, struct md_rdev *rdev, int disk,
			   bool replacement)
{
	int slot = replacement ? disk + conf->raid_disks : disk;
	struct raid1_info *info = &conf->mirrors[slot];

	if (info->rdev)
		return false;

	if (bdev_nonrot(rdev->bdev)) {
		set_bit(Nonrot, &rdev->flags);
		WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks + 1);
	}

	/* raid_disk keeps the logical index, not the replacement slot. */
	rdev->raid_disk = disk;
	info->head_position = 0;
	info->seq_start = MaxSector;
	WRITE_ONCE(info->rdev, rdev);

	return true;
}
|
||||
|
||||
static bool raid1_remove_conf(struct r1conf *conf, int disk)
|
||||
{
|
||||
struct raid1_info *info = conf->mirrors + disk;
|
||||
struct md_rdev *rdev = info->rdev;
|
||||
|
||||
if (!rdev || test_bit(In_sync, &rdev->flags) ||
|
||||
atomic_read(&rdev->nr_pending))
|
||||
return false;
|
||||
|
||||
/* Only remove non-faulty devices if recovery is not possible. */
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
rdev->mddev->recovery_disabled != conf->recovery_disabled &&
|
||||
rdev->mddev->degraded < conf->raid_disks)
|
||||
return false;
|
||||
|
||||
if (test_and_clear_bit(Nonrot, &rdev->flags))
|
||||
WRITE_ONCE(conf->nonrot_disks, conf->nonrot_disks - 1);
|
||||
|
||||
WRITE_ONCE(info->rdev, NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
@ -1791,19 +1926,16 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
for (mirror = first; mirror <= last; mirror++) {
|
||||
p = conf->mirrors + mirror;
|
||||
if (!p->rdev) {
|
||||
if (mddev->gendisk)
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
err = mddev_stack_new_rdev(mddev, rdev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
p->head_position = 0;
|
||||
rdev->raid_disk = mirror;
|
||||
err = 0;
|
||||
raid1_add_conf(conf, rdev, mirror, false);
|
||||
/* As all devices are equivalent, we don't need a full recovery
|
||||
* if this was recently any drive of the array
|
||||
*/
|
||||
if (rdev->saved_raid_disk < 0)
|
||||
conf->fullsync = 1;
|
||||
WRITE_ONCE(p->rdev, rdev);
|
||||
break;
|
||||
}
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
@ -1813,13 +1945,11 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
|
||||
if (err && repl_slot >= 0) {
|
||||
/* Add this device as a replacement */
|
||||
p = conf->mirrors + repl_slot;
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = repl_slot;
|
||||
raid1_add_conf(conf, rdev, repl_slot, true);
|
||||
err = 0;
|
||||
conf->fullsync = 1;
|
||||
WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
|
||||
}
|
||||
|
||||
print_conf(conf);
|
||||
@ -1836,27 +1966,20 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (unlikely(number >= conf->raid_disks))
|
||||
goto abort;
|
||||
|
||||
if (rdev != p->rdev)
|
||||
p = conf->mirrors + conf->raid_disks + number;
|
||||
if (rdev != p->rdev) {
|
||||
number += conf->raid_disks;
|
||||
p = conf->mirrors + number;
|
||||
}
|
||||
|
||||
print_conf(conf);
|
||||
if (rdev == p->rdev) {
|
||||
if (test_bit(In_sync, &rdev->flags) ||
|
||||
atomic_read(&rdev->nr_pending)) {
|
||||
if (!raid1_remove_conf(conf, number)) {
|
||||
err = -EBUSY;
|
||||
goto abort;
|
||||
}
|
||||
/* Only remove non-faulty devices if recovery
|
||||
* is not possible.
|
||||
*/
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
mddev->recovery_disabled != conf->recovery_disabled &&
|
||||
mddev->degraded < conf->raid_disks) {
|
||||
err = -EBUSY;
|
||||
goto abort;
|
||||
}
|
||||
WRITE_ONCE(p->rdev, NULL);
|
||||
if (conf->mirrors[conf->raid_disks + number].rdev) {
|
||||
|
||||
if (number < conf->raid_disks &&
|
||||
conf->mirrors[conf->raid_disks + number].rdev) {
|
||||
/* We just removed a device that is being replaced.
|
||||
* Move down the replacement. We drain all IO before
|
||||
* doing this to avoid confusion.
|
||||
@ -1944,8 +2067,6 @@ static void end_sync_write(struct bio *bio)
|
||||
struct r1bio *r1_bio = get_resync_r1bio(bio);
|
||||
struct mddev *mddev = r1_bio->mddev;
|
||||
struct r1conf *conf = mddev->private;
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;
|
||||
|
||||
if (!uptodate) {
|
||||
@ -1955,14 +2076,11 @@ static void end_sync_write(struct bio *bio)
|
||||
set_bit(MD_RECOVERY_NEEDED, &
|
||||
mddev->recovery);
|
||||
set_bit(R1BIO_WriteError, &r1_bio->state);
|
||||
} else if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
|
||||
&first_bad, &bad_sectors) &&
|
||||
!is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
|
||||
r1_bio->sector,
|
||||
r1_bio->sectors,
|
||||
&first_bad, &bad_sectors)
|
||||
)
|
||||
} else if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
|
||||
!rdev_has_badblock(conf->mirrors[r1_bio->read_disk].rdev,
|
||||
r1_bio->sector, r1_bio->sectors)) {
|
||||
set_bit(R1BIO_MadeGood, &r1_bio->state);
|
||||
}
|
||||
|
||||
put_sync_write_buf(r1_bio, uptodate);
|
||||
}
|
||||
@ -2279,16 +2397,12 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
s = PAGE_SIZE >> 9;
|
||||
|
||||
do {
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (rdev &&
|
||||
(test_bit(In_sync, &rdev->flags) ||
|
||||
(!test_bit(Faulty, &rdev->flags) &&
|
||||
rdev->recovery_offset >= sect + s)) &&
|
||||
is_badblock(rdev, sect, s,
|
||||
&first_bad, &bad_sectors) == 0) {
|
||||
rdev_has_badblock(rdev, sect, s) == 0) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
if (sync_page_io(rdev, sect, s<<9,
|
||||
conf->tmppage, REQ_OP_READ, false))
|
||||
@ -3006,23 +3120,17 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||
|
||||
err = -EINVAL;
|
||||
spin_lock_init(&conf->device_lock);
|
||||
conf->raid_disks = mddev->raid_disks;
|
||||
rdev_for_each(rdev, mddev) {
|
||||
int disk_idx = rdev->raid_disk;
|
||||
if (disk_idx >= mddev->raid_disks
|
||||
|| disk_idx < 0)
|
||||
continue;
|
||||
if (test_bit(Replacement, &rdev->flags))
|
||||
disk = conf->mirrors + mddev->raid_disks + disk_idx;
|
||||
else
|
||||
disk = conf->mirrors + disk_idx;
|
||||
|
||||
if (disk->rdev)
|
||||
if (disk_idx >= conf->raid_disks || disk_idx < 0)
|
||||
continue;
|
||||
|
||||
if (!raid1_add_conf(conf, rdev, disk_idx,
|
||||
test_bit(Replacement, &rdev->flags)))
|
||||
goto abort;
|
||||
disk->rdev = rdev;
|
||||
disk->head_position = 0;
|
||||
disk->seq_start = MaxSector;
|
||||
}
|
||||
conf->raid_disks = mddev->raid_disks;
|
||||
conf->mddev = mddev;
|
||||
INIT_LIST_HEAD(&conf->retry_list);
|
||||
INIT_LIST_HEAD(&conf->bio_end_io_list);
|
||||
@ -3086,12 +3194,21 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int raid1_set_limits(struct mddev *mddev)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
static void raid1_free(struct mddev *mddev, void *priv);
|
||||
static int raid1_run(struct mddev *mddev)
|
||||
{
|
||||
struct r1conf *conf;
|
||||
int i;
|
||||
struct md_rdev *rdev;
|
||||
int ret;
|
||||
|
||||
if (mddev->level != 1) {
|
||||
@ -3118,14 +3235,10 @@ static int raid1_run(struct mddev *mddev)
|
||||
if (IS_ERR(conf))
|
||||
return PTR_ERR(conf);
|
||||
|
||||
if (mddev->queue)
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (!mddev->gendisk)
|
||||
continue;
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = raid1_set_limits(mddev);
|
||||
if (ret)
|
||||
goto abort;
|
||||
}
|
||||
|
||||
mddev->degraded = 0;
|
||||
|
@ -71,6 +71,7 @@ struct r1conf {
|
||||
* allow for replacements.
|
||||
*/
|
||||
int raid_disks;
|
||||
int nonrot_disks;
|
||||
|
||||
spinlock_t device_lock;
|
||||
|
||||
|
@ -76,9 +76,6 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
|
||||
static void end_reshape_write(struct bio *bio);
|
||||
static void end_reshape(struct r10conf *conf);
|
||||
|
||||
#define raid10_log(md, fmt, args...) \
|
||||
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
|
||||
|
||||
#include "raid1-10.c"
|
||||
|
||||
#define NULL_CMD
|
||||
@ -518,11 +515,7 @@ static void raid10_end_write_request(struct bio *bio)
|
||||
* The 'master' represents the composite IO operation to
|
||||
* user-side. So if something waits for IO, then it will
|
||||
* wait for the 'master' bio.
|
||||
*/
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
/*
|
||||
*
|
||||
* Do not set R10BIO_Uptodate if the current device is
|
||||
* rebuilding or Faulty. This is because we cannot use
|
||||
* such device for properly reading the data back (we could
|
||||
@ -535,10 +528,9 @@ static void raid10_end_write_request(struct bio *bio)
|
||||
set_bit(R10BIO_Uptodate, &r10_bio->state);
|
||||
|
||||
/* Maybe we can clear some bad blocks. */
|
||||
if (is_badblock(rdev,
|
||||
r10_bio->devs[slot].addr,
|
||||
r10_bio->sectors,
|
||||
&first_bad, &bad_sectors) && !discard_error) {
|
||||
if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
|
||||
r10_bio->sectors) &&
|
||||
!discard_error) {
|
||||
bio_put(bio);
|
||||
if (repl)
|
||||
r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
|
||||
@ -753,17 +745,8 @@ static struct md_rdev *read_balance(struct r10conf *conf,
|
||||
best_good_sectors = 0;
|
||||
do_balance = 1;
|
||||
clear_bit(R10BIO_FailFast, &r10_bio->state);
|
||||
/*
|
||||
* Check if we can balance. We can balance on the whole
|
||||
* device if no resync is going on (recovery is ok), or below
|
||||
* the resync window. We take the first readable disk when
|
||||
* above the resync window.
|
||||
*/
|
||||
if ((conf->mddev->recovery_cp < MaxSector
|
||||
&& (this_sector + sectors >= conf->next_resync)) ||
|
||||
(mddev_is_clustered(conf->mddev) &&
|
||||
md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
|
||||
this_sector + sectors)))
|
||||
|
||||
if (raid1_should_read_first(conf->mddev, this_sector, sectors))
|
||||
do_balance = 0;
|
||||
|
||||
for (slot = 0; slot < conf->copies ; slot++) {
|
||||
@ -1033,7 +1016,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
|
||||
ret = false;
|
||||
} else {
|
||||
conf->nr_waiting++;
|
||||
raid10_log(conf->mddev, "wait barrier");
|
||||
mddev_add_trace_msg(conf->mddev, "raid10 wait barrier");
|
||||
wait_event_barrier(conf, stop_waiting_barrier(conf));
|
||||
conf->nr_waiting--;
|
||||
}
|
||||
@ -1152,7 +1135,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
|
||||
bio_wouldblock_error(bio);
|
||||
return false;
|
||||
}
|
||||
raid10_log(conf->mddev, "wait reshape");
|
||||
mddev_add_trace_msg(conf->mddev, "raid10 wait reshape");
|
||||
wait_event(conf->wait_barrier,
|
||||
conf->reshape_progress <= bio->bi_iter.bi_sector ||
|
||||
conf->reshape_progress >= bio->bi_iter.bi_sector +
|
||||
@ -1249,10 +1232,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
test_bit(R10BIO_FailFast, &r10_bio->state))
|
||||
read_bio->bi_opf |= MD_FAILFAST;
|
||||
read_bio->bi_private = r10_bio;
|
||||
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
|
||||
r10_bio->sector);
|
||||
mddev_trace_remap(mddev, read_bio, r10_bio->sector);
|
||||
submit_bio_noacct(read_bio);
|
||||
return;
|
||||
}
|
||||
@ -1288,10 +1268,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
|
||||
&& enough(conf, devnum))
|
||||
mbio->bi_opf |= MD_FAILFAST;
|
||||
mbio->bi_private = r10_bio;
|
||||
|
||||
if (conf->mddev->gendisk)
|
||||
trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
|
||||
r10_bio->sector);
|
||||
mddev_trace_remap(mddev, mbio, r10_bio->sector);
|
||||
/* flush_pending_writes() needs access to the rdev so...*/
|
||||
mbio->bi_bdev = (void *)rdev;
|
||||
|
||||
@ -1330,10 +1307,7 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
}
|
||||
|
||||
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
sector_t first_bad;
|
||||
sector_t dev_sector = r10_bio->devs[i].addr;
|
||||
int bad_sectors;
|
||||
int is_bad;
|
||||
|
||||
/*
|
||||
* Discard request doesn't care the write result
|
||||
@ -1342,9 +1316,8 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
if (!r10_bio->sectors)
|
||||
continue;
|
||||
|
||||
is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
|
||||
&first_bad, &bad_sectors);
|
||||
if (is_bad < 0) {
|
||||
if (rdev_has_badblock(rdev, dev_sector,
|
||||
r10_bio->sectors) < 0) {
|
||||
/*
|
||||
* Mustn't write here until the bad block
|
||||
* is acknowledged
|
||||
@ -1360,8 +1333,9 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
if (unlikely(blocked_rdev)) {
|
||||
/* Have to wait for this device to get unblocked, then retry */
|
||||
allow_barrier(conf);
|
||||
raid10_log(conf->mddev, "%s wait rdev %d blocked",
|
||||
__func__, blocked_rdev->raid_disk);
|
||||
mddev_add_trace_msg(conf->mddev,
|
||||
"raid10 %s wait rdev %d blocked",
|
||||
__func__, blocked_rdev->raid_disk);
|
||||
md_wait_for_blocked_rdev(blocked_rdev, mddev);
|
||||
wait_barrier(conf, false);
|
||||
goto retry_wait;
|
||||
@ -1416,7 +1390,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
bio_wouldblock_error(bio);
|
||||
return;
|
||||
}
|
||||
raid10_log(conf->mddev, "wait reshape metadata");
|
||||
mddev_add_trace_msg(conf->mddev,
|
||||
"raid10 wait reshape metadata");
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
||||
|
||||
@ -2131,10 +2106,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mddev->gendisk)
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
|
||||
err = mddev_stack_new_rdev(mddev, rdev);
|
||||
if (err)
|
||||
return err;
|
||||
p->head_position = 0;
|
||||
p->recovery_disabled = mddev->recovery_disabled - 1;
|
||||
rdev->raid_disk = mirror;
|
||||
@ -2150,10 +2124,9 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
rdev->raid_disk = repl_slot;
|
||||
err = 0;
|
||||
if (mddev->gendisk)
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
err = mddev_stack_new_rdev(mddev, rdev);
|
||||
if (err)
|
||||
return err;
|
||||
conf->fullsync = 1;
|
||||
WRITE_ONCE(p->replacement, rdev);
|
||||
}
|
||||
@ -2290,8 +2263,6 @@ static void end_sync_write(struct bio *bio)
|
||||
struct mddev *mddev = r10_bio->mddev;
|
||||
struct r10conf *conf = mddev->private;
|
||||
int d;
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
int slot;
|
||||
int repl;
|
||||
struct md_rdev *rdev = NULL;
|
||||
@ -2312,11 +2283,10 @@ static void end_sync_write(struct bio *bio)
|
||||
&rdev->mddev->recovery);
|
||||
set_bit(R10BIO_WriteError, &r10_bio->state);
|
||||
}
|
||||
} else if (is_badblock(rdev,
|
||||
r10_bio->devs[slot].addr,
|
||||
r10_bio->sectors,
|
||||
&first_bad, &bad_sectors))
|
||||
} else if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
|
||||
r10_bio->sectors)) {
|
||||
set_bit(R10BIO_MadeGood, &r10_bio->state);
|
||||
}
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
|
||||
@ -2597,11 +2567,8 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
int sectors, struct page *page, enum req_op op)
|
||||
{
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
|
||||
&& (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
|
||||
if (rdev_has_badblock(rdev, sector, sectors) &&
|
||||
(op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
|
||||
return -1;
|
||||
if (sync_page_io(rdev, sector, sectors << 9, page, op, false))
|
||||
/* success */
|
||||
@ -2658,16 +2625,14 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
s = PAGE_SIZE >> 9;
|
||||
|
||||
do {
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
|
||||
d = r10_bio->devs[sl].devnum;
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (rdev &&
|
||||
test_bit(In_sync, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags) &&
|
||||
is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
|
||||
&first_bad, &bad_sectors) == 0) {
|
||||
rdev_has_badblock(rdev,
|
||||
r10_bio->devs[sl].addr + sect,
|
||||
s) == 0) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
success = sync_page_io(rdev,
|
||||
r10_bio->devs[sl].addr +
|
||||
@ -4002,14 +3967,26 @@ static struct r10conf *setup_conf(struct mddev *mddev)
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void raid10_set_io_opt(struct r10conf *conf)
|
||||
static unsigned int raid10_nr_stripes(struct r10conf *conf)
|
||||
{
|
||||
int raid_disks = conf->geo.raid_disks;
|
||||
unsigned int raid_disks = conf->geo.raid_disks;
|
||||
|
||||
if (!(conf->geo.raid_disks % conf->geo.near_copies))
|
||||
raid_disks /= conf->geo.near_copies;
|
||||
blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) *
|
||||
raid_disks);
|
||||
if (conf->geo.raid_disks % conf->geo.near_copies)
|
||||
return raid_disks;
|
||||
return raid_disks / conf->geo.near_copies;
|
||||
}
|
||||
|
||||
static int raid10_set_queue_limits(struct mddev *mddev)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct queue_limits lim;
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
static int raid10_run(struct mddev *mddev)
|
||||
@ -4021,6 +3998,7 @@ static int raid10_run(struct mddev *mddev)
|
||||
sector_t size;
|
||||
sector_t min_offset_diff = 0;
|
||||
int first = 1;
|
||||
int ret = -EIO;
|
||||
|
||||
if (mddev->private == NULL) {
|
||||
conf = setup_conf(mddev);
|
||||
@ -4047,12 +4025,6 @@ static int raid10_run(struct mddev *mddev)
|
||||
}
|
||||
}
|
||||
|
||||
if (mddev->queue) {
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
|
||||
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||
raid10_set_io_opt(conf);
|
||||
}
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
long long diff;
|
||||
|
||||
@ -4081,14 +4053,16 @@ static int raid10_run(struct mddev *mddev)
|
||||
if (first || diff < min_offset_diff)
|
||||
min_offset_diff = diff;
|
||||
|
||||
if (mddev->gendisk)
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
|
||||
disk->head_position = 0;
|
||||
first = 0;
|
||||
}
|
||||
|
||||
if (!mddev_is_dm(conf->mddev)) {
|
||||
ret = raid10_set_queue_limits(mddev);
|
||||
if (ret)
|
||||
goto out_free_conf;
|
||||
}
|
||||
|
||||
/* need to check that every block has at least one working mirror */
|
||||
if (!enough(conf, -1)) {
|
||||
pr_err("md/raid10:%s: not enough operational mirrors.\n",
|
||||
@ -4185,7 +4159,7 @@ static int raid10_run(struct mddev *mddev)
|
||||
raid10_free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
out:
|
||||
return -EIO;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void raid10_free(struct mddev *mddev, void *priv)
|
||||
@ -4954,8 +4928,7 @@ static void end_reshape(struct r10conf *conf)
|
||||
conf->reshape_safe = MaxSector;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
if (conf->mddev->queue)
|
||||
raid10_set_io_opt(conf);
|
||||
mddev_update_io_opt(conf->mddev, raid10_nr_stripes(conf));
|
||||
conf->fullsync = 0;
|
||||
}
|
||||
|
||||
|
@ -1393,7 +1393,8 @@ int ppl_init_log(struct r5conf *conf)
|
||||
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
|
||||
ppl_conf->block_size = 512;
|
||||
} else {
|
||||
ppl_conf->block_size = queue_logical_block_size(mddev->queue);
|
||||
ppl_conf->block_size =
|
||||
queue_logical_block_size(mddev->gendisk->queue);
|
||||
}
|
||||
|
||||
for (i = 0; i < ppl_conf->count; i++) {
|
||||
|
@ -36,6 +36,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/raid/pq.h>
|
||||
#include <linux/async_tx.h>
|
||||
@ -760,6 +761,7 @@ enum stripe_result {
|
||||
STRIPE_RETRY,
|
||||
STRIPE_SCHEDULE_AND_RETRY,
|
||||
STRIPE_FAIL,
|
||||
STRIPE_WAIT_RESHAPE,
|
||||
};
|
||||
|
||||
struct stripe_request_ctx {
|
||||
@ -1210,10 +1212,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
*/
|
||||
while (op_is_write(op) && rdev &&
|
||||
test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
&first_bad, &bad_sectors);
|
||||
int bad = rdev_has_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf));
|
||||
if (!bad)
|
||||
break;
|
||||
|
||||
@ -1295,10 +1295,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (rrdev)
|
||||
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
|
||||
|
||||
if (conf->mddev->gendisk)
|
||||
trace_block_bio_remap(bi,
|
||||
disk_devt(conf->mddev->gendisk),
|
||||
sh->dev[i].sector);
|
||||
mddev_trace_remap(conf->mddev, bi, sh->dev[i].sector);
|
||||
if (should_defer && op_is_write(op))
|
||||
bio_list_add(&pending_bios, bi);
|
||||
else
|
||||
@ -1342,10 +1339,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
*/
|
||||
if (op == REQ_OP_DISCARD)
|
||||
rbi->bi_vcnt = 0;
|
||||
if (conf->mddev->gendisk)
|
||||
trace_block_bio_remap(rbi,
|
||||
disk_devt(conf->mddev->gendisk),
|
||||
sh->dev[i].sector);
|
||||
mddev_trace_remap(conf->mddev, rbi, sh->dev[i].sector);
|
||||
if (should_defer && op_is_write(op))
|
||||
bio_list_add(&pending_bios, rbi);
|
||||
else
|
||||
@ -2412,7 +2406,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
|
||||
atomic_inc(&conf->active_stripes);
|
||||
|
||||
raid5_release_stripe(sh);
|
||||
conf->max_nr_stripes++;
|
||||
WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -2422,12 +2416,12 @@ static int grow_stripes(struct r5conf *conf, int num)
|
||||
size_t namelen = sizeof(conf->cache_name[0]);
|
||||
int devs = max(conf->raid_disks, conf->previous_raid_disks);
|
||||
|
||||
if (conf->mddev->gendisk)
|
||||
snprintf(conf->cache_name[0], namelen,
|
||||
"raid%d-%s", conf->level, mdname(conf->mddev));
|
||||
else
|
||||
if (mddev_is_dm(conf->mddev))
|
||||
snprintf(conf->cache_name[0], namelen,
|
||||
"raid%d-%p", conf->level, conf->mddev);
|
||||
else
|
||||
snprintf(conf->cache_name[0], namelen,
|
||||
"raid%d-%s", conf->level, mdname(conf->mddev));
|
||||
snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
|
||||
|
||||
conf->active_name = 0;
|
||||
@ -2707,7 +2701,7 @@ static int drop_one_stripe(struct r5conf *conf)
|
||||
shrink_buffers(sh);
|
||||
free_stripe(conf->slab_cache, sh);
|
||||
atomic_dec(&conf->active_stripes);
|
||||
conf->max_nr_stripes--;
|
||||
WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -2855,8 +2849,6 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
struct r5conf *conf = sh->raid_conf;
|
||||
int disks = sh->disks, i;
|
||||
struct md_rdev *rdev;
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
int replacement = 0;
|
||||
|
||||
for (i = 0 ; i < disks; i++) {
|
||||
@ -2888,9 +2880,8 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
if (replacement) {
|
||||
if (bi->bi_status)
|
||||
md_error(conf->mddev, rdev);
|
||||
else if (is_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf),
|
||||
&first_bad, &bad_sectors))
|
||||
else if (rdev_has_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf)))
|
||||
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
|
||||
} else {
|
||||
if (bi->bi_status) {
|
||||
@ -2900,9 +2891,8 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
if (!test_and_set_bit(WantReplacement, &rdev->flags))
|
||||
set_bit(MD_RECOVERY_NEEDED,
|
||||
&rdev->mddev->recovery);
|
||||
} else if (is_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf),
|
||||
&first_bad, &bad_sectors)) {
|
||||
} else if (rdev_has_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf))) {
|
||||
set_bit(R5_MadeGood, &sh->dev[i].flags);
|
||||
if (test_bit(R5_ReadError, &sh->dev[i].flags))
|
||||
/* That was a successful write so make
|
||||
@ -4205,10 +4195,9 @@ static int handle_stripe_dirtying(struct r5conf *conf,
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
|
||||
/* prefer read-modify-write, but need to get some data */
|
||||
if (conf->mddev->queue)
|
||||
blk_add_trace_msg(conf->mddev->queue,
|
||||
"raid5 rmw %llu %d",
|
||||
(unsigned long long)sh->sector, rmw);
|
||||
mddev_add_trace_msg(conf->mddev, "raid5 rmw %llu %d",
|
||||
sh->sector, rmw);
|
||||
|
||||
for (i = disks; i--; ) {
|
||||
struct r5dev *dev = &sh->dev[i];
|
||||
if (test_bit(R5_InJournal, &dev->flags) &&
|
||||
@ -4285,10 +4274,11 @@ static int handle_stripe_dirtying(struct r5conf *conf,
|
||||
set_bit(STRIPE_DELAYED, &sh->state);
|
||||
}
|
||||
}
|
||||
if (rcw && conf->mddev->queue)
|
||||
blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
|
||||
(unsigned long long)sh->sector,
|
||||
rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
|
||||
if (rcw && !mddev_is_dm(conf->mddev))
|
||||
blk_add_trace_msg(conf->mddev->gendisk->queue,
|
||||
"raid5 rcw %llu %d %d %d",
|
||||
(unsigned long long)sh->sector, rcw, qread,
|
||||
test_bit(STRIPE_DELAYED, &sh->state));
|
||||
}
|
||||
|
||||
if (rcw > disks && rmw > disks &&
|
||||
@ -4674,8 +4664,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
/* Now to look around and see what can be done */
|
||||
for (i=disks; i--; ) {
|
||||
struct md_rdev *rdev;
|
||||
sector_t first_bad;
|
||||
int bad_sectors;
|
||||
int is_bad = 0;
|
||||
|
||||
dev = &sh->dev[i];
|
||||
@ -4719,8 +4707,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags) &&
|
||||
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
|
||||
!is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
&first_bad, &bad_sectors))
|
||||
!rdev_has_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf)))
|
||||
set_bit(R5_ReadRepl, &dev->flags);
|
||||
else {
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||
@ -4733,8 +4721,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
if (rdev && test_bit(Faulty, &rdev->flags))
|
||||
rdev = NULL;
|
||||
if (rdev) {
|
||||
is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
|
||||
&first_bad, &bad_sectors);
|
||||
is_bad = rdev_has_badblock(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf));
|
||||
if (s->blocked_rdev == NULL
|
||||
&& (test_bit(Blocked, &rdev->flags)
|
||||
|| is_bad < 0)) {
|
||||
@ -5463,8 +5451,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct bio *align_bio;
|
||||
struct md_rdev *rdev;
|
||||
sector_t sector, end_sector, first_bad;
|
||||
int bad_sectors, dd_idx;
|
||||
sector_t sector, end_sector;
|
||||
int dd_idx;
|
||||
bool did_inc;
|
||||
|
||||
if (!in_chunk_boundary(mddev, raid_bio)) {
|
||||
@ -5493,8 +5481,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
|
||||
if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
|
||||
&bad_sectors)) {
|
||||
if (rdev_has_badblock(rdev, sector, bio_sectors(raid_bio))) {
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
return 0;
|
||||
}
|
||||
@ -5530,9 +5517,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
}
|
||||
|
||||
if (mddev->gendisk)
|
||||
trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
|
||||
raid_bio->bi_iter.bi_sector);
|
||||
mddev_trace_remap(mddev, align_bio, raid_bio->bi_iter.bi_sector);
|
||||
submit_bio_noacct(align_bio);
|
||||
return 1;
|
||||
}
|
||||
@ -5701,8 +5686,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
|
||||
}
|
||||
release_inactive_stripe_list(conf, cb->temp_inactive_list,
|
||||
NR_STRIPE_HASH_LOCKS);
|
||||
if (mddev->queue)
|
||||
trace_block_unplug(mddev->queue, cnt, !from_schedule);
|
||||
if (!mddev_is_dm(mddev))
|
||||
trace_block_unplug(mddev->gendisk->queue, cnt, !from_schedule);
|
||||
kfree(cb);
|
||||
}
|
||||
|
||||
@ -5946,7 +5931,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_safe)) {
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
return STRIPE_SCHEDULE_AND_RETRY;
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
@ -6025,6 +6011,12 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
|
||||
out_release:
|
||||
raid5_release_stripe(sh);
|
||||
out:
|
||||
if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) {
|
||||
bi->bi_status = BLK_STS_RESOURCE;
|
||||
ret = STRIPE_WAIT_RESHAPE;
|
||||
pr_err_ratelimited("dm-raid456: io across reshape position while reshape can't make progress");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -6146,7 +6138,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
while (1) {
|
||||
res = make_stripe_request(mddev, conf, &ctx, logical_sector,
|
||||
bi);
|
||||
if (res == STRIPE_FAIL)
|
||||
if (res == STRIPE_FAIL || res == STRIPE_WAIT_RESHAPE)
|
||||
break;
|
||||
|
||||
if (res == STRIPE_RETRY)
|
||||
@ -6184,6 +6176,11 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
|
||||
if (rw == WRITE)
|
||||
md_write_end(mddev);
|
||||
if (res == STRIPE_WAIT_RESHAPE) {
|
||||
md_free_cloned_bio(bi);
|
||||
return false;
|
||||
}
|
||||
|
||||
bio_endio(bi);
|
||||
return true;
|
||||
}
|
||||
@ -6773,7 +6770,18 @@ static void raid5d(struct md_thread *thread)
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_check_recovery(mddev);
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
|
||||
/*
|
||||
* Waiting on MD_SB_CHANGE_PENDING below may deadlock
|
||||
* seeing md_check_recovery() is needed to clear
|
||||
* the flag when using mdmon.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
wait_event_lock_irq(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
|
||||
conf->device_lock);
|
||||
}
|
||||
pr_debug("%d stripes handled\n", handled);
|
||||
|
||||
@ -6820,7 +6828,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
|
||||
if (size <= 16 || size > 32768)
|
||||
return -EINVAL;
|
||||
|
||||
conf->min_nr_stripes = size;
|
||||
WRITE_ONCE(conf->min_nr_stripes, size);
|
||||
mutex_lock(&conf->cache_size_mutex);
|
||||
while (size < conf->max_nr_stripes &&
|
||||
drop_one_stripe(conf))
|
||||
@ -6832,7 +6840,7 @@ raid5_set_cache_size(struct mddev *mddev, int size)
|
||||
mutex_lock(&conf->cache_size_mutex);
|
||||
while (size > conf->max_nr_stripes)
|
||||
if (!grow_one_stripe(conf, GFP_KERNEL)) {
|
||||
conf->min_nr_stripes = conf->max_nr_stripes;
|
||||
WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes);
|
||||
result = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
@ -6967,10 +6975,8 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len)
|
||||
pr_debug("md/raid: change stripe_size from %lu to %lu\n",
|
||||
conf->stripe_size, new);
|
||||
|
||||
if (mddev->sync_thread ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
mddev->reshape_position != MaxSector ||
|
||||
mddev->sysfs_active) {
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
mddev->reshape_position != MaxSector || mddev->sysfs_active) {
|
||||
err = -EBUSY;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -7084,7 +7090,7 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
|
||||
if (!conf)
|
||||
err = -ENODEV;
|
||||
else if (new != conf->skip_copy) {
|
||||
struct request_queue *q = mddev->queue;
|
||||
struct request_queue *q = mddev->gendisk->queue;
|
||||
|
||||
conf->skip_copy = new;
|
||||
if (new)
|
||||
@ -7390,11 +7396,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct r5conf *conf = shrink->private_data;
|
||||
int max_stripes = READ_ONCE(conf->max_nr_stripes);
|
||||
int min_stripes = READ_ONCE(conf->min_nr_stripes);
|
||||
|
||||
if (conf->max_nr_stripes < conf->min_nr_stripes)
|
||||
if (max_stripes < min_stripes)
|
||||
/* unlikely, but not impossible */
|
||||
return 0;
|
||||
return conf->max_nr_stripes - conf->min_nr_stripes;
|
||||
return max_stripes - min_stripes;
|
||||
}
|
||||
|
||||
static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
@ -7684,10 +7692,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void raid5_set_io_opt(struct r5conf *conf)
|
||||
static int raid5_set_limits(struct mddev *mddev)
|
||||
{
|
||||
blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
|
||||
(conf->raid_disks - conf->max_degraded));
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct queue_limits lim;
|
||||
int data_disks, stripe;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
/*
|
||||
* The read-ahead size must cover two whole stripes, which is
|
||||
* 2 * (datadisks) * chunksize, where datadisks is previous_raid_disks - max_degraded.
|
||||
*/
|
||||
data_disks = conf->previous_raid_disks - conf->max_degraded;
|
||||
|
||||
/*
|
||||
* We can only discard a whole stripe. It doesn't make sense to
|
||||
* discard data disk but write parity disk
|
||||
*/
|
||||
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
|
||||
lim.raid_partial_stripes_expensive = 1;
|
||||
lim.discard_granularity = stripe;
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
rdev_for_each(rdev, mddev)
|
||||
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
|
||||
mddev->gendisk->disk_name);
|
||||
|
||||
/*
|
||||
* Zeroing is required for discard, otherwise data could be lost.
|
||||
*
|
||||
* Consider a scenario: discard a stripe (the stripe could be
|
||||
* inconsistent if discard_zeroes_data is 0); write one disk of the
|
||||
* stripe (the stripe could be inconsistent again depending on which
|
||||
* disks are used to calculate parity); the disk is broken; The stripe
|
||||
* data of this disk is lost.
|
||||
*
|
||||
* We only allow DISCARD if the sysadmin has confirmed that only safe
|
||||
* devices are in use by setting a module parameter. A better idea
|
||||
* might be to turn DISCARD into WRITE_ZEROES requests, as that is
|
||||
* required to be safe.
|
||||
*/
|
||||
if (!devices_handle_discard_safely ||
|
||||
lim.max_discard_sectors < (stripe >> 9) ||
|
||||
lim.discard_granularity < stripe)
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
|
||||
/*
|
||||
* Requests require having a bitmap for each stripe.
|
||||
* Limit the max sectors based on this.
|
||||
*/
|
||||
lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf);
|
||||
|
||||
/* No restrictions on the number of segments in the request */
|
||||
lim.max_segments = USHRT_MAX;
|
||||
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
static int raid5_run(struct mddev *mddev)
|
||||
@ -7700,6 +7763,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
int i;
|
||||
long long min_offset_diff = 0;
|
||||
int first = 1;
|
||||
int ret = -EIO;
|
||||
|
||||
if (mddev->recovery_cp != MaxSector)
|
||||
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
|
||||
@ -7948,66 +8012,10 @@ static int raid5_run(struct mddev *mddev)
|
||||
mdname(mddev));
|
||||
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
|
||||
|
||||
if (mddev->queue) {
|
||||
int chunk_size;
|
||||
/* read-ahead size must cover two whole stripes, which
|
||||
* is 2 * (datadisks) * chunksize where 'n' is the
|
||||
* number of raid devices
|
||||
*/
|
||||
int data_disks = conf->previous_raid_disks - conf->max_degraded;
|
||||
int stripe = data_disks *
|
||||
((mddev->chunk_sectors << 9) / PAGE_SIZE);
|
||||
|
||||
chunk_size = mddev->chunk_sectors << 9;
|
||||
blk_queue_io_min(mddev->queue, chunk_size);
|
||||
raid5_set_io_opt(conf);
|
||||
mddev->queue->limits.raid_partial_stripes_expensive = 1;
|
||||
/*
|
||||
* We can only discard a whole stripe. It doesn't make sense to
|
||||
* discard data disk but write parity disk
|
||||
*/
|
||||
stripe = stripe * PAGE_SIZE;
|
||||
stripe = roundup_pow_of_two(stripe);
|
||||
mddev->queue->limits.discard_granularity = stripe;
|
||||
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->new_data_offset << 9);
|
||||
}
|
||||
|
||||
/*
|
||||
* zeroing is required, otherwise data
|
||||
* could be lost. Consider a scenario: discard a stripe
|
||||
* (the stripe could be inconsistent if
|
||||
* discard_zeroes_data is 0); write one disk of the
|
||||
* stripe (the stripe could be inconsistent again
|
||||
* depending on which disks are used to calculate
|
||||
* parity); the disk is broken; The stripe data of this
|
||||
* disk is lost.
|
||||
*
|
||||
* We only allow DISCARD if the sysadmin has confirmed that
|
||||
* only safe devices are in use by setting a module parameter.
|
||||
* A better idea might be to turn DISCARD into WRITE_ZEROES
|
||||
* requests, as that is required to be safe.
|
||||
*/
|
||||
if (!devices_handle_discard_safely ||
|
||||
mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
|
||||
mddev->queue->limits.discard_granularity < stripe)
|
||||
blk_queue_max_discard_sectors(mddev->queue, 0);
|
||||
|
||||
/*
|
||||
* Requests require having a bitmap for each stripe.
|
||||
* Limit the max sectors based on this.
|
||||
*/
|
||||
blk_queue_max_hw_sectors(mddev->queue,
|
||||
RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));
|
||||
|
||||
/* No restrictions on the number of segments in the request */
|
||||
blk_queue_max_segments(mddev->queue, USHRT_MAX);
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = raid5_set_limits(mddev);
|
||||
if (ret)
|
||||
goto abort;
|
||||
}
|
||||
|
||||
if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
|
||||
@ -8020,7 +8028,7 @@ static int raid5_run(struct mddev *mddev)
|
||||
free_conf(conf);
|
||||
mddev->private = NULL;
|
||||
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
|
||||
return -EIO;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void raid5_free(struct mddev *mddev, void *priv)
|
||||
@ -8531,8 +8539,8 @@ static void end_reshape(struct r5conf *conf)
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
wake_up(&conf->wait_for_overlap);
|
||||
|
||||
if (conf->mddev->queue)
|
||||
raid5_set_io_opt(conf);
|
||||
mddev_update_io_opt(conf->mddev,
|
||||
conf->raid_disks - conf->max_degraded);
|
||||
}
|
||||
}
|
||||
|
||||
@ -8909,6 +8917,18 @@ static int raid5_start(struct mddev *mddev)
|
||||
return r5l_start(conf->log);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is only used for dm-raid456; the caller has already frozen sync_thread, hence
|
||||
* if reshape is still in progress, io that is waiting for reshape can never be
|
||||
* done now, hence wake up and handle those IO.
|
||||
*/
|
||||
static void raid5_prepare_suspend(struct mddev *mddev)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
|
||||
wake_up(&conf->wait_for_overlap);
|
||||
}
|
||||
|
||||
static struct md_personality raid6_personality =
|
||||
{
|
||||
.name = "raid6",
|
||||
@ -8932,6 +8952,7 @@ static struct md_personality raid6_personality =
|
||||
.quiesce = raid5_quiesce,
|
||||
.takeover = raid6_takeover,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.prepare_suspend = raid5_prepare_suspend,
|
||||
};
|
||||
static struct md_personality raid5_personality =
|
||||
{
|
||||
@ -8956,6 +8977,7 @@ static struct md_personality raid5_personality =
|
||||
.quiesce = raid5_quiesce,
|
||||
.takeover = raid5_takeover,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.prepare_suspend = raid5_prepare_suspend,
|
||||
};
|
||||
|
||||
static struct md_personality raid4_personality =
|
||||
@ -8981,6 +9003,7 @@ static struct md_personality raid4_personality =
|
||||
.quiesce = raid5_quiesce,
|
||||
.takeover = raid4_takeover,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.prepare_suspend = raid5_prepare_suspend,
|
||||
};
|
||||
|
||||
static int __init raid5_init(void)
|
||||
|
@ -2078,6 +2078,12 @@ static const struct blk_mq_ops msb_mq_ops = {
|
||||
static int msb_init_disk(struct memstick_dev *card)
|
||||
{
|
||||
struct msb_data *msb = memstick_get_drvdata(card);
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = msb->page_size,
|
||||
.max_hw_sectors = MS_BLOCK_MAX_PAGES,
|
||||
.max_segments = MS_BLOCK_MAX_SEGS,
|
||||
.max_segment_size = MS_BLOCK_MAX_PAGES * msb->page_size,
|
||||
};
|
||||
int rc;
|
||||
unsigned long capacity;
|
||||
|
||||
@ -2093,19 +2099,13 @@ static int msb_init_disk(struct memstick_dev *card)
|
||||
if (rc)
|
||||
goto out_release_id;
|
||||
|
||||
msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
|
||||
msb->disk = blk_mq_alloc_disk(&msb->tag_set, &lim, card);
|
||||
if (IS_ERR(msb->disk)) {
|
||||
rc = PTR_ERR(msb->disk);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
msb->queue = msb->disk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
|
||||
blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
|
||||
blk_queue_max_segment_size(msb->queue,
|
||||
MS_BLOCK_MAX_PAGES * msb->page_size);
|
||||
blk_queue_logical_block_size(msb->queue, msb->page_size);
|
||||
|
||||
sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id);
|
||||
msb->disk->fops = &msb_bdops;
|
||||
msb->disk->private_data = msb;
|
||||
|
@ -1103,6 +1103,12 @@ static const struct blk_mq_ops mspro_mq_ops = {
|
||||
static int mspro_block_init_disk(struct memstick_dev *card)
|
||||
{
|
||||
struct mspro_block_data *msb = memstick_get_drvdata(card);
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = msb->page_size,
|
||||
.max_hw_sectors = MSPRO_BLOCK_MAX_PAGES,
|
||||
.max_segments = MSPRO_BLOCK_MAX_SEGS,
|
||||
.max_segment_size = MSPRO_BLOCK_MAX_PAGES * msb->page_size,
|
||||
};
|
||||
struct mspro_devinfo *dev_info = NULL;
|
||||
struct mspro_sys_info *sys_info = NULL;
|
||||
struct mspro_sys_attr *s_attr = NULL;
|
||||
@ -1138,18 +1144,13 @@ static int mspro_block_init_disk(struct memstick_dev *card)
|
||||
if (rc)
|
||||
goto out_release_id;
|
||||
|
||||
msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
|
||||
msb->disk = blk_mq_alloc_disk(&msb->tag_set, &lim, card);
|
||||
if (IS_ERR(msb->disk)) {
|
||||
rc = PTR_ERR(msb->disk);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
msb->queue = msb->disk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
|
||||
blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
|
||||
blk_queue_max_segment_size(msb->queue,
|
||||
MSPRO_BLOCK_MAX_PAGES * msb->page_size);
|
||||
|
||||
msb->disk->major = major;
|
||||
msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
|
||||
msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
|
||||
@ -1158,8 +1159,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
|
||||
|
||||
sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
|
||||
|
||||
blk_queue_logical_block_size(msb->queue, msb->page_size);
|
||||
|
||||
capacity = be16_to_cpu(sys_info->user_block_count);
|
||||
capacity *= be16_to_cpu(sys_info->block_size);
|
||||
capacity *= msb->page_size >> 9;
|
||||
|
@ -174,8 +174,8 @@ static struct scatterlist *mmc_alloc_sg(unsigned short sg_len, gfp_t gfp)
|
||||
return sg;
|
||||
}
|
||||
|
||||
static void mmc_queue_setup_discard(struct request_queue *q,
|
||||
struct mmc_card *card)
|
||||
static void mmc_queue_setup_discard(struct mmc_card *card,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
unsigned max_discard;
|
||||
|
||||
@ -183,15 +183,17 @@ static void mmc_queue_setup_discard(struct request_queue *q,
|
||||
if (!max_discard)
|
||||
return;
|
||||
|
||||
blk_queue_max_discard_sectors(q, max_discard);
|
||||
q->limits.discard_granularity = card->pref_erase << 9;
|
||||
lim->max_hw_discard_sectors = max_discard;
|
||||
if (mmc_can_secure_erase_trim(card))
|
||||
lim->max_secure_erase_sectors = max_discard;
|
||||
if (mmc_can_trim(card) && card->erased_byte == 0)
|
||||
lim->max_write_zeroes_sectors = max_discard;
|
||||
|
||||
/* granularity must not be greater than max. discard */
|
||||
if (card->pref_erase > max_discard)
|
||||
q->limits.discard_granularity = SECTOR_SIZE;
|
||||
if (mmc_can_secure_erase_trim(card))
|
||||
blk_queue_max_secure_erase_sectors(q, max_discard);
|
||||
if (mmc_can_trim(card) && card->erased_byte == 0)
|
||||
blk_queue_max_write_zeroes_sectors(q, max_discard);
|
||||
lim->discard_granularity = SECTOR_SIZE;
|
||||
else
|
||||
lim->discard_granularity = card->pref_erase << 9;
|
||||
}
|
||||
|
||||
static unsigned short mmc_get_max_segments(struct mmc_host *host)
|
||||
@ -341,40 +343,53 @@ static const struct blk_mq_ops mmc_mq_ops = {
|
||||
.timeout = mmc_mq_timed_out,
|
||||
};
|
||||
|
||||
static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
|
||||
static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
|
||||
struct mmc_card *card)
|
||||
{
|
||||
struct mmc_host *host = card->host;
|
||||
unsigned block_size = 512;
|
||||
struct queue_limits lim = { };
|
||||
struct gendisk *disk;
|
||||
|
||||
if (mmc_can_erase(card))
|
||||
mmc_queue_setup_discard(card, &lim);
|
||||
|
||||
if (!mmc_dev(host)->dma_mask || !*mmc_dev(host)->dma_mask)
|
||||
lim.bounce = BLK_BOUNCE_HIGH;
|
||||
|
||||
lim.max_hw_sectors = min(host->max_blk_count, host->max_req_size / 512);
|
||||
|
||||
if (mmc_card_mmc(card) && card->ext_csd.data_sector_size)
|
||||
lim.logical_block_size = card->ext_csd.data_sector_size;
|
||||
else
|
||||
lim.logical_block_size = 512;
|
||||
|
||||
WARN_ON_ONCE(lim.logical_block_size != 512 &&
|
||||
lim.logical_block_size != 4096);
|
||||
|
||||
/*
|
||||
* Setting a virt_boundary implicitly sets a max_segment_size, so try
|
||||
* to set the hardware one here.
|
||||
*/
|
||||
if (host->can_dma_map_merge) {
|
||||
lim.virt_boundary_mask = dma_get_merge_boundary(mmc_dev(host));
|
||||
lim.max_segments = MMC_DMA_MAP_MERGE_SEGMENTS;
|
||||
} else {
|
||||
lim.max_segment_size =
|
||||
round_down(host->max_seg_size, lim.logical_block_size);
|
||||
lim.max_segments = host->max_segs;
|
||||
}
|
||||
|
||||
disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
|
||||
if (IS_ERR(disk))
|
||||
return disk;
|
||||
mq->queue = disk->queue;
|
||||
|
||||
if (mmc_host_is_spi(host) && host->use_spi_crc)
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
|
||||
blk_queue_rq_timeout(mq->queue, 60 * HZ);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
|
||||
if (mmc_can_erase(card))
|
||||
mmc_queue_setup_discard(mq->queue, card);
|
||||
|
||||
if (!mmc_dev(host)->dma_mask || !*mmc_dev(host)->dma_mask)
|
||||
blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
|
||||
blk_queue_max_hw_sectors(mq->queue,
|
||||
min(host->max_blk_count, host->max_req_size / 512));
|
||||
if (host->can_dma_map_merge)
|
||||
WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
|
||||
mmc_dev(host)),
|
||||
"merging was advertised but not possible");
|
||||
blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
|
||||
|
||||
if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) {
|
||||
block_size = card->ext_csd.data_sector_size;
|
||||
WARN_ON(block_size != 512 && block_size != 4096);
|
||||
}
|
||||
|
||||
blk_queue_logical_block_size(mq->queue, block_size);
|
||||
/*
|
||||
* After blk_queue_can_use_dma_map_merging() was called with succeed,
|
||||
* since it calls blk_queue_virt_boundary(), the mmc should not call
|
||||
* both blk_queue_max_segment_size().
|
||||
*/
|
||||
if (!host->can_dma_map_merge)
|
||||
blk_queue_max_segment_size(mq->queue,
|
||||
round_down(host->max_seg_size, block_size));
|
||||
|
||||
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
|
||||
|
||||
@ -386,6 +401,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
|
||||
init_waitqueue_head(&mq->wait);
|
||||
|
||||
mmc_crypto_setup_queue(mq->queue, host);
|
||||
return disk;
|
||||
}
|
||||
|
||||
static inline bool mmc_merge_capable(struct mmc_host *host)
|
||||
@ -447,18 +463,9 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
|
||||
disk = blk_mq_alloc_disk(&mq->tag_set, mq);
|
||||
if (IS_ERR(disk)) {
|
||||
disk = mmc_alloc_disk(mq, card);
|
||||
if (IS_ERR(disk))
|
||||
blk_mq_free_tag_set(&mq->tag_set);
|
||||
return disk;
|
||||
}
|
||||
mq->queue = disk->queue;
|
||||
|
||||
if (mmc_host_is_spi(host) && host->use_spi_crc)
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
|
||||
blk_queue_rq_timeout(mq->queue, 60 * HZ);
|
||||
|
||||
mmc_setup_queue(mq, card);
|
||||
return disk;
|
||||
}
|
||||
|
||||
|
@ -277,6 +277,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
{
|
||||
struct mtd_blktrans_ops *tr = new->tr;
|
||||
struct mtd_blktrans_dev *d;
|
||||
struct queue_limits lim = { };
|
||||
int last_devnum = -1;
|
||||
struct gendisk *gd;
|
||||
int ret;
|
||||
@ -332,8 +333,12 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
if (ret)
|
||||
goto out_kfree_tag_set;
|
||||
|
||||
lim.logical_block_size = tr->blksize;
|
||||
if (tr->discard)
|
||||
lim.max_hw_discard_sectors = UINT_MAX;
|
||||
|
||||
/* Create gendisk */
|
||||
gd = blk_mq_alloc_disk(new->tag_set, new);
|
||||
gd = blk_mq_alloc_disk(new->tag_set, &lim, new);
|
||||
if (IS_ERR(gd)) {
|
||||
ret = PTR_ERR(gd);
|
||||
goto out_free_tag_set;
|
||||
@ -371,14 +376,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
if (tr->flush)
|
||||
blk_queue_write_cache(new->rq, true, false);
|
||||
|
||||
blk_queue_logical_block_size(new->rq, tr->blksize);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
|
||||
|
||||
if (tr->discard)
|
||||
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
|
||||
|
||||
gd->queue = new->rq;
|
||||
|
||||
if (new->readonly)
|
||||
|
@ -348,6 +348,9 @@ static int calc_disk_capacity(struct ubi_volume_info *vi, u64 *disk_capacity)
|
||||
|
||||
int ubiblock_create(struct ubi_volume_info *vi)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.max_segments = UBI_MAX_SG_COUNT,
|
||||
};
|
||||
struct ubiblock *dev;
|
||||
struct gendisk *gd;
|
||||
u64 disk_capacity;
|
||||
@ -393,7 +396,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
|
||||
|
||||
/* Initialize the gendisk of this ubiblock device */
|
||||
gd = blk_mq_alloc_disk(&dev->tag_set, dev);
|
||||
gd = blk_mq_alloc_disk(&dev->tag_set, &lim, dev);
|
||||
if (IS_ERR(gd)) {
|
||||
ret = PTR_ERR(gd);
|
||||
goto out_free_tags;
|
||||
@ -416,7 +419,6 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
dev->gd = gd;
|
||||
|
||||
dev->rq = gd->queue;
|
||||
blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
|
||||
|
||||
list_add_tail(&dev->list, &ubiblock_devices);
|
||||
|
||||
|
@ -1496,19 +1496,21 @@ static int btt_blk_init(struct btt *btt)
|
||||
{
|
||||
struct nd_btt *nd_btt = btt->nd_btt;
|
||||
struct nd_namespace_common *ndns = nd_btt->ndns;
|
||||
int rc = -ENOMEM;
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = btt->sector_size,
|
||||
.max_hw_sectors = UINT_MAX,
|
||||
};
|
||||
int rc;
|
||||
|
||||
btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!btt->btt_disk)
|
||||
return -ENOMEM;
|
||||
btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(btt->btt_disk))
|
||||
return PTR_ERR(btt->btt_disk);
|
||||
|
||||
nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
|
||||
btt->btt_disk->first_minor = 0;
|
||||
btt->btt_disk->fops = &btt_fops;
|
||||
btt->btt_disk->private_data = btt;
|
||||
|
||||
blk_queue_logical_block_size(btt->btt_disk->queue, btt->sector_size);
|
||||
blk_queue_max_hw_sectors(btt->btt_disk->queue, UINT_MAX);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue);
|
||||
|
||||
|
@ -451,6 +451,11 @@ static int pmem_attach_disk(struct device *dev,
|
||||
{
|
||||
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
|
||||
struct nd_region *nd_region = to_nd_region(dev->parent);
|
||||
struct queue_limits lim = {
|
||||
.logical_block_size = pmem_sector_size(ndns),
|
||||
.physical_block_size = PAGE_SIZE,
|
||||
.max_hw_sectors = UINT_MAX,
|
||||
};
|
||||
int nid = dev_to_node(dev), fua;
|
||||
struct resource *res = &nsio->res;
|
||||
struct range bb_range;
|
||||
@ -497,9 +502,9 @@ static int pmem_attach_disk(struct device *dev,
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
disk = blk_alloc_disk(nid);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
disk = blk_alloc_disk(&lim, nid);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
q = disk->queue;
|
||||
|
||||
pmem->disk = disk;
|
||||
@ -539,9 +544,6 @@ static int pmem_attach_disk(struct device *dev,
|
||||
pmem->virt_addr = addr;
|
||||
|
||||
blk_queue_write_cache(q, true, fua);
|
||||
blk_queue_physical_block_size(q, PAGE_SIZE);
|
||||
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
|
||||
blk_queue_max_hw_sectors(q, UINT_MAX);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q);
|
||||
if (pmem->pfn_flags & PFN_MAP)
|
||||
|
@ -1516,7 +1516,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
|
||||
goto put_dev;
|
||||
}
|
||||
|
||||
anv->ctrl.admin_q = blk_mq_init_queue(&anv->admin_tagset);
|
||||
anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
|
||||
if (IS_ERR(anv->ctrl.admin_q)) {
|
||||
ret = -ENOMEM;
|
||||
goto put_dev;
|
||||
|
@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock);
|
||||
|
||||
static DEFINE_IDA(nvme_instance_ida);
|
||||
static dev_t nvme_ctrl_base_chr_devt;
|
||||
static struct class *nvme_class;
|
||||
static struct class *nvme_subsys_class;
|
||||
static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env);
|
||||
static const struct class nvme_class = {
|
||||
.name = "nvme",
|
||||
.dev_uevent = nvme_class_uevent,
|
||||
};
|
||||
|
||||
static const struct class nvme_subsys_class = {
|
||||
.name = "nvme-subsystem",
|
||||
};
|
||||
|
||||
static DEFINE_IDA(nvme_ns_chr_minor_ida);
|
||||
static dev_t nvme_ns_chr_devt;
|
||||
static struct class *nvme_ns_chr_class;
|
||||
static const struct class nvme_ns_chr_class = {
|
||||
.name = "nvme-generic",
|
||||
};
|
||||
|
||||
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
|
||||
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
|
||||
@ -1398,8 +1407,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
|
||||
|
||||
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
|
||||
sizeof(struct nvme_id_ctrl));
|
||||
if (error)
|
||||
if (error) {
|
||||
kfree(*id);
|
||||
*id = NULL;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -1528,6 +1539,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
if (error) {
|
||||
dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
|
||||
kfree(*id);
|
||||
*id = NULL;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
@ -1727,12 +1739,23 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
static void nvme_init_integrity(struct gendisk *disk,
|
||||
struct nvme_ns_head *head, u32 max_integrity_segments)
|
||||
static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
|
||||
{
|
||||
struct blk_integrity integrity = { };
|
||||
|
||||
blk_integrity_unregister(disk);
|
||||
|
||||
if (!head->ms)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* PI can always be supported as we can ask the controller to simply
|
||||
* insert/strip it, which is not possible for other kinds of metadata.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) ||
|
||||
!(head->features & NVME_NS_METADATA_SUPPORTED))
|
||||
return nvme_ns_has_pi(head);
|
||||
|
||||
switch (head->pi_type) {
|
||||
case NVME_NS_DPS_PI_TYPE3:
|
||||
switch (head->guard_type) {
|
||||
@ -1775,53 +1798,32 @@ static void nvme_init_integrity(struct gendisk *disk,
|
||||
}
|
||||
|
||||
integrity.tuple_size = head->ms;
|
||||
integrity.pi_offset = head->pi_offset;
|
||||
blk_integrity_register(disk, &integrity);
|
||||
blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static void nvme_init_integrity(struct gendisk *disk,
|
||||
struct nvme_ns_head *head, u32 max_integrity_segments)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
|
||||
static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
|
||||
struct nvme_ns_head *head)
|
||||
static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
|
||||
{
|
||||
struct request_queue *queue = disk->queue;
|
||||
u32 max_discard_sectors;
|
||||
|
||||
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
|
||||
max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
|
||||
} else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
|
||||
max_discard_sectors = UINT_MAX;
|
||||
} else {
|
||||
blk_queue_max_discard_sectors(queue, 0);
|
||||
return;
|
||||
}
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
|
||||
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
|
||||
NVME_DSM_MAX_RANGES);
|
||||
|
||||
/*
|
||||
* If discard is already enabled, don't reset queue limits.
|
||||
*
|
||||
* This works around the fact that the block layer can't cope well with
|
||||
* updating the hardware limits when overridden through sysfs. This is
|
||||
* harmless because discard limits in NVMe are purely advisory.
|
||||
*/
|
||||
if (queue->limits.max_discard_sectors)
|
||||
return;
|
||||
|
||||
blk_queue_max_discard_sectors(queue, max_discard_sectors);
|
||||
if (ctrl->dmrl)
|
||||
blk_queue_max_discard_segments(queue, ctrl->dmrl);
|
||||
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
|
||||
lim->max_hw_discard_sectors =
|
||||
nvme_lba_to_sect(ns->head, ctrl->dmrsl);
|
||||
else if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
|
||||
lim->max_hw_discard_sectors = UINT_MAX;
|
||||
else
|
||||
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
|
||||
queue->limits.discard_granularity = queue_logical_block_size(queue);
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
|
||||
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
|
||||
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
|
||||
lim->discard_granularity = lim->logical_block_size;
|
||||
|
||||
if (ctrl->dmrl)
|
||||
lim->max_discard_segments = ctrl->dmrl;
|
||||
else
|
||||
lim->max_discard_segments = NVME_DSM_MAX_RANGES;
|
||||
}
|
||||
|
||||
static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
|
||||
@ -1832,42 +1834,38 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
|
||||
a->csi == b->csi;
|
||||
}
|
||||
|
||||
static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
|
||||
struct nvme_id_ns *id)
|
||||
static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid,
|
||||
struct nvme_id_ns_nvm **nvmp)
|
||||
{
|
||||
bool first = id->dps & NVME_NS_DPS_PI_FIRST;
|
||||
unsigned lbaf = nvme_lbaf_index(id->flbas);
|
||||
struct nvme_command c = { };
|
||||
struct nvme_command c = {
|
||||
.identify.opcode = nvme_admin_identify,
|
||||
.identify.nsid = cpu_to_le32(nsid),
|
||||
.identify.cns = NVME_ID_CNS_CS_NS,
|
||||
.identify.csi = NVME_CSI_NVM,
|
||||
};
|
||||
struct nvme_id_ns_nvm *nvm;
|
||||
int ret = 0;
|
||||
u32 elbaf;
|
||||
|
||||
head->pi_size = 0;
|
||||
head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
|
||||
if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
|
||||
head->pi_size = sizeof(struct t10_pi_tuple);
|
||||
head->guard_type = NVME_NVM_NS_16B_GUARD;
|
||||
goto set_pi;
|
||||
}
|
||||
int ret;
|
||||
|
||||
nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
|
||||
if (!nvm)
|
||||
return -ENOMEM;
|
||||
|
||||
c.identify.opcode = nvme_admin_identify;
|
||||
c.identify.nsid = cpu_to_le32(head->ns_id);
|
||||
c.identify.cns = NVME_ID_CNS_CS_NS;
|
||||
c.identify.csi = NVME_CSI_NVM;
|
||||
|
||||
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm));
|
||||
if (ret)
|
||||
goto free_data;
|
||||
kfree(nvm);
|
||||
else
|
||||
*nvmp = nvm;
|
||||
return ret;
|
||||
}
|
||||
|
||||
elbaf = le32_to_cpu(nvm->elbaf[lbaf]);
|
||||
static void nvme_configure_pi_elbas(struct nvme_ns_head *head,
|
||||
struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm)
|
||||
{
|
||||
u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]);
|
||||
|
||||
/* no support for storage tag formats right now */
|
||||
if (nvme_elbaf_sts(elbaf))
|
||||
goto free_data;
|
||||
return;
|
||||
|
||||
head->guard_type = nvme_elbaf_guard_type(elbaf);
|
||||
switch (head->guard_type) {
|
||||
@ -1880,30 +1878,31 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
free_data:
|
||||
kfree(nvm);
|
||||
set_pi:
|
||||
if (head->pi_size && (first || head->ms == head->pi_size))
|
||||
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
|
||||
else
|
||||
head->pi_type = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ns_head *head, struct nvme_id_ns *id)
|
||||
static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
|
||||
struct nvme_ns_head *head, struct nvme_id_ns *id,
|
||||
struct nvme_id_ns_nvm *nvm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = nvme_init_ms(ctrl, head, id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
|
||||
head->pi_type = 0;
|
||||
head->pi_size = 0;
|
||||
head->pi_offset = 0;
|
||||
head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms);
|
||||
if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
if (nvm && (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
|
||||
nvme_configure_pi_elbas(head, id, nvm);
|
||||
} else {
|
||||
head->pi_size = sizeof(struct t10_pi_tuple);
|
||||
head->guard_type = NVME_NVM_NS_16B_GUARD;
|
||||
}
|
||||
|
||||
if (head->pi_size && head->ms >= head->pi_size)
|
||||
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
|
||||
if (!(id->dps & NVME_NS_DPS_PI_FIRST))
|
||||
head->pi_offset = head->ms - head->pi_size;
|
||||
|
||||
if (ctrl->ops->flags & NVME_F_FABRICS) {
|
||||
/*
|
||||
@ -1912,7 +1911,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
|
||||
* remap the separate metadata buffer from the block layer.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
head->features |= NVME_NS_EXT_LBAS;
|
||||
|
||||
@ -1939,33 +1938,32 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
|
||||
else
|
||||
head->features |= NVME_NS_METADATA_SUPPORTED;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
|
||||
struct request_queue *q)
|
||||
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT;
|
||||
|
||||
if (ctrl->max_hw_sectors) {
|
||||
u32 max_segments =
|
||||
(ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1;
|
||||
|
||||
max_segments = min_not_zero(max_segments, ctrl->max_segments);
|
||||
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
|
||||
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
|
||||
}
|
||||
blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
|
||||
blk_queue_dma_alignment(q, 3);
|
||||
blk_queue_write_cache(q, vwc, vwc);
|
||||
return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1;
|
||||
}
|
||||
|
||||
static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
|
||||
struct nvme_ns_head *head, struct nvme_id_ns *id)
|
||||
static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze));
|
||||
lim->max_hw_sectors = ctrl->max_hw_sectors;
|
||||
lim->max_segments = min_t(u32, USHRT_MAX,
|
||||
min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments));
|
||||
lim->max_integrity_segments = ctrl->max_integrity_segments;
|
||||
lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1;
|
||||
lim->max_segment_size = UINT_MAX;
|
||||
lim->dma_alignment = 3;
|
||||
}
|
||||
|
||||
static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct nvme_ns_head *head = ns->head;
|
||||
u32 bs = 1U << head->lba_shift;
|
||||
u32 atomic_bs, phys_bs, io_opt = 0;
|
||||
bool valid = true;
|
||||
|
||||
/*
|
||||
* The block layer can't support LBA sizes larger than the page size
|
||||
@ -1973,12 +1971,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
|
||||
* allow block I/O.
|
||||
*/
|
||||
if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
|
||||
capacity = 0;
|
||||
bs = (1 << 9);
|
||||
valid = false;
|
||||
}
|
||||
|
||||
blk_integrity_unregister(disk);
|
||||
|
||||
atomic_bs = phys_bs = bs;
|
||||
if (id->nabo == 0) {
|
||||
/*
|
||||
@ -1989,7 +1985,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
|
||||
if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
|
||||
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
|
||||
else
|
||||
atomic_bs = (1 + ctrl->subsys->awupf) * bs;
|
||||
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
|
||||
}
|
||||
|
||||
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
|
||||
@ -1999,36 +1995,20 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
|
||||
io_opt = bs * (1 + le16_to_cpu(id->nows));
|
||||
}
|
||||
|
||||
blk_queue_logical_block_size(disk->queue, bs);
|
||||
/*
|
||||
* Linux filesystems assume writing a single physical block is
|
||||
* an atomic operation. Hence limit the physical block size to the
|
||||
* value of the Atomic Write Unit Power Fail parameter.
|
||||
*/
|
||||
blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
|
||||
blk_queue_io_min(disk->queue, phys_bs);
|
||||
blk_queue_io_opt(disk->queue, io_opt);
|
||||
|
||||
/*
|
||||
* Register a metadata profile for PI, or the plain non-integrity NVMe
|
||||
* metadata masquerading as Type 0 if supported, otherwise reject block
|
||||
* I/O to namespaces with metadata except when the namespace supports
|
||||
* PI, as it can strip/insert in that case.
|
||||
*/
|
||||
if (head->ms) {
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
|
||||
(head->features & NVME_NS_METADATA_SUPPORTED))
|
||||
nvme_init_integrity(disk, head,
|
||||
ctrl->max_integrity_segments);
|
||||
else if (!nvme_ns_has_pi(head))
|
||||
capacity = 0;
|
||||
}
|
||||
|
||||
set_capacity_and_notify(disk, capacity);
|
||||
|
||||
nvme_config_discard(ctrl, disk, head);
|
||||
blk_queue_max_write_zeroes_sectors(disk->queue,
|
||||
ctrl->max_zeroes_sectors);
|
||||
lim->logical_block_size = bs;
|
||||
lim->physical_block_size = min(phys_bs, atomic_bs);
|
||||
lim->io_min = phys_bs;
|
||||
lim->io_opt = io_opt;
|
||||
if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
|
||||
lim->max_write_zeroes_sectors = UINT_MAX;
|
||||
else
|
||||
lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
|
||||
return valid;
|
||||
}
|
||||
|
||||
static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
@ -2042,7 +2022,8 @@ static inline bool nvme_first_scan(struct gendisk *disk)
|
||||
return !disk_live(disk);
|
||||
}
|
||||
|
||||
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct nvme_ctrl *ctrl = ns->ctrl;
|
||||
u32 iob;
|
||||
@ -2070,38 +2051,36 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
|
||||
return;
|
||||
}
|
||||
|
||||
blk_queue_chunk_sectors(ns->queue, iob);
|
||||
lim->chunk_sectors = iob;
|
||||
}
|
||||
|
||||
static int nvme_update_ns_info_generic(struct nvme_ns *ns,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
int ret;
|
||||
|
||||
blk_mq_freeze_queue(ns->disk->queue);
|
||||
nvme_set_queue_limits(ns->ctrl, ns->queue);
|
||||
lim = queue_limits_start_update(ns->disk->queue);
|
||||
nvme_set_ctrl_limits(ns->ctrl, &lim);
|
||||
ret = queue_limits_commit_update(ns->disk->queue, &lim);
|
||||
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
|
||||
if (nvme_ns_head_multipath(ns->head)) {
|
||||
blk_mq_freeze_queue(ns->head->disk->queue);
|
||||
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
|
||||
nvme_mpath_revalidate_paths(ns);
|
||||
blk_stack_limits(&ns->head->disk->queue->limits,
|
||||
&ns->queue->limits, 0);
|
||||
ns->head->disk->flags |= GENHD_FL_HIDDEN;
|
||||
blk_mq_unfreeze_queue(ns->head->disk->queue);
|
||||
}
|
||||
|
||||
/* Hide the block-interface for these devices */
|
||||
ns->disk->flags |= GENHD_FL_HIDDEN;
|
||||
set_bit(NVME_NS_READY, &ns->flags);
|
||||
|
||||
return 0;
|
||||
if (!ret)
|
||||
ret = -ENODEV;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
struct nvme_ns_info *info)
|
||||
{
|
||||
bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
|
||||
struct queue_limits lim;
|
||||
struct nvme_id_ns_nvm *nvm = NULL;
|
||||
struct nvme_id_ns *id;
|
||||
sector_t capacity;
|
||||
unsigned lbaf;
|
||||
int ret;
|
||||
|
||||
@ -2113,30 +2092,52 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
/* namespace not allocated or attached */
|
||||
info->is_removed = true;
|
||||
ret = -ENODEV;
|
||||
goto error;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
|
||||
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
blk_mq_freeze_queue(ns->disk->queue);
|
||||
lbaf = nvme_lbaf_index(id->flbas);
|
||||
ns->head->lba_shift = id->lbaf[lbaf].ds;
|
||||
ns->head->nuse = le64_to_cpu(id->nuse);
|
||||
nvme_set_queue_limits(ns->ctrl, ns->queue);
|
||||
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
|
||||
|
||||
ret = nvme_configure_metadata(ns->ctrl, ns->head, id);
|
||||
if (ret < 0) {
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
goto out;
|
||||
}
|
||||
nvme_set_chunk_sectors(ns, id);
|
||||
nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id);
|
||||
|
||||
if (ns->head->ids.csi == NVME_CSI_ZNS) {
|
||||
ret = nvme_update_zone_info(ns, lbaf);
|
||||
lim = queue_limits_start_update(ns->disk->queue);
|
||||
nvme_set_ctrl_limits(ns->ctrl, &lim);
|
||||
nvme_configure_metadata(ns->ctrl, ns->head, id, nvm);
|
||||
nvme_set_chunk_sectors(ns, id, &lim);
|
||||
if (!nvme_update_disk_info(ns, id, &lim))
|
||||
capacity = 0;
|
||||
nvme_config_discard(ns, &lim);
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
|
||||
ns->head->ids.csi == NVME_CSI_ZNS) {
|
||||
ret = nvme_update_zone_info(ns, lbaf, &lim);
|
||||
if (ret) {
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = queue_limits_commit_update(ns->disk->queue, &lim);
|
||||
if (ret) {
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register a metadata profile for PI, or the plain non-integrity NVMe
|
||||
* metadata masquerading as Type 0 if supported, otherwise reject block
|
||||
* I/O to namespaces with metadata except when the namespace supports
|
||||
* PI, as it can strip/insert in that case.
|
||||
*/
|
||||
if (!nvme_init_integrity(ns->disk, ns->head))
|
||||
capacity = 0;
|
||||
|
||||
set_capacity_and_notify(ns->disk, capacity);
|
||||
|
||||
/*
|
||||
* Only set the DEAC bit if the device guarantees that reads from
|
||||
@ -2147,28 +2148,50 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
|
||||
ns->head->features |= NVME_NS_DEAC;
|
||||
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
|
||||
blk_queue_write_cache(ns->disk->queue, vwc, vwc);
|
||||
set_bit(NVME_NS_READY, &ns->flags);
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
|
||||
if (blk_queue_is_zoned(ns->queue)) {
|
||||
ret = nvme_revalidate_zones(ns);
|
||||
ret = blk_revalidate_disk_zones(ns->disk, NULL);
|
||||
if (ret && !nvme_first_scan(ns->disk))
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (nvme_ns_head_multipath(ns->head)) {
|
||||
blk_mq_freeze_queue(ns->head->disk->queue);
|
||||
nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id);
|
||||
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
|
||||
nvme_mpath_revalidate_paths(ns);
|
||||
blk_stack_limits(&ns->head->disk->queue->limits,
|
||||
&ns->queue->limits, 0);
|
||||
disk_update_readahead(ns->head->disk);
|
||||
blk_mq_unfreeze_queue(ns->head->disk->queue);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(nvm);
|
||||
kfree(id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
{
|
||||
bool unsupported = false;
|
||||
int ret;
|
||||
|
||||
switch (info->ids.csi) {
|
||||
case NVME_CSI_ZNS:
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
dev_info(ns->ctrl->device,
|
||||
"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
|
||||
info->nsid);
|
||||
ret = nvme_update_ns_info_generic(ns, info);
|
||||
break;
|
||||
}
|
||||
ret = nvme_update_ns_info_block(ns, info);
|
||||
break;
|
||||
case NVME_CSI_NVM:
|
||||
ret = nvme_update_ns_info_block(ns, info);
|
||||
break;
|
||||
default:
|
||||
dev_info(ns->ctrl->device,
|
||||
"block device for nsid %u not supported (csi %u)\n",
|
||||
info->nsid, info->ids.csi);
|
||||
ret = nvme_update_ns_info_generic(ns, info);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If probing fails due an unsupported feature, hide the block device,
|
||||
* but still allow other access.
|
||||
@ -2176,33 +2199,30 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
if (ret == -ENODEV) {
|
||||
ns->disk->flags |= GENHD_FL_HIDDEN;
|
||||
set_bit(NVME_NS_READY, &ns->flags);
|
||||
unsupported = true;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
kfree(id);
|
||||
return ret;
|
||||
}
|
||||
if (!ret && nvme_ns_head_multipath(ns->head)) {
|
||||
struct queue_limits lim;
|
||||
|
||||
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
|
||||
{
|
||||
switch (info->ids.csi) {
|
||||
case NVME_CSI_ZNS:
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
|
||||
dev_info(ns->ctrl->device,
|
||||
"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
|
||||
info->nsid);
|
||||
return nvme_update_ns_info_generic(ns, info);
|
||||
}
|
||||
return nvme_update_ns_info_block(ns, info);
|
||||
case NVME_CSI_NVM:
|
||||
return nvme_update_ns_info_block(ns, info);
|
||||
default:
|
||||
dev_info(ns->ctrl->device,
|
||||
"block device for nsid %u not supported (csi %u)\n",
|
||||
info->nsid, info->ids.csi);
|
||||
return nvme_update_ns_info_generic(ns, info);
|
||||
blk_mq_freeze_queue(ns->head->disk->queue);
|
||||
if (unsupported)
|
||||
ns->head->disk->flags |= GENHD_FL_HIDDEN;
|
||||
else
|
||||
nvme_init_integrity(ns->head->disk, ns->head);
|
||||
set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
|
||||
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
|
||||
nvme_mpath_revalidate_paths(ns);
|
||||
|
||||
lim = queue_limits_start_update(ns->head->disk->queue);
|
||||
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
|
||||
ns->head->disk->disk_name);
|
||||
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
|
||||
blk_mq_unfreeze_queue(ns->head->disk->queue);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_SED_OPAL
|
||||
@ -2877,7 +2897,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
|
||||
subsys->awupf = le16_to_cpu(id->awupf);
|
||||
nvme_mpath_default_iopolicy(subsys);
|
||||
|
||||
subsys->dev.class = nvme_subsys_class;
|
||||
subsys->dev.class = &nvme_subsys_class;
|
||||
subsys->dev.release = nvme_release_subsystem;
|
||||
subsys->dev.groups = nvme_subsys_attrs_groups;
|
||||
dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
|
||||
@ -3117,11 +3137,17 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!ctrl->maxcmd) {
|
||||
dev_err(ctrl->device, "Maximum outstanding commands is 0\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_init_identify(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
struct nvme_id_ctrl *id;
|
||||
u32 max_hw_sectors;
|
||||
bool prev_apst_enabled;
|
||||
@ -3188,7 +3214,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
|
||||
ctrl->max_hw_sectors =
|
||||
min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
|
||||
|
||||
nvme_set_queue_limits(ctrl, ctrl->admin_q);
|
||||
lim = queue_limits_start_update(ctrl->admin_q);
|
||||
nvme_set_ctrl_limits(ctrl, &lim);
|
||||
ret = queue_limits_commit_update(ctrl->admin_q, &lim);
|
||||
if (ret)
|
||||
goto out_free;
|
||||
|
||||
ctrl->sgls = le32_to_cpu(id->sgls);
|
||||
ctrl->kas = le16_to_cpu(id->kas);
|
||||
ctrl->max_namespaces = le32_to_cpu(id->mnan);
|
||||
@ -3420,7 +3451,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
|
||||
if (minor < 0)
|
||||
return minor;
|
||||
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
|
||||
cdev_device->class = nvme_ns_chr_class;
|
||||
cdev_device->class = &nvme_ns_chr_class;
|
||||
cdev_device->release = nvme_cdev_rel;
|
||||
device_initialize(cdev_device);
|
||||
cdev_init(cdev, fops);
|
||||
@ -3692,7 +3723,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
|
||||
if (!ns)
|
||||
return;
|
||||
|
||||
disk = blk_mq_alloc_disk(ctrl->tagset, ns);
|
||||
disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns);
|
||||
if (IS_ERR(disk))
|
||||
goto out_free_ns;
|
||||
disk->fops = &nvme_bdev_ops;
|
||||
@ -4353,6 +4384,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
|
||||
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
|
||||
const struct blk_mq_ops *ops, unsigned int cmd_size)
|
||||
{
|
||||
struct queue_limits lim = {};
|
||||
int ret;
|
||||
|
||||
memset(set, 0, sizeof(*set));
|
||||
@ -4372,14 +4404,14 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ctrl->admin_q = blk_mq_init_queue(set);
|
||||
ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
|
||||
if (IS_ERR(ctrl->admin_q)) {
|
||||
ret = PTR_ERR(ctrl->admin_q);
|
||||
goto out_free_tagset;
|
||||
}
|
||||
|
||||
if (ctrl->ops->flags & NVME_F_FABRICS) {
|
||||
ctrl->fabrics_q = blk_mq_init_queue(set);
|
||||
ctrl->fabrics_q = blk_mq_alloc_queue(set, NULL, NULL);
|
||||
if (IS_ERR(ctrl->fabrics_q)) {
|
||||
ret = PTR_ERR(ctrl->fabrics_q);
|
||||
goto out_cleanup_admin_q;
|
||||
@ -4443,7 +4475,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
|
||||
return ret;
|
||||
|
||||
if (ctrl->ops->flags & NVME_F_FABRICS) {
|
||||
ctrl->connect_q = blk_mq_init_queue(set);
|
||||
ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL);
|
||||
if (IS_ERR(ctrl->connect_q)) {
|
||||
ret = PTR_ERR(ctrl->connect_q);
|
||||
goto out_free_tag_set;
|
||||
@ -4613,7 +4645,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
|
||||
ctrl->device = &ctrl->ctrl_device;
|
||||
ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
|
||||
ctrl->instance);
|
||||
ctrl->device->class = nvme_class;
|
||||
ctrl->device->class = &nvme_class;
|
||||
ctrl->device->parent = ctrl->dev;
|
||||
if (ops->dev_attr_groups)
|
||||
ctrl->device->groups = ops->dev_attr_groups;
|
||||
@ -4846,42 +4878,36 @@ static int __init nvme_core_init(void)
|
||||
if (result < 0)
|
||||
goto destroy_delete_wq;
|
||||
|
||||
nvme_class = class_create("nvme");
|
||||
if (IS_ERR(nvme_class)) {
|
||||
result = PTR_ERR(nvme_class);
|
||||
result = class_register(&nvme_class);
|
||||
if (result)
|
||||
goto unregister_chrdev;
|
||||
}
|
||||
nvme_class->dev_uevent = nvme_class_uevent;
|
||||
|
||||
nvme_subsys_class = class_create("nvme-subsystem");
|
||||
if (IS_ERR(nvme_subsys_class)) {
|
||||
result = PTR_ERR(nvme_subsys_class);
|
||||
result = class_register(&nvme_subsys_class);
|
||||
if (result)
|
||||
goto destroy_class;
|
||||
}
|
||||
|
||||
result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
|
||||
"nvme-generic");
|
||||
if (result < 0)
|
||||
goto destroy_subsys_class;
|
||||
|
||||
nvme_ns_chr_class = class_create("nvme-generic");
|
||||
if (IS_ERR(nvme_ns_chr_class)) {
|
||||
result = PTR_ERR(nvme_ns_chr_class);
|
||||
result = class_register(&nvme_ns_chr_class);
|
||||
if (result)
|
||||
goto unregister_generic_ns;
|
||||
}
|
||||
|
||||
result = nvme_init_auth();
|
||||
if (result)
|
||||
goto destroy_ns_chr;
|
||||
return 0;
|
||||
|
||||
destroy_ns_chr:
|
||||
class_destroy(nvme_ns_chr_class);
|
||||
class_unregister(&nvme_ns_chr_class);
|
||||
unregister_generic_ns:
|
||||
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
|
||||
destroy_subsys_class:
|
||||
class_destroy(nvme_subsys_class);
|
||||
class_unregister(&nvme_subsys_class);
|
||||
destroy_class:
|
||||
class_destroy(nvme_class);
|
||||
class_unregister(&nvme_class);
|
||||
unregister_chrdev:
|
||||
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
|
||||
destroy_delete_wq:
|
||||
@ -4897,9 +4923,9 @@ static int __init nvme_core_init(void)
|
||||
static void __exit nvme_core_exit(void)
|
||||
{
|
||||
nvme_exit_auth();
|
||||
class_destroy(nvme_ns_chr_class);
|
||||
class_destroy(nvme_subsys_class);
|
||||
class_destroy(nvme_class);
|
||||
class_unregister(&nvme_ns_chr_class);
|
||||
class_unregister(&nvme_subsys_class);
|
||||
class_unregister(&nvme_class);
|
||||
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
|
||||
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
|
||||
destroy_workqueue(nvme_delete_wq);
|
||||
|
@ -638,7 +638,7 @@ static struct key *nvmf_parse_key(int key_id)
|
||||
}
|
||||
|
||||
key = key_lookup(key_id);
|
||||
if (!IS_ERR(key))
|
||||
if (IS_ERR(key))
|
||||
pr_err("key id %08x not found\n", key_id);
|
||||
else
|
||||
pr_debug("Using key id %08x\n", key_id);
|
||||
@ -1319,7 +1319,10 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static struct class *nvmf_class;
|
||||
static const struct class nvmf_class = {
|
||||
.name = "nvme-fabrics",
|
||||
};
|
||||
|
||||
static struct device *nvmf_device;
|
||||
static DEFINE_MUTEX(nvmf_dev_mutex);
|
||||
|
||||
@ -1439,15 +1442,14 @@ static int __init nvmf_init(void)
|
||||
if (!nvmf_default_host)
|
||||
return -ENOMEM;
|
||||
|
||||
nvmf_class = class_create("nvme-fabrics");
|
||||
if (IS_ERR(nvmf_class)) {
|
||||
ret = class_register(&nvmf_class);
|
||||
if (ret) {
|
||||
pr_err("couldn't register class nvme-fabrics\n");
|
||||
ret = PTR_ERR(nvmf_class);
|
||||
goto out_free_host;
|
||||
}
|
||||
|
||||
nvmf_device =
|
||||
device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
|
||||
device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
|
||||
if (IS_ERR(nvmf_device)) {
|
||||
pr_err("couldn't create nvme-fabrics device!\n");
|
||||
ret = PTR_ERR(nvmf_device);
|
||||
@ -1463,9 +1465,9 @@ static int __init nvmf_init(void)
|
||||
return 0;
|
||||
|
||||
out_destroy_device:
|
||||
device_destroy(nvmf_class, MKDEV(0, 0));
|
||||
device_destroy(&nvmf_class, MKDEV(0, 0));
|
||||
out_destroy_class:
|
||||
class_destroy(nvmf_class);
|
||||
class_unregister(&nvmf_class);
|
||||
out_free_host:
|
||||
nvmf_host_put(nvmf_default_host);
|
||||
return ret;
|
||||
@ -1474,8 +1476,8 @@ static int __init nvmf_init(void)
|
||||
static void __exit nvmf_exit(void)
|
||||
{
|
||||
misc_deregister(&nvmf_misc);
|
||||
device_destroy(nvmf_class, MKDEV(0, 0));
|
||||
class_destroy(nvmf_class);
|
||||
device_destroy(&nvmf_class, MKDEV(0, 0));
|
||||
class_unregister(&nvmf_class);
|
||||
nvmf_host_put(nvmf_default_host);
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
|
||||
|
@ -516,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work)
|
||||
|
||||
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
bool vwc = false;
|
||||
|
||||
mutex_init(&head->lock);
|
||||
@ -532,9 +533,14 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
|
||||
!nvme_is_unique_nsid(ctrl, head) || !multipath)
|
||||
return 0;
|
||||
|
||||
head->disk = blk_alloc_disk(ctrl->numa_node);
|
||||
if (!head->disk)
|
||||
return -ENOMEM;
|
||||
blk_set_stacking_limits(&lim);
|
||||
lim.dma_alignment = 3;
|
||||
if (head->ids.csi != NVME_CSI_ZNS)
|
||||
lim.max_zone_append_sectors = 0;
|
||||
|
||||
head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
|
||||
if (IS_ERR(head->disk))
|
||||
return PTR_ERR(head->disk);
|
||||
head->disk->fops = &nvme_ns_head_ops;
|
||||
head->disk->private_data = head;
|
||||
sprintf(head->disk->disk_name, "nvme%dn%d",
|
||||
@ -553,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
|
||||
ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
|
||||
blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
|
||||
|
||||
/* set to a default value of 512 until the disk is validated */
|
||||
blk_queue_logical_block_size(head->disk->queue, 512);
|
||||
blk_set_stacking_limits(&head->disk->queue->limits);
|
||||
blk_queue_dma_alignment(head->disk->queue, 3);
|
||||
|
||||
/* we need to propagate up the VMC settings */
|
||||
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
|
||||
vwc = true;
|
||||
|
@ -464,6 +464,7 @@ struct nvme_ns_head {
|
||||
u16 ms;
|
||||
u16 pi_size;
|
||||
u8 pi_type;
|
||||
u8 pi_offset;
|
||||
u8 guard_type;
|
||||
u16 sgs;
|
||||
u32 sws;
|
||||
@ -1035,11 +1036,11 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
|
||||
}
|
||||
#endif /* CONFIG_NVME_MULTIPATH */
|
||||
|
||||
int nvme_revalidate_zones(struct nvme_ns *ns);
|
||||
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
|
||||
unsigned int nr_zones, report_zones_cb cb, void *data);
|
||||
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
|
||||
struct queue_limits *lim);
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
|
||||
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
|
||||
struct nvme_command *cmnd,
|
||||
enum nvme_zone_mgmt_action action);
|
||||
@ -1050,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
|
||||
{
|
||||
return BLK_STS_NOTSUPP;
|
||||
}
|
||||
|
||||
static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
|
||||
{
|
||||
dev_warn(ns->ctrl->device,
|
||||
"Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
|
||||
return -EPROTONOSUPPORT;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
|
||||
|
@ -1006,6 +1006,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
|
||||
{
|
||||
int ret;
|
||||
bool changed;
|
||||
u16 max_queue_size;
|
||||
|
||||
ret = nvme_rdma_configure_admin_queue(ctrl, new);
|
||||
if (ret)
|
||||
@ -1030,11 +1031,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
|
||||
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
|
||||
}
|
||||
|
||||
if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
|
||||
if (ctrl->ctrl.max_integrity_segments)
|
||||
max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
|
||||
else
|
||||
max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
|
||||
|
||||
if (ctrl->ctrl.sqsize + 1 > max_queue_size) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"ctrl sqsize %u > max queue size %u, clamping down\n",
|
||||
ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
|
||||
ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
|
||||
"ctrl sqsize %u > max queue size %u, clamping down\n",
|
||||
ctrl->ctrl.sqsize + 1, max_queue_size);
|
||||
ctrl->ctrl.sqsize = max_queue_size - 1;
|
||||
}
|
||||
|
||||
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
|
||||
|
@ -221,14 +221,11 @@ static int ns_update_nuse(struct nvme_ns *ns)
|
||||
|
||||
ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
|
||||
if (ret)
|
||||
goto out_free_id;
|
||||
return ret;
|
||||
|
||||
ns->head->nuse = le64_to_cpu(id->nuse);
|
||||
|
||||
out_free_id:
|
||||
kfree(id);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
|
||||
|
@ -7,16 +7,6 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include "nvme.h"
|
||||
|
||||
int nvme_revalidate_zones(struct nvme_ns *ns)
|
||||
{
|
||||
struct request_queue *q = ns->queue;
|
||||
|
||||
blk_queue_chunk_sectors(q, ns->head->zsze);
|
||||
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
|
||||
|
||||
return blk_revalidate_disk_zones(ns->disk, NULL);
|
||||
}
|
||||
|
||||
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_command c = { };
|
||||
@ -45,10 +35,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
|
||||
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct nvme_effects_log *log = ns->head->effects;
|
||||
struct request_queue *q = ns->queue;
|
||||
struct nvme_command c = { };
|
||||
struct nvme_id_ns_zns *id;
|
||||
int status;
|
||||
@ -109,10 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
|
||||
goto free_data;
|
||||
}
|
||||
|
||||
disk_set_zoned(ns->disk);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
|
||||
disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
|
||||
lim->zoned = 1;
|
||||
lim->max_open_zones = le32_to_cpu(id->mor) + 1;
|
||||
lim->max_active_zones = le32_to_cpu(id->mar) + 1;
|
||||
lim->chunk_sectors = ns->head->zsze;
|
||||
lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
|
||||
free_data:
|
||||
kfree(id);
|
||||
return status;
|
||||
|
@ -428,7 +428,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
|
||||
id->cqes = (0x4 << 4) | 0x4;
|
||||
|
||||
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
|
||||
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
|
||||
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
|
||||
|
||||
id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
|
||||
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
|
||||
|
@ -273,6 +273,32 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
|
||||
|
||||
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
|
||||
|
||||
static ssize_t nvmet_param_max_queue_size_show(struct config_item *item,
|
||||
char *page)
|
||||
{
|
||||
struct nvmet_port *port = to_nvmet_port(item);
|
||||
|
||||
return snprintf(page, PAGE_SIZE, "%d\n", port->max_queue_size);
|
||||
}
|
||||
|
||||
static ssize_t nvmet_param_max_queue_size_store(struct config_item *item,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
struct nvmet_port *port = to_nvmet_port(item);
|
||||
int ret;
|
||||
|
||||
if (nvmet_is_port_enabled(port, __func__))
|
||||
return -EACCES;
|
||||
ret = kstrtoint(page, 0, &port->max_queue_size);
|
||||
if (ret) {
|
||||
pr_err("Invalid value '%s' for max_queue_size\n", page);
|
||||
return -EINVAL;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR(nvmet_, param_max_queue_size);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
|
||||
char *page)
|
||||
@ -1859,6 +1885,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
|
||||
&nvmet_attr_addr_trtype,
|
||||
&nvmet_attr_addr_tsas,
|
||||
&nvmet_attr_param_inline_data_size,
|
||||
&nvmet_attr_param_max_queue_size,
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
&nvmet_attr_param_pi_enable,
|
||||
#endif
|
||||
@ -1917,6 +1944,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
|
||||
INIT_LIST_HEAD(&port->subsystems);
|
||||
INIT_LIST_HEAD(&port->referrals);
|
||||
port->inline_data_size = -1; /* < 0 == let the transport choose */
|
||||
port->max_queue_size = -1; /* < 0 == let the transport choose */
|
||||
|
||||
port->disc_addr.portid = cpu_to_le16(portid);
|
||||
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user