Merge branch 'for-6.5/block-late' into block-6.5

* for-6.5/block-late:
  blk-sysfs: add a new attr_group for blk_mq
  blk-iocost: move wbt_enable/disable_default() out of spinlock
  blk-wbt: cleanup rwb_enabled() and wbt_disabled()
  blk-wbt: remove dead code to handle wbt enable/disable with io inflight
  blk-wbt: don't create wbt sysfs entry if CONFIG_BLK_WBT is disabled
  blk-mq: fix two misuses on RQF_USE_SCHED
  blk-throttle: Fix io statistics for cgroup v1
  bcache: Fix bcache device claiming
  bcache: Alloc holder object before async registration
  raid10: avoid spin_lock from fastpath from raid10_unplug()
  md: fix 'delete_mutex' deadlock
  md: use mddev->external to select holder in export_rdev()
  md/raid1-10: fix casting from randomized structure in raid1_submit_write()
  md/raid10: fix the condition to call bio_end_io_acct()
This commit is contained in:
Jens Axboe 2023-06-28 16:08:19 -06:00
commit 3a08284ff2
14 changed files with 208 additions and 220 deletions

View File

@ -2086,6 +2086,9 @@ void blk_cgroup_bio_start(struct bio *bio)
struct blkg_iostat_set *bis; struct blkg_iostat_set *bis;
unsigned long flags; unsigned long flags;
if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
return;
/* Root-level stats are sourced from system-wide IO stats */ /* Root-level stats are sourced from system-wide IO stats */
if (!cgroup_parent(blkcg->css.cgroup)) if (!cgroup_parent(blkcg->css.cgroup))
return; return;
@ -2116,8 +2119,7 @@ void blk_cgroup_bio_start(struct bio *bio)
} }
u64_stats_update_end_irqrestore(&bis->sync, flags); u64_stats_update_end_irqrestore(&bis->sync, flags);
if (cgroup_subsys_on_dfl(io_cgrp_subsys)) cgroup_rstat_updated(blkcg->css.cgroup, cpu);
cgroup_rstat_updated(blkcg->css.cgroup, cpu);
put_cpu(); put_cpu();
} }

View File

@ -3301,11 +3301,9 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_stat_enable_accounting(disk->queue); blk_stat_enable_accounting(disk->queue);
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = true; ioc->enabled = true;
wbt_disable_default(disk);
} else { } else {
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = false; ioc->enabled = false;
wbt_enable_default(disk);
} }
if (user) { if (user) {
@ -3318,6 +3316,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ioc_refresh_params(ioc, true); ioc_refresh_params(ioc, true);
spin_unlock_irq(&ioc->lock); spin_unlock_irq(&ioc->lock);
if (enable)
wbt_disable_default(disk);
else
wbt_enable_default(disk);
blk_mq_unquiesce_queue(disk->queue); blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue); blk_mq_unfreeze_queue(disk->queue);

View File

@ -1280,7 +1280,11 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
if (!plug->multiple_queues && last && last->q != rq->q) if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true; plug->multiple_queues = true;
if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED)) /*
* Any request allocated from sched tags can't be issued to
* ->queue_rqs() directly
*/
if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
plug->has_elevator = true; plug->has_elevator = true;
rq->rq_next = NULL; rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq); rq_list_add(&plug->mq_list, rq);

View File

@ -47,19 +47,6 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
return count; return count;
} }
static ssize_t queue_var_store64(s64 *var, const char *page)
{
int err;
s64 v;
err = kstrtos64(page, 10, &v);
if (err < 0)
return err;
*var = v;
return 0;
}
static ssize_t queue_requests_show(struct request_queue *q, char *page) static ssize_t queue_requests_show(struct request_queue *q, char *page)
{ {
return queue_var_show(q->nr_requests, page); return queue_var_show(q->nr_requests, page);
@ -451,61 +438,6 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
return count; return count;
} }
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
if (!wbt_rq_qos(q))
return -EINVAL;
if (wbt_disabled(q))
return sprintf(page, "0\n");
return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
}
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
size_t count)
{
struct rq_qos *rqos;
ssize_t ret;
s64 val;
ret = queue_var_store64(&val, page);
if (ret < 0)
return ret;
if (val < -1)
return -EINVAL;
rqos = wbt_rq_qos(q);
if (!rqos) {
ret = wbt_init(q->disk);
if (ret)
return ret;
}
if (val == -1)
val = wbt_default_latency_nsec(q);
else if (val >= 0)
val *= 1000ULL;
if (wbt_get_min_lat(q) == val)
return count;
/*
* Ensure that the queue is idled, in case the latency update
* ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens.
*/
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
wbt_set_min_lat(q, val);
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
return count;
}
static ssize_t queue_wc_show(struct request_queue *q, char *page) static ssize_t queue_wc_show(struct request_queue *q, char *page)
{ {
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@ -598,7 +530,6 @@ QUEUE_RW_ENTRY(queue_wc, "write_cache");
QUEUE_RO_ENTRY(queue_fua, "fua"); QUEUE_RO_ENTRY(queue_fua, "fua");
QUEUE_RO_ENTRY(queue_dax, "dax"); QUEUE_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment"); QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
@ -617,8 +548,79 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats");
QUEUE_RW_ENTRY(queue_random, "add_random"); QUEUE_RW_ENTRY(queue_random, "add_random");
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
#ifdef CONFIG_BLK_WBT
static ssize_t queue_var_store64(s64 *var, const char *page)
{
int err;
s64 v;
err = kstrtos64(page, 10, &v);
if (err < 0)
return err;
*var = v;
return 0;
}
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
if (!wbt_rq_qos(q))
return -EINVAL;
if (wbt_disabled(q))
return sprintf(page, "0\n");
return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
}
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
size_t count)
{
struct rq_qos *rqos;
ssize_t ret;
s64 val;
ret = queue_var_store64(&val, page);
if (ret < 0)
return ret;
if (val < -1)
return -EINVAL;
rqos = wbt_rq_qos(q);
if (!rqos) {
ret = wbt_init(q->disk);
if (ret)
return ret;
}
if (val == -1)
val = wbt_default_latency_nsec(q);
else if (val >= 0)
val *= 1000ULL;
if (wbt_get_min_lat(q) == val)
return count;
/*
* Ensure that the queue is idled, in case the latency update
* ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens.
*/
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
wbt_set_min_lat(q, val);
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
return count;
}
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif
static struct attribute *queue_attrs[] = { static struct attribute *queue_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr, &queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr, &queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr, &queue_max_sectors_entry.attr,
@ -626,7 +628,6 @@ static struct attribute *queue_attrs[] = {
&queue_max_discard_segments_entry.attr, &queue_max_discard_segments_entry.attr,
&queue_max_integrity_segments_entry.attr, &queue_max_integrity_segments_entry.attr,
&queue_max_segment_size_entry.attr, &queue_max_segment_size_entry.attr,
&elv_iosched_entry.attr,
&queue_hw_sector_size_entry.attr, &queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr, &queue_logical_block_size_entry.attr,
&queue_physical_block_size_entry.attr, &queue_physical_block_size_entry.attr,
@ -647,7 +648,6 @@ static struct attribute *queue_attrs[] = {
&queue_max_open_zones_entry.attr, &queue_max_open_zones_entry.attr,
&queue_max_active_zones_entry.attr, &queue_max_active_zones_entry.attr,
&queue_nomerges_entry.attr, &queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr, &queue_iostats_entry.attr,
&queue_stable_writes_entry.attr, &queue_stable_writes_entry.attr,
&queue_random_entry.attr, &queue_random_entry.attr,
@ -655,9 +655,7 @@ static struct attribute *queue_attrs[] = {
&queue_wc_entry.attr, &queue_wc_entry.attr,
&queue_fua_entry.attr, &queue_fua_entry.attr,
&queue_dax_entry.attr, &queue_dax_entry.attr,
&queue_wb_lat_entry.attr,
&queue_poll_delay_entry.attr, &queue_poll_delay_entry.attr,
&queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
&blk_throtl_sample_time_entry.attr, &blk_throtl_sample_time_entry.attr,
#endif #endif
@ -666,16 +664,23 @@ static struct attribute *queue_attrs[] = {
NULL, NULL,
}; };
static struct attribute *blk_mq_queue_attrs[] = {
&queue_requests_entry.attr,
&elv_iosched_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_WBT
&queue_wb_lat_entry.attr,
#endif
NULL,
};
static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr, static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
int n) int n)
{ {
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
if (attr == &queue_io_timeout_entry.attr &&
(!q->mq_ops || !q->mq_ops->timeout))
return 0;
if ((attr == &queue_max_open_zones_entry.attr || if ((attr == &queue_max_open_zones_entry.attr ||
attr == &queue_max_active_zones_entry.attr) && attr == &queue_max_active_zones_entry.attr) &&
!blk_queue_is_zoned(q)) !blk_queue_is_zoned(q))
@ -684,11 +689,30 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
return attr->mode; return attr->mode;
} }
static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
if (!queue_is_mq(q))
return 0;
if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
return 0;
return attr->mode;
}
static struct attribute_group queue_attr_group = { static struct attribute_group queue_attr_group = {
.attrs = queue_attrs, .attrs = queue_attrs,
.is_visible = queue_attr_visible, .is_visible = queue_attr_visible,
}; };
static struct attribute_group blk_mq_queue_attr_group = {
.attrs = blk_mq_queue_attrs,
.is_visible = blk_mq_queue_attr_visible,
};
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
@ -733,6 +757,7 @@ static const struct sysfs_ops queue_sysfs_ops = {
static const struct attribute_group *blk_queue_attr_groups[] = { static const struct attribute_group *blk_queue_attr_groups[] = {
&queue_attr_group, &queue_attr_group,
&blk_mq_queue_attr_group,
NULL NULL
}; };

View File

@ -2178,12 +2178,6 @@ bool __blk_throtl_bio(struct bio *bio)
rcu_read_lock(); rcu_read_lock();
if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
bio->bi_iter.bi_size);
blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
}
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
throtl_update_latency_buckets(td); throtl_update_latency_buckets(td);

View File

@ -185,6 +185,15 @@ static inline bool blk_should_throtl(struct bio *bio)
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
int rw = bio_data_dir(bio); int rw = bio_data_dir(bio);
if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
bio_set_flag(bio, BIO_CGROUP_ACCT);
blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
bio->bi_iter.bi_size);
}
blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
}
/* iops limit is always counted */ /* iops limit is always counted */
if (tg->has_rules_iops[rw]) if (tg->has_rules_iops[rw])
return true; return true;

View File

@ -146,7 +146,7 @@ enum {
static inline bool rwb_enabled(struct rq_wb *rwb) static inline bool rwb_enabled(struct rq_wb *rwb)
{ {
return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT && return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
rwb->wb_normal != 0; rwb->enable_state != WBT_STATE_OFF_MANUAL;
} }
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@ -200,15 +200,6 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
inflight = atomic_dec_return(&rqw->inflight); inflight = atomic_dec_return(&rqw->inflight);
/*
* wbt got disabled with IO in flight. Wake up any potential
* waiters, we don't have to do more than that.
*/
if (unlikely(!rwb_enabled(rwb))) {
rwb_wake_all(rwb);
return;
}
/* /*
* For discards, our limit is always the background. For writes, if * For discards, our limit is always the background. For writes, if
* the device does write back caching, drop further down before we * the device does write back caching, drop further down before we
@ -503,8 +494,7 @@ bool wbt_disabled(struct request_queue *q)
{ {
struct rq_qos *rqos = wbt_rq_qos(q); struct rq_qos *rqos = wbt_rq_qos(q);
return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT || return !rqos || !rwb_enabled(RQWB(rqos));
RQWB(rqos)->enable_state == WBT_STATE_OFF_MANUAL;
} }
u64 wbt_get_min_lat(struct request_queue *q) u64 wbt_get_min_lat(struct request_queue *q)
@ -545,13 +535,6 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{ {
unsigned int limit; unsigned int limit;
/*
* If we got disabled, just return UINT_MAX. This ensures that
* we'll properly inc a new IO, and dec+wakeup at the end.
*/
if (!rwb_enabled(rwb))
return UINT_MAX;
if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD) if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
return rwb->wb_background; return rwb->wb_background;

View File

@ -18,10 +18,6 @@ u64 wbt_default_latency_nsec(struct request_queue *);
#else #else
static inline int wbt_init(struct gendisk *disk)
{
return -EINVAL;
}
static inline void wbt_disable_default(struct gendisk *disk) static inline void wbt_disable_default(struct gendisk *disk)
{ {
} }
@ -31,21 +27,6 @@ static inline void wbt_enable_default(struct gendisk *disk)
static inline void wbt_set_write_cache(struct request_queue *q, bool wc) static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
{ {
} }
static inline u64 wbt_get_min_lat(struct request_queue *q)
{
return 0;
}
static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
{
}
static inline u64 wbt_default_latency_nsec(struct request_queue *q)
{
return 0;
}
static inline bool wbt_disabled(struct request_queue *q)
{
return true;
}
#endif /* CONFIG_BLK_WBT */ #endif /* CONFIG_BLK_WBT */

View File

@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl)
put_page(virt_to_page(dc->sb_disk)); put_page(virt_to_page(dc->sb_disk));
if (!IS_ERR_OR_NULL(dc->bdev)) if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, bcache_kobj); blkdev_put(dc->bdev, dc);
wake_up(&unregister_wait); wake_up(&unregister_wait);
@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
memcpy(&dc->sb, sb, sizeof(struct cache_sb)); memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->bdev = bdev; dc->bdev = bdev;
dc->bdev->bd_holder = dc;
dc->sb_disk = sb_disk; dc->sb_disk = sb_disk;
if (cached_dev_init(dc, sb->block_size << 9)) if (cached_dev_init(dc, sb->block_size << 9))
@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj)
put_page(virt_to_page(ca->sb_disk)); put_page(virt_to_page(ca->sb_disk));
if (!IS_ERR_OR_NULL(ca->bdev)) if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, bcache_kobj); blkdev_put(ca->bdev, ca);
kfree(ca); kfree(ca);
module_put(THIS_MODULE); module_put(THIS_MODULE);
@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
memcpy(&ca->sb, sb, sizeof(struct cache_sb)); memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev; ca->bdev = bdev;
ca->bdev->bd_holder = ca;
ca->sb_disk = sb_disk; ca->sb_disk = sb_disk;
if (bdev_max_discard_sectors((bdev))) if (bdev_max_discard_sectors((bdev)))
@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
* call blkdev_put() to bdev in bch_cache_release(). So we * call blkdev_put() to bdev in bch_cache_release(). So we
* explicitly call blkdev_put() here. * explicitly call blkdev_put() here.
*/ */
blkdev_put(bdev, bcache_kobj); blkdev_put(bdev, ca);
if (ret == -ENOMEM) if (ret == -ENOMEM)
err = "cache_alloc(): -ENOMEM"; err = "cache_alloc(): -ENOMEM";
else if (ret == -EPERM) else if (ret == -EPERM)
@ -2448,6 +2446,7 @@ struct async_reg_args {
struct cache_sb *sb; struct cache_sb *sb;
struct cache_sb_disk *sb_disk; struct cache_sb_disk *sb_disk;
struct block_device *bdev; struct block_device *bdev;
void *holder;
}; };
static void register_bdev_worker(struct work_struct *work) static void register_bdev_worker(struct work_struct *work)
@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work)
int fail = false; int fail = false;
struct async_reg_args *args = struct async_reg_args *args =
container_of(work, struct async_reg_args, reg_work.work); container_of(work, struct async_reg_args, reg_work.work);
struct cached_dev *dc;
dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc) {
fail = true;
put_page(virt_to_page(args->sb_disk));
blkdev_put(args->bdev, bcache_kobj);
goto out;
}
mutex_lock(&bch_register_lock); mutex_lock(&bch_register_lock);
if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder)
< 0)
fail = true; fail = true;
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
out:
if (fail) if (fail)
pr_info("error %s: fail to register backing device\n", pr_info("error %s: fail to register backing device\n",
args->path); args->path);
@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work)
int fail = false; int fail = false;
struct async_reg_args *args = struct async_reg_args *args =
container_of(work, struct async_reg_args, reg_work.work); container_of(work, struct async_reg_args, reg_work.work);
struct cache *ca;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca) {
fail = true;
put_page(virt_to_page(args->sb_disk));
blkdev_put(args->bdev, bcache_kobj);
goto out;
}
/* blkdev_put() will be called in bch_cache_release() */ /* blkdev_put() will be called in bch_cache_release() */
if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder))
fail = true; fail = true;
out:
if (fail) if (fail)
pr_info("error %s: fail to register cache device\n", pr_info("error %s: fail to register cache device\n",
args->path); args->path);
@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args)
queue_delayed_work(system_wq, &args->reg_work, 10); queue_delayed_work(system_wq, &args->reg_work, 10);
} }
static void *alloc_holder_object(struct cache_sb *sb)
{
if (SB_IS_BDEV(sb))
return kzalloc(sizeof(struct cached_dev), GFP_KERNEL);
return kzalloc(sizeof(struct cache), GFP_KERNEL);
}
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size) const char *buffer, size_t size)
{ {
@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
char *path = NULL; char *path = NULL;
struct cache_sb *sb; struct cache_sb *sb;
struct cache_sb_disk *sb_disk; struct cache_sb_disk *sb_disk;
struct block_device *bdev; struct block_device *bdev, *bdev2;
void *holder = NULL;
ssize_t ret; ssize_t ret;
bool async_registration = false; bool async_registration = false;
bool quiet = false;
#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
async_registration = true; async_registration = true;
@ -2558,10 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
ret = -EINVAL; ret = -EINVAL;
err = "failed to open device"; err = "failed to open device";
bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE, bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
bcache_kobj, NULL); if (IS_ERR(bdev))
goto out_free_sb;
err = "failed to set blocksize";
if (set_blocksize(bdev, 4096))
goto out_blkdev_put;
err = read_super(sb, bdev, &sb_disk);
if (err)
goto out_blkdev_put;
holder = alloc_holder_object(sb);
if (!holder) {
ret = -ENOMEM;
err = "cannot allocate memory";
goto out_put_sb_page;
}
/* Now reopen in exclusive mode with proper holder */
bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
holder, NULL);
blkdev_put(bdev, NULL);
bdev = bdev2;
if (IS_ERR(bdev)) { if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) { ret = PTR_ERR(bdev);
bdev = NULL;
if (ret == -EBUSY) {
dev_t dev; dev_t dev;
mutex_lock(&bch_register_lock); mutex_lock(&bch_register_lock);
@ -2571,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
else else
err = "device busy"; err = "device busy";
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
if (attr == &ksysfs_register_quiet) if (attr == &ksysfs_register_quiet) {
goto done; quiet = true;
ret = size;
}
} }
goto out_free_sb; goto out_free_holder;
} }
err = "failed to set blocksize";
if (set_blocksize(bdev, 4096))
goto out_blkdev_put;
err = read_super(sb, bdev, &sb_disk);
if (err)
goto out_blkdev_put;
err = "failed to register device"; err = "failed to register device";
if (async_registration) { if (async_registration) {
@ -2595,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!args) { if (!args) {
ret = -ENOMEM; ret = -ENOMEM;
err = "cannot allocate memory"; err = "cannot allocate memory";
goto out_put_sb_page; goto out_free_holder;
} }
args->path = path; args->path = path;
args->sb = sb; args->sb = sb;
args->sb_disk = sb_disk; args->sb_disk = sb_disk;
args->bdev = bdev; args->bdev = bdev;
args->holder = holder;
register_device_async(args); register_device_async(args);
/* No wait and returns to user space */ /* No wait and returns to user space */
goto async_done; goto async_done;
} }
if (SB_IS_BDEV(sb)) { if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc) {
ret = -ENOMEM;
err = "cannot allocate memory";
goto out_put_sb_page;
}
mutex_lock(&bch_register_lock); mutex_lock(&bch_register_lock);
ret = register_bdev(sb, sb_disk, bdev, dc); ret = register_bdev(sb, sb_disk, bdev, holder);
mutex_unlock(&bch_register_lock); mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */ /* blkdev_put() will be called in cached_dev_free() */
if (ret < 0) if (ret < 0)
goto out_free_sb; goto out_free_sb;
} else { } else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca) {
ret = -ENOMEM;
err = "cannot allocate memory";
goto out_put_sb_page;
}
/* blkdev_put() will be called in bch_cache_release() */ /* blkdev_put() will be called in bch_cache_release() */
ret = register_cache(sb, sb_disk, bdev, ca); ret = register_cache(sb, sb_disk, bdev, holder);
if (ret) if (ret)
goto out_free_sb; goto out_free_sb;
} }
done:
kfree(sb); kfree(sb);
kfree(path); kfree(path);
module_put(THIS_MODULE); module_put(THIS_MODULE);
async_done: async_done:
return size; return size;
out_free_holder:
kfree(holder);
out_put_sb_page: out_put_sb_page:
put_page(virt_to_page(sb_disk)); put_page(virt_to_page(sb_disk));
out_blkdev_put: out_blkdev_put:
blkdev_put(bdev, register_bcache); if (bdev)
blkdev_put(bdev, holder);
out_free_sb: out_free_sb:
kfree(sb); kfree(sb);
out_free_path: out_free_path:
@ -2656,7 +2650,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
out_module_put: out_module_put:
module_put(THIS_MODULE); module_put(THIS_MODULE);
out: out:
pr_info("error %s: %s\n", path?path:"", err); if (!quiet)
pr_info("error %s: %s\n", path?path:"", err);
return ret; return ret;
} }

View File

@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
{ {
mutex_init(&mddev->open_mutex); mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex); mutex_init(&mddev->reconfig_mutex);
mutex_init(&mddev->delete_mutex);
mutex_init(&mddev->bitmap_info.mutex); mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks); INIT_LIST_HEAD(&mddev->disks);
INIT_LIST_HEAD(&mddev->all_mddevs); INIT_LIST_HEAD(&mddev->all_mddevs);
@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev)
static const struct attribute_group md_redundancy_group; static const struct attribute_group md_redundancy_group;
static void md_free_rdev(struct mddev *mddev) void mddev_unlock(struct mddev *mddev)
{ {
struct md_rdev *rdev; struct md_rdev *rdev;
struct md_rdev *tmp; struct md_rdev *tmp;
LIST_HEAD(delete);
mutex_lock(&mddev->delete_mutex); if (!list_empty(&mddev->deleting))
if (list_empty(&mddev->deleting)) list_splice_init(&mddev->deleting, &delete);
goto out;
list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
list_del_init(&rdev->same_set);
kobject_del(&rdev->kobj);
export_rdev(rdev, mddev);
}
out:
mutex_unlock(&mddev->delete_mutex);
}
void mddev_unlock(struct mddev *mddev)
{
if (mddev->to_remove) { if (mddev->to_remove) {
/* These cannot be removed under reconfig_mutex as /* These cannot be removed under reconfig_mutex as
* an access to the files will try to take reconfig_mutex * an access to the files will try to take reconfig_mutex
@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev)
} else } else
mutex_unlock(&mddev->reconfig_mutex); mutex_unlock(&mddev->reconfig_mutex);
md_free_rdev(mddev); list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
list_del_init(&rdev->same_set);
kobject_del(&rdev->kobj);
export_rdev(rdev, mddev);
}
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
@ -2458,7 +2450,7 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev)
if (test_bit(AutoDetected, &rdev->flags)) if (test_bit(AutoDetected, &rdev->flags))
md_autodetect_dev(rdev->bdev->bd_dev); md_autodetect_dev(rdev->bdev->bd_dev);
#endif #endif
blkdev_put(rdev->bdev, mddev->major_version == -2 ? &claim_rdev : rdev); blkdev_put(rdev->bdev, mddev->external ? &claim_rdev : rdev);
rdev->bdev = NULL; rdev->bdev = NULL;
kobject_put(&rdev->kobj); kobject_put(&rdev->kobj);
} }
@ -2488,9 +2480,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
* reconfig_mutex is held, hence it can't be called under * reconfig_mutex is held, hence it can't be called under
* reconfig_mutex and it's delayed to mddev_unlock(). * reconfig_mutex and it's delayed to mddev_unlock().
*/ */
mutex_lock(&mddev->delete_mutex);
list_add(&rdev->same_set, &mddev->deleting); list_add(&rdev->same_set, &mddev->deleting);
mutex_unlock(&mddev->delete_mutex);
} }
static void export_array(struct mddev *mddev) static void export_array(struct mddev *mddev)
@ -6140,7 +6130,7 @@ static void md_clean(struct mddev *mddev)
mddev->resync_min = 0; mddev->resync_min = 0;
mddev->resync_max = MaxSector; mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector; mddev->reshape_position = MaxSector;
mddev->external = 0; /* we still need mddev->external in export_rdev, do not clear it yet */
mddev->persistent = 0; mddev->persistent = 0;
mddev->level = LEVEL_NONE; mddev->level = LEVEL_NONE;
mddev->clevel[0] = 0; mddev->clevel[0] = 0;

View File

@ -531,11 +531,9 @@ struct mddev {
/* /*
* Temporarily store rdev that will be finally removed when * Temporarily store rdev that will be finally removed when
* reconfig_mutex is unlocked. * reconfig_mutex is unlocked, protected by reconfig_mutex.
*/ */
struct list_head deleting; struct list_head deleting;
/* Protect the deleting list */
struct mutex delete_mutex;
bool has_superblocks:1; bool has_superblocks:1;
bool fail_last_dev:1; bool fail_last_dev:1;

View File

@ -116,7 +116,7 @@ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
static inline void raid1_submit_write(struct bio *bio) static inline void raid1_submit_write(struct bio *bio)
{ {
struct md_rdev *rdev = (struct md_rdev *)bio->bi_bdev; struct md_rdev *rdev = (void *)bio->bi_bdev;
bio->bi_next = NULL; bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev); bio_set_dev(bio, rdev->bdev);

View File

@ -325,7 +325,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) if (r10_bio->start_time)
bio_end_io_acct(bio, r10_bio->start_time); bio_end_io_acct(bio, r10_bio->start_time);
bio_endio(bio); bio_endio(bio);
/* /*
@ -1118,7 +1118,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending); bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
kfree(plug); kfree(plug);
return; return;
@ -1127,7 +1127,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
/* we aren't scheduling, so we can do the write-out directly. */ /* we aren't scheduling, so we can do the write-out directly. */
bio = bio_list_get(&plug->pending); bio = bio_list_get(&plug->pending);
raid1_prepare_flush_writes(mddev->bitmap); raid1_prepare_flush_writes(mddev->bitmap);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
while (bio) { /* submit pending writes */ while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next; struct bio *next = bio->bi_next;

View File

@ -852,7 +852,11 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror, struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *)) void (*complete)(struct io_comp_batch *))
{ {
if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror || /*
* blk_mq_end_request_batch() can't end request allocated from
* sched tags
*/
if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror ||
(req->end_io && !blk_rq_is_passthrough(req))) (req->end_io && !blk_rq_is_passthrough(req)))
return false; return false;