Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-01-01 10:43:43 +00:00)
block-6.5-2023-07-03
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmSjJ2IQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpsQMEACQiUBw81tXvetYhz3P/4KrrjvUobgqMU0w
jtrxqMgPee9FbqCShpj76c+La5wu23DnlrCXoHZxFQuiQLnsX5xFV66NYVi+W1CN
k5MHP7f2e9V0T7qJ9UoHFRV1k22LF4X6T8njEZimxsm/uXfpav/knkhI7nUDnB1K
wxlu9akD2Bo/X9O2NTS+X6qjoawZ6rDWN15THMXlC45VzJPLmIcs07Ev+mvw21KE
XqasoZrxEO0S8dWxmJgJGqnRIOQptTS5U+0OPBZT8H220Qp/1q0pQHPw6iLXNrkc
w1a2W1Bge012gjJt7gCMkdDnZb76sKiyGuMbFME7DoRbLCQeaOtoSfmg7NoRI2gp
74TCSr7dPWZUVUy5Tmsy0DCv0552vIbnlQ69W6Xwx8YkplM3FPiMpWrQ5JWEHdvv
Zl84mLP6Yyo54JVuk9zi8q/2L0HfyfMDj4UM/mNs8hwmcUSbPO2TKdIWDaq8xPuS
Ed+D+kg6XFux8tLnCSDLNbaD5JE+ak9gTVhNdRa/zFE04o/OeidscKEqRSYTkdXL
2p34qtw5kEQocO4Pa3eUGO6KJCDTR36Rms5p6ZFybL4O2oZYrAbRi1TGDxaG2Hag
GCr2vaFbmz1zbGuMpFhLha5B7HeDLs+PHOn+B1iUNjEr9RC0EOHV7moJKqjxlnCh
4mBkK/Nlyg==
=kSeX
-----END PGP SIGNATURE-----

Merge tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:
 "Mostly items that came in a bit late for the initial pull request,
  wanted to make sure they had the appropriate amount of linux-next
  soak before going upstream.

  Outside of stragglers, just generic fixes for either merge window
  items, or longer standing bugs"

* tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux: (25 commits)
  md/raid0: add discard support for the 'original' layout
  nvme: disable controller on reset state failure
  nvme: sync timeout work on failed reset
  nvme: ensure unquiesce on teardown
  cdrom/gdrom: Fix build error
  nvme: improved uring polling
  block: add request polling helper
  nvme-mpath: fix I/O failure with EAGAIN when failing over I/O
  nvme: host: fix command name spelling
  blk-sysfs: add a new attr_group for blk_mq
  blk-iocost: move wbt_enable/disable_default() out of spinlock
  blk-wbt: cleanup rwb_enabled() and wbt_disabled()
  blk-wbt: remove dead code to handle wbt enable/disable with io inflight
  blk-wbt: don't create wbt sysfs entry if CONFIG_BLK_WBT is disabled
  blk-mq: fix two misuses on RQF_USE_SCHED
  blk-throttle: Fix io statistics for cgroup v1
  bcache: Fix bcache device claiming
  bcache: Alloc holder object before async registration
  raid10: avoid spin_lock from fastpath from raid10_unplug()
  md: fix 'delete_mutex' deadlock
  ...
This commit is contained in: commit e50df24979
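For context on the "block: add request polling helper" entry above: the diff below exports a new blk_rq_poll() that polls a request directly instead of going through a queue/cookie pair, and the nvme passthrough code is converted to it. The following is an illustrative sketch only (not part of this merge) of how a caller might busy-wait on a pollable request, modelled on the blk_rq_poll_completion() loop further down; the function name example_poll_until_done and the completion argument are assumptions made for the example.

#include <linux/blk-mq.h>
#include <linux/completion.h>
#include <linux/sched.h>

/*
 * Illustrative sketch: spin on a pollable request until its end_io path
 * signals @done.  blk_rq_poll() itself checks blk_rq_is_poll() and the
 * queue usage counter, so the fallback below is only an extra guard.
 */
static void example_poll_until_done(struct request *rq, struct completion *done)
{
	if (!blk_rq_is_poll(rq)) {
		wait_for_completion(done);	/* not pollable: just sleep */
		return;
	}

	do {
		blk_rq_poll(rq, NULL, 0);	/* reap completions on rq's hctx */
		cond_resched();
	} while (!completion_done(done));
}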
@ -2086,6 +2086,9 @@ void blk_cgroup_bio_start(struct bio *bio)
|
|||||||
struct blkg_iostat_set *bis;
|
struct blkg_iostat_set *bis;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
|
if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
|
||||||
|
return;
|
||||||
|
|
||||||
/* Root-level stats are sourced from system-wide IO stats */
|
/* Root-level stats are sourced from system-wide IO stats */
|
||||||
if (!cgroup_parent(blkcg->css.cgroup))
|
if (!cgroup_parent(blkcg->css.cgroup))
|
||||||
return;
|
return;
|
||||||
@ -2116,8 +2119,7 @@ void blk_cgroup_bio_start(struct bio *bio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
u64_stats_update_end_irqrestore(&bis->sync, flags);
|
u64_stats_update_end_irqrestore(&bis->sync, flags);
|
||||||
if (cgroup_subsys_on_dfl(io_cgrp_subsys))
|
cgroup_rstat_updated(blkcg->css.cgroup, cpu);
|
||||||
cgroup_rstat_updated(blkcg->css.cgroup, cpu);
|
|
||||||
put_cpu();
|
put_cpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3301,11 +3301,9 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||||||
blk_stat_enable_accounting(disk->queue);
|
blk_stat_enable_accounting(disk->queue);
|
||||||
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
|
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
|
||||||
ioc->enabled = true;
|
ioc->enabled = true;
|
||||||
wbt_disable_default(disk);
|
|
||||||
} else {
|
} else {
|
||||||
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
|
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
|
||||||
ioc->enabled = false;
|
ioc->enabled = false;
|
||||||
wbt_enable_default(disk);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (user) {
|
if (user) {
|
||||||
@ -3318,6 +3316,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
|||||||
ioc_refresh_params(ioc, true);
|
ioc_refresh_params(ioc, true);
|
||||||
spin_unlock_irq(&ioc->lock);
|
spin_unlock_irq(&ioc->lock);
|
||||||
|
|
||||||
|
if (enable)
|
||||||
|
wbt_disable_default(disk);
|
||||||
|
else
|
||||||
|
wbt_enable_default(disk);
|
||||||
|
|
||||||
blk_mq_unquiesce_queue(disk->queue);
|
blk_mq_unquiesce_queue(disk->queue);
|
||||||
blk_mq_unfreeze_queue(disk->queue);
|
blk_mq_unfreeze_queue(disk->queue);
|
||||||
|
|
||||||
|
@ -49,17 +49,8 @@ static void blk_mq_request_bypass_insert(struct request *rq,
|
|||||||
blk_insert_t flags);
|
blk_insert_t flags);
|
||||||
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||||
struct list_head *list);
|
struct list_head *list);
|
||||||
|
static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||||
static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
|
struct io_comp_batch *iob, unsigned int flags);
|
||||||
blk_qc_t qc)
|
|
||||||
{
|
|
||||||
return xa_load(&q->hctx_table, qc);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline blk_qc_t blk_rq_to_qc(struct request *rq)
|
|
||||||
{
|
|
||||||
return rq->mq_hctx->queue_num;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if any of the ctx, dispatch list or elevator
|
* Check if any of the ctx, dispatch list or elevator
|
||||||
@ -1248,7 +1239,7 @@ void blk_mq_start_request(struct request *rq)
|
|||||||
q->integrity.profile->prepare_fn(rq);
|
q->integrity.profile->prepare_fn(rq);
|
||||||
#endif
|
#endif
|
||||||
if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
|
if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
|
||||||
WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
|
WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_mq_start_request);
|
EXPORT_SYMBOL(blk_mq_start_request);
|
||||||
|
|
||||||
@ -1280,7 +1271,11 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
|||||||
|
|
||||||
if (!plug->multiple_queues && last && last->q != rq->q)
|
if (!plug->multiple_queues && last && last->q != rq->q)
|
||||||
plug->multiple_queues = true;
|
plug->multiple_queues = true;
|
||||||
if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
|
/*
|
||||||
|
* Any request allocated from sched tags can't be issued to
|
||||||
|
* ->queue_rqs() directly
|
||||||
|
*/
|
||||||
|
if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
|
||||||
plug->has_elevator = true;
|
plug->has_elevator = true;
|
||||||
rq->rq_next = NULL;
|
rq->rq_next = NULL;
|
||||||
rq_list_add(&plug->mq_list, rq);
|
rq_list_add(&plug->mq_list, rq);
|
||||||
@ -1350,7 +1345,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll);
|
|||||||
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
|
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0);
|
blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0);
|
||||||
cond_resched();
|
cond_resched();
|
||||||
} while (!completion_done(wait));
|
} while (!completion_done(wait));
|
||||||
}
|
}
|
||||||
@ -4745,10 +4740,9 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
|
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
|
||||||
|
|
||||||
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
|
static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||||
unsigned int flags)
|
struct io_comp_batch *iob, unsigned int flags)
|
||||||
{
|
{
|
||||||
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
|
|
||||||
long state = get_current_state();
|
long state = get_current_state();
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
@ -4773,6 +4767,32 @@ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
|
||||||
|
struct io_comp_batch *iob, unsigned int flags)
|
||||||
|
{
|
||||||
|
struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie);
|
||||||
|
|
||||||
|
return blk_hctx_poll(q, hctx, iob, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
|
||||||
|
unsigned int poll_flags)
|
||||||
|
{
|
||||||
|
struct request_queue *q = rq->q;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!blk_rq_is_poll(rq))
|
||||||
|
return 0;
|
||||||
|
if (!percpu_ref_tryget(&q->q_usage_counter))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags);
|
||||||
|
blk_queue_exit(q);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(blk_rq_poll);
|
||||||
|
|
||||||
unsigned int blk_mq_rq_cpu(struct request *rq)
|
unsigned int blk_mq_rq_cpu(struct request *rq)
|
||||||
{
|
{
|
||||||
return rq->mq_ctx->cpu;
|
return rq->mq_ctx->cpu;
|
||||||
|
@ -47,19 +47,6 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t queue_var_store64(s64 *var, const char *page)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
s64 v;
|
|
||||||
|
|
||||||
err = kstrtos64(page, 10, &v);
|
|
||||||
if (err < 0)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
*var = v;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
||||||
{
|
{
|
||||||
return queue_var_show(q->nr_requests, page);
|
return queue_var_show(q->nr_requests, page);
|
||||||
@ -451,61 +438,6 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
|
|
||||||
{
|
|
||||||
if (!wbt_rq_qos(q))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
if (wbt_disabled(q))
|
|
||||||
return sprintf(page, "0\n");
|
|
||||||
|
|
||||||
return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
|
|
||||||
size_t count)
|
|
||||||
{
|
|
||||||
struct rq_qos *rqos;
|
|
||||||
ssize_t ret;
|
|
||||||
s64 val;
|
|
||||||
|
|
||||||
ret = queue_var_store64(&val, page);
|
|
||||||
if (ret < 0)
|
|
||||||
return ret;
|
|
||||||
if (val < -1)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
rqos = wbt_rq_qos(q);
|
|
||||||
if (!rqos) {
|
|
||||||
ret = wbt_init(q->disk);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (val == -1)
|
|
||||||
val = wbt_default_latency_nsec(q);
|
|
||||||
else if (val >= 0)
|
|
||||||
val *= 1000ULL;
|
|
||||||
|
|
||||||
if (wbt_get_min_lat(q) == val)
|
|
||||||
return count;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Ensure that the queue is idled, in case the latency update
|
|
||||||
* ends up either enabling or disabling wbt completely. We can't
|
|
||||||
* have IO inflight if that happens.
|
|
||||||
*/
|
|
||||||
blk_mq_freeze_queue(q);
|
|
||||||
blk_mq_quiesce_queue(q);
|
|
||||||
|
|
||||||
wbt_set_min_lat(q, val);
|
|
||||||
|
|
||||||
blk_mq_unquiesce_queue(q);
|
|
||||||
blk_mq_unfreeze_queue(q);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t queue_wc_show(struct request_queue *q, char *page)
|
static ssize_t queue_wc_show(struct request_queue *q, char *page)
|
||||||
{
|
{
|
||||||
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
||||||
@ -598,7 +530,6 @@ QUEUE_RW_ENTRY(queue_wc, "write_cache");
|
|||||||
QUEUE_RO_ENTRY(queue_fua, "fua");
|
QUEUE_RO_ENTRY(queue_fua, "fua");
|
||||||
QUEUE_RO_ENTRY(queue_dax, "dax");
|
QUEUE_RO_ENTRY(queue_dax, "dax");
|
||||||
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
|
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
|
||||||
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
|
|
||||||
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
|
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
|
||||||
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
|
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
|
||||||
|
|
||||||
@ -617,8 +548,79 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats");
|
|||||||
QUEUE_RW_ENTRY(queue_random, "add_random");
|
QUEUE_RW_ENTRY(queue_random, "add_random");
|
||||||
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
|
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
|
||||||
|
|
||||||
|
#ifdef CONFIG_BLK_WBT
|
||||||
|
static ssize_t queue_var_store64(s64 *var, const char *page)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
s64 v;
|
||||||
|
|
||||||
|
err = kstrtos64(page, 10, &v);
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
*var = v;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
|
||||||
|
{
|
||||||
|
if (!wbt_rq_qos(q))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (wbt_disabled(q))
|
||||||
|
return sprintf(page, "0\n");
|
||||||
|
|
||||||
|
return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
|
||||||
|
size_t count)
|
||||||
|
{
|
||||||
|
struct rq_qos *rqos;
|
||||||
|
ssize_t ret;
|
||||||
|
s64 val;
|
||||||
|
|
||||||
|
ret = queue_var_store64(&val, page);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
if (val < -1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
rqos = wbt_rq_qos(q);
|
||||||
|
if (!rqos) {
|
||||||
|
ret = wbt_init(q->disk);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val == -1)
|
||||||
|
val = wbt_default_latency_nsec(q);
|
||||||
|
else if (val >= 0)
|
||||||
|
val *= 1000ULL;
|
||||||
|
|
||||||
|
if (wbt_get_min_lat(q) == val)
|
||||||
|
return count;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure that the queue is idled, in case the latency update
|
||||||
|
* ends up either enabling or disabling wbt completely. We can't
|
||||||
|
* have IO inflight if that happens.
|
||||||
|
*/
|
||||||
|
blk_mq_freeze_queue(q);
|
||||||
|
blk_mq_quiesce_queue(q);
|
||||||
|
|
||||||
|
wbt_set_min_lat(q, val);
|
||||||
|
|
||||||
|
blk_mq_unquiesce_queue(q);
|
||||||
|
blk_mq_unfreeze_queue(q);
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
|
||||||
|
#endif
|
||||||
|
|
||||||
static struct attribute *queue_attrs[] = {
|
static struct attribute *queue_attrs[] = {
|
||||||
&queue_requests_entry.attr,
|
|
||||||
&queue_ra_entry.attr,
|
&queue_ra_entry.attr,
|
||||||
&queue_max_hw_sectors_entry.attr,
|
&queue_max_hw_sectors_entry.attr,
|
||||||
&queue_max_sectors_entry.attr,
|
&queue_max_sectors_entry.attr,
|
||||||
@ -626,7 +628,6 @@ static struct attribute *queue_attrs[] = {
|
|||||||
&queue_max_discard_segments_entry.attr,
|
&queue_max_discard_segments_entry.attr,
|
||||||
&queue_max_integrity_segments_entry.attr,
|
&queue_max_integrity_segments_entry.attr,
|
||||||
&queue_max_segment_size_entry.attr,
|
&queue_max_segment_size_entry.attr,
|
||||||
&elv_iosched_entry.attr,
|
|
||||||
&queue_hw_sector_size_entry.attr,
|
&queue_hw_sector_size_entry.attr,
|
||||||
&queue_logical_block_size_entry.attr,
|
&queue_logical_block_size_entry.attr,
|
||||||
&queue_physical_block_size_entry.attr,
|
&queue_physical_block_size_entry.attr,
|
||||||
@ -647,7 +648,6 @@ static struct attribute *queue_attrs[] = {
|
|||||||
&queue_max_open_zones_entry.attr,
|
&queue_max_open_zones_entry.attr,
|
||||||
&queue_max_active_zones_entry.attr,
|
&queue_max_active_zones_entry.attr,
|
||||||
&queue_nomerges_entry.attr,
|
&queue_nomerges_entry.attr,
|
||||||
&queue_rq_affinity_entry.attr,
|
|
||||||
&queue_iostats_entry.attr,
|
&queue_iostats_entry.attr,
|
||||||
&queue_stable_writes_entry.attr,
|
&queue_stable_writes_entry.attr,
|
||||||
&queue_random_entry.attr,
|
&queue_random_entry.attr,
|
||||||
@ -655,9 +655,7 @@ static struct attribute *queue_attrs[] = {
|
|||||||
&queue_wc_entry.attr,
|
&queue_wc_entry.attr,
|
||||||
&queue_fua_entry.attr,
|
&queue_fua_entry.attr,
|
||||||
&queue_dax_entry.attr,
|
&queue_dax_entry.attr,
|
||||||
&queue_wb_lat_entry.attr,
|
|
||||||
&queue_poll_delay_entry.attr,
|
&queue_poll_delay_entry.attr,
|
||||||
&queue_io_timeout_entry.attr,
|
|
||||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||||
&blk_throtl_sample_time_entry.attr,
|
&blk_throtl_sample_time_entry.attr,
|
||||||
#endif
|
#endif
|
||||||
@ -666,16 +664,23 @@ static struct attribute *queue_attrs[] = {
|
|||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct attribute *blk_mq_queue_attrs[] = {
|
||||||
|
&queue_requests_entry.attr,
|
||||||
|
&elv_iosched_entry.attr,
|
||||||
|
&queue_rq_affinity_entry.attr,
|
||||||
|
&queue_io_timeout_entry.attr,
|
||||||
|
#ifdef CONFIG_BLK_WBT
|
||||||
|
&queue_wb_lat_entry.attr,
|
||||||
|
#endif
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
|
static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
|
||||||
int n)
|
int n)
|
||||||
{
|
{
|
||||||
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
||||||
struct request_queue *q = disk->queue;
|
struct request_queue *q = disk->queue;
|
||||||
|
|
||||||
if (attr == &queue_io_timeout_entry.attr &&
|
|
||||||
(!q->mq_ops || !q->mq_ops->timeout))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if ((attr == &queue_max_open_zones_entry.attr ||
|
if ((attr == &queue_max_open_zones_entry.attr ||
|
||||||
attr == &queue_max_active_zones_entry.attr) &&
|
attr == &queue_max_active_zones_entry.attr) &&
|
||||||
!blk_queue_is_zoned(q))
|
!blk_queue_is_zoned(q))
|
||||||
@ -684,11 +689,30 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
|
|||||||
return attr->mode;
|
return attr->mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
|
||||||
|
struct attribute *attr, int n)
|
||||||
|
{
|
||||||
|
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
|
||||||
|
struct request_queue *q = disk->queue;
|
||||||
|
|
||||||
|
if (!queue_is_mq(q))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return attr->mode;
|
||||||
|
}
|
||||||
|
|
||||||
static struct attribute_group queue_attr_group = {
|
static struct attribute_group queue_attr_group = {
|
||||||
.attrs = queue_attrs,
|
.attrs = queue_attrs,
|
||||||
.is_visible = queue_attr_visible,
|
.is_visible = queue_attr_visible,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct attribute_group blk_mq_queue_attr_group = {
|
||||||
|
.attrs = blk_mq_queue_attrs,
|
||||||
|
.is_visible = blk_mq_queue_attr_visible,
|
||||||
|
};
|
||||||
|
|
||||||
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
|
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
|
||||||
|
|
||||||
@ -733,6 +757,7 @@ static const struct sysfs_ops queue_sysfs_ops = {
|
|||||||
|
|
||||||
static const struct attribute_group *blk_queue_attr_groups[] = {
|
static const struct attribute_group *blk_queue_attr_groups[] = {
|
||||||
&queue_attr_group,
|
&queue_attr_group,
|
||||||
|
&blk_mq_queue_attr_group,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2178,12 +2178,6 @@ bool __blk_throtl_bio(struct bio *bio)
|
|||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
|
|
||||||
blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
|
|
||||||
bio->bi_iter.bi_size);
|
|
||||||
blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_lock_irq(&q->queue_lock);
|
spin_lock_irq(&q->queue_lock);
|
||||||
|
|
||||||
throtl_update_latency_buckets(td);
|
throtl_update_latency_buckets(td);
|
||||||
|
@ -185,6 +185,15 @@ static inline bool blk_should_throtl(struct bio *bio)
|
|||||||
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
|
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
|
||||||
int rw = bio_data_dir(bio);
|
int rw = bio_data_dir(bio);
|
||||||
|
|
||||||
|
if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
|
||||||
|
if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
|
||||||
|
bio_set_flag(bio, BIO_CGROUP_ACCT);
|
||||||
|
blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
|
||||||
|
bio->bi_iter.bi_size);
|
||||||
|
}
|
||||||
|
blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/* iops limit is always counted */
|
/* iops limit is always counted */
|
||||||
if (tg->has_rules_iops[rw])
|
if (tg->has_rules_iops[rw])
|
||||||
return true;
|
return true;
|
||||||
|
@ -146,7 +146,7 @@ enum {
|
|||||||
static inline bool rwb_enabled(struct rq_wb *rwb)
|
static inline bool rwb_enabled(struct rq_wb *rwb)
|
||||||
{
|
{
|
||||||
return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
|
return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
|
||||||
rwb->wb_normal != 0;
|
rwb->enable_state != WBT_STATE_OFF_MANUAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
|
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
|
||||||
@ -200,15 +200,6 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
|
|||||||
|
|
||||||
inflight = atomic_dec_return(&rqw->inflight);
|
inflight = atomic_dec_return(&rqw->inflight);
|
||||||
|
|
||||||
/*
|
|
||||||
* wbt got disabled with IO in flight. Wake up any potential
|
|
||||||
* waiters, we don't have to do more than that.
|
|
||||||
*/
|
|
||||||
if (unlikely(!rwb_enabled(rwb))) {
|
|
||||||
rwb_wake_all(rwb);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For discards, our limit is always the background. For writes, if
|
* For discards, our limit is always the background. For writes, if
|
||||||
* the device does write back caching, drop further down before we
|
* the device does write back caching, drop further down before we
|
||||||
@ -503,8 +494,7 @@ bool wbt_disabled(struct request_queue *q)
|
|||||||
{
|
{
|
||||||
struct rq_qos *rqos = wbt_rq_qos(q);
|
struct rq_qos *rqos = wbt_rq_qos(q);
|
||||||
|
|
||||||
return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT ||
|
return !rqos || !rwb_enabled(RQWB(rqos));
|
||||||
RQWB(rqos)->enable_state == WBT_STATE_OFF_MANUAL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 wbt_get_min_lat(struct request_queue *q)
|
u64 wbt_get_min_lat(struct request_queue *q)
|
||||||
@ -545,13 +535,6 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
|
|||||||
{
|
{
|
||||||
unsigned int limit;
|
unsigned int limit;
|
||||||
|
|
||||||
/*
|
|
||||||
* If we got disabled, just return UINT_MAX. This ensures that
|
|
||||||
* we'll properly inc a new IO, and dec+wakeup at the end.
|
|
||||||
*/
|
|
||||||
if (!rwb_enabled(rwb))
|
|
||||||
return UINT_MAX;
|
|
||||||
|
|
||||||
if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
|
if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD)
|
||||||
return rwb->wb_background;
|
return rwb->wb_background;
|
||||||
|
|
||||||
|
@ -18,10 +18,6 @@ u64 wbt_default_latency_nsec(struct request_queue *);
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline int wbt_init(struct gendisk *disk)
|
|
||||||
{
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
static inline void wbt_disable_default(struct gendisk *disk)
|
static inline void wbt_disable_default(struct gendisk *disk)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -31,21 +27,6 @@ static inline void wbt_enable_default(struct gendisk *disk)
|
|||||||
static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
|
static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
static inline u64 wbt_get_min_lat(struct request_queue *q)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
static inline u64 wbt_default_latency_nsec(struct request_queue *q)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
static inline bool wbt_disabled(struct request_queue *q)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* CONFIG_BLK_WBT */
|
#endif /* CONFIG_BLK_WBT */
|
||||||
|
|
||||||
|
@ -481,7 +481,7 @@ static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
|
|||||||
disk_check_media_change(disk);
|
disk_check_media_change(disk);
|
||||||
|
|
||||||
mutex_lock(&gdrom_mutex);
|
mutex_lock(&gdrom_mutex);
|
||||||
ret = cdrom_open(gd.cd_info);
|
ret = cdrom_open(gd.cd_info, mode);
|
||||||
mutex_unlock(&gdrom_mutex);
|
mutex_unlock(&gdrom_mutex);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -489,7 +489,7 @@ static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
|
|||||||
static void gdrom_bdops_release(struct gendisk *disk)
|
static void gdrom_bdops_release(struct gendisk *disk)
|
||||||
{
|
{
|
||||||
mutex_lock(&gdrom_mutex);
|
mutex_lock(&gdrom_mutex);
|
||||||
cdrom_release(gd.cd_info, mode);
|
cdrom_release(gd.cd_info);
|
||||||
mutex_unlock(&gdrom_mutex);
|
mutex_unlock(&gdrom_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl)
|
|||||||
put_page(virt_to_page(dc->sb_disk));
|
put_page(virt_to_page(dc->sb_disk));
|
||||||
|
|
||||||
if (!IS_ERR_OR_NULL(dc->bdev))
|
if (!IS_ERR_OR_NULL(dc->bdev))
|
||||||
blkdev_put(dc->bdev, bcache_kobj);
|
blkdev_put(dc->bdev, dc);
|
||||||
|
|
||||||
wake_up(&unregister_wait);
|
wake_up(&unregister_wait);
|
||||||
|
|
||||||
@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
|
|||||||
|
|
||||||
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
|
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
|
||||||
dc->bdev = bdev;
|
dc->bdev = bdev;
|
||||||
dc->bdev->bd_holder = dc;
|
|
||||||
dc->sb_disk = sb_disk;
|
dc->sb_disk = sb_disk;
|
||||||
|
|
||||||
if (cached_dev_init(dc, sb->block_size << 9))
|
if (cached_dev_init(dc, sb->block_size << 9))
|
||||||
@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj)
|
|||||||
put_page(virt_to_page(ca->sb_disk));
|
put_page(virt_to_page(ca->sb_disk));
|
||||||
|
|
||||||
if (!IS_ERR_OR_NULL(ca->bdev))
|
if (!IS_ERR_OR_NULL(ca->bdev))
|
||||||
blkdev_put(ca->bdev, bcache_kobj);
|
blkdev_put(ca->bdev, ca);
|
||||||
|
|
||||||
kfree(ca);
|
kfree(ca);
|
||||||
module_put(THIS_MODULE);
|
module_put(THIS_MODULE);
|
||||||
@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
|
|||||||
|
|
||||||
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
|
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
|
||||||
ca->bdev = bdev;
|
ca->bdev = bdev;
|
||||||
ca->bdev->bd_holder = ca;
|
|
||||||
ca->sb_disk = sb_disk;
|
ca->sb_disk = sb_disk;
|
||||||
|
|
||||||
if (bdev_max_discard_sectors((bdev)))
|
if (bdev_max_discard_sectors((bdev)))
|
||||||
@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
|
|||||||
* call blkdev_put() to bdev in bch_cache_release(). So we
|
* call blkdev_put() to bdev in bch_cache_release(). So we
|
||||||
* explicitly call blkdev_put() here.
|
* explicitly call blkdev_put() here.
|
||||||
*/
|
*/
|
||||||
blkdev_put(bdev, bcache_kobj);
|
blkdev_put(bdev, ca);
|
||||||
if (ret == -ENOMEM)
|
if (ret == -ENOMEM)
|
||||||
err = "cache_alloc(): -ENOMEM";
|
err = "cache_alloc(): -ENOMEM";
|
||||||
else if (ret == -EPERM)
|
else if (ret == -EPERM)
|
||||||
@ -2448,6 +2446,7 @@ struct async_reg_args {
|
|||||||
struct cache_sb *sb;
|
struct cache_sb *sb;
|
||||||
struct cache_sb_disk *sb_disk;
|
struct cache_sb_disk *sb_disk;
|
||||||
struct block_device *bdev;
|
struct block_device *bdev;
|
||||||
|
void *holder;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void register_bdev_worker(struct work_struct *work)
|
static void register_bdev_worker(struct work_struct *work)
|
||||||
@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work)
|
|||||||
int fail = false;
|
int fail = false;
|
||||||
struct async_reg_args *args =
|
struct async_reg_args *args =
|
||||||
container_of(work, struct async_reg_args, reg_work.work);
|
container_of(work, struct async_reg_args, reg_work.work);
|
||||||
struct cached_dev *dc;
|
|
||||||
|
|
||||||
dc = kzalloc(sizeof(*dc), GFP_KERNEL);
|
|
||||||
if (!dc) {
|
|
||||||
fail = true;
|
|
||||||
put_page(virt_to_page(args->sb_disk));
|
|
||||||
blkdev_put(args->bdev, bcache_kobj);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_lock(&bch_register_lock);
|
mutex_lock(&bch_register_lock);
|
||||||
if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
|
if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder)
|
||||||
|
< 0)
|
||||||
fail = true;
|
fail = true;
|
||||||
mutex_unlock(&bch_register_lock);
|
mutex_unlock(&bch_register_lock);
|
||||||
|
|
||||||
out:
|
|
||||||
if (fail)
|
if (fail)
|
||||||
pr_info("error %s: fail to register backing device\n",
|
pr_info("error %s: fail to register backing device\n",
|
||||||
args->path);
|
args->path);
|
||||||
@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work)
|
|||||||
int fail = false;
|
int fail = false;
|
||||||
struct async_reg_args *args =
|
struct async_reg_args *args =
|
||||||
container_of(work, struct async_reg_args, reg_work.work);
|
container_of(work, struct async_reg_args, reg_work.work);
|
||||||
struct cache *ca;
|
|
||||||
|
|
||||||
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
|
||||||
if (!ca) {
|
|
||||||
fail = true;
|
|
||||||
put_page(virt_to_page(args->sb_disk));
|
|
||||||
blkdev_put(args->bdev, bcache_kobj);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* blkdev_put() will be called in bch_cache_release() */
|
/* blkdev_put() will be called in bch_cache_release() */
|
||||||
if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
|
if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder))
|
||||||
fail = true;
|
fail = true;
|
||||||
|
|
||||||
out:
|
|
||||||
if (fail)
|
if (fail)
|
||||||
pr_info("error %s: fail to register cache device\n",
|
pr_info("error %s: fail to register cache device\n",
|
||||||
args->path);
|
args->path);
|
||||||
@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args)
|
|||||||
queue_delayed_work(system_wq, &args->reg_work, 10);
|
queue_delayed_work(system_wq, &args->reg_work, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *alloc_holder_object(struct cache_sb *sb)
|
||||||
|
{
|
||||||
|
if (SB_IS_BDEV(sb))
|
||||||
|
return kzalloc(sizeof(struct cached_dev), GFP_KERNEL);
|
||||||
|
return kzalloc(sizeof(struct cache), GFP_KERNEL);
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
||||||
const char *buffer, size_t size)
|
const char *buffer, size_t size)
|
||||||
{
|
{
|
||||||
@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
|||||||
char *path = NULL;
|
char *path = NULL;
|
||||||
struct cache_sb *sb;
|
struct cache_sb *sb;
|
||||||
struct cache_sb_disk *sb_disk;
|
struct cache_sb_disk *sb_disk;
|
||||||
struct block_device *bdev;
|
struct block_device *bdev, *bdev2;
|
||||||
|
void *holder = NULL;
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
bool async_registration = false;
|
bool async_registration = false;
|
||||||
|
bool quiet = false;
|
||||||
|
|
||||||
#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
|
#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
|
||||||
async_registration = true;
|
async_registration = true;
|
||||||
@ -2558,10 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
|||||||
|
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
err = "failed to open device";
|
err = "failed to open device";
|
||||||
bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE,
|
bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
|
||||||
bcache_kobj, NULL);
|
if (IS_ERR(bdev))
|
||||||
|
goto out_free_sb;
|
||||||
|
|
||||||
|
err = "failed to set blocksize";
|
||||||
|
if (set_blocksize(bdev, 4096))
|
||||||
|
goto out_blkdev_put;
|
||||||
|
|
||||||
|
err = read_super(sb, bdev, &sb_disk);
|
||||||
|
if (err)
|
||||||
|
goto out_blkdev_put;
|
||||||
|
|
||||||
|
holder = alloc_holder_object(sb);
|
||||||
|
if (!holder) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
err = "cannot allocate memory";
|
||||||
|
goto out_put_sb_page;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now reopen in exclusive mode with proper holder */
|
||||||
|
bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
|
||||||
|
holder, NULL);
|
||||||
|
blkdev_put(bdev, NULL);
|
||||||
|
bdev = bdev2;
|
||||||
if (IS_ERR(bdev)) {
|
if (IS_ERR(bdev)) {
|
||||||
if (bdev == ERR_PTR(-EBUSY)) {
|
ret = PTR_ERR(bdev);
|
||||||
|
bdev = NULL;
|
||||||
|
if (ret == -EBUSY) {
|
||||||
dev_t dev;
|
dev_t dev;
|
||||||
|
|
||||||
mutex_lock(&bch_register_lock);
|
mutex_lock(&bch_register_lock);
|
||||||
@ -2571,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
|||||||
else
|
else
|
||||||
err = "device busy";
|
err = "device busy";
|
||||||
mutex_unlock(&bch_register_lock);
|
mutex_unlock(&bch_register_lock);
|
||||||
if (attr == &ksysfs_register_quiet)
|
if (attr == &ksysfs_register_quiet) {
|
||||||
goto done;
|
quiet = true;
|
||||||
|
ret = size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
goto out_free_sb;
|
goto out_free_holder;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = "failed to set blocksize";
|
|
||||||
if (set_blocksize(bdev, 4096))
|
|
||||||
goto out_blkdev_put;
|
|
||||||
|
|
||||||
err = read_super(sb, bdev, &sb_disk);
|
|
||||||
if (err)
|
|
||||||
goto out_blkdev_put;
|
|
||||||
|
|
||||||
err = "failed to register device";
|
err = "failed to register device";
|
||||||
|
|
||||||
if (async_registration) {
|
if (async_registration) {
|
||||||
@ -2595,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
|||||||
if (!args) {
|
if (!args) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
err = "cannot allocate memory";
|
err = "cannot allocate memory";
|
||||||
goto out_put_sb_page;
|
goto out_free_holder;
|
||||||
}
|
}
|
||||||
|
|
||||||
args->path = path;
|
args->path = path;
|
||||||
args->sb = sb;
|
args->sb = sb;
|
||||||
args->sb_disk = sb_disk;
|
args->sb_disk = sb_disk;
|
||||||
args->bdev = bdev;
|
args->bdev = bdev;
|
||||||
|
args->holder = holder;
|
||||||
register_device_async(args);
|
register_device_async(args);
|
||||||
/* No wait and returns to user space */
|
/* No wait and returns to user space */
|
||||||
goto async_done;
|
goto async_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (SB_IS_BDEV(sb)) {
|
if (SB_IS_BDEV(sb)) {
|
||||||
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
|
|
||||||
|
|
||||||
if (!dc) {
|
|
||||||
ret = -ENOMEM;
|
|
||||||
err = "cannot allocate memory";
|
|
||||||
goto out_put_sb_page;
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_lock(&bch_register_lock);
|
mutex_lock(&bch_register_lock);
|
||||||
ret = register_bdev(sb, sb_disk, bdev, dc);
|
ret = register_bdev(sb, sb_disk, bdev, holder);
|
||||||
mutex_unlock(&bch_register_lock);
|
mutex_unlock(&bch_register_lock);
|
||||||
/* blkdev_put() will be called in cached_dev_free() */
|
/* blkdev_put() will be called in cached_dev_free() */
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out_free_sb;
|
goto out_free_sb;
|
||||||
} else {
|
} else {
|
||||||
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
|
||||||
|
|
||||||
if (!ca) {
|
|
||||||
ret = -ENOMEM;
|
|
||||||
err = "cannot allocate memory";
|
|
||||||
goto out_put_sb_page;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* blkdev_put() will be called in bch_cache_release() */
|
/* blkdev_put() will be called in bch_cache_release() */
|
||||||
ret = register_cache(sb, sb_disk, bdev, ca);
|
ret = register_cache(sb, sb_disk, bdev, holder);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out_free_sb;
|
goto out_free_sb;
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
|
||||||
kfree(sb);
|
kfree(sb);
|
||||||
kfree(path);
|
kfree(path);
|
||||||
module_put(THIS_MODULE);
|
module_put(THIS_MODULE);
|
||||||
async_done:
|
async_done:
|
||||||
return size;
|
return size;
|
||||||
|
|
||||||
|
out_free_holder:
|
||||||
|
kfree(holder);
|
||||||
out_put_sb_page:
|
out_put_sb_page:
|
||||||
put_page(virt_to_page(sb_disk));
|
put_page(virt_to_page(sb_disk));
|
||||||
out_blkdev_put:
|
out_blkdev_put:
|
||||||
blkdev_put(bdev, register_bcache);
|
if (bdev)
|
||||||
|
blkdev_put(bdev, holder);
|
||||||
out_free_sb:
|
out_free_sb:
|
||||||
kfree(sb);
|
kfree(sb);
|
||||||
out_free_path:
|
out_free_path:
|
||||||
@ -2656,7 +2650,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
|||||||
out_module_put:
|
out_module_put:
|
||||||
module_put(THIS_MODULE);
|
module_put(THIS_MODULE);
|
||||||
out:
|
out:
|
||||||
pr_info("error %s: %s\n", path?path:"", err);
|
if (!quiet)
|
||||||
|
pr_info("error %s: %s\n", path?path:"", err);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
|
|||||||
{
|
{
|
||||||
mutex_init(&mddev->open_mutex);
|
mutex_init(&mddev->open_mutex);
|
||||||
mutex_init(&mddev->reconfig_mutex);
|
mutex_init(&mddev->reconfig_mutex);
|
||||||
mutex_init(&mddev->delete_mutex);
|
|
||||||
mutex_init(&mddev->bitmap_info.mutex);
|
mutex_init(&mddev->bitmap_info.mutex);
|
||||||
INIT_LIST_HEAD(&mddev->disks);
|
INIT_LIST_HEAD(&mddev->disks);
|
||||||
INIT_LIST_HEAD(&mddev->all_mddevs);
|
INIT_LIST_HEAD(&mddev->all_mddevs);
|
||||||
@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev)
|
|||||||
|
|
||||||
static const struct attribute_group md_redundancy_group;
|
static const struct attribute_group md_redundancy_group;
|
||||||
|
|
||||||
static void md_free_rdev(struct mddev *mddev)
|
void mddev_unlock(struct mddev *mddev)
|
||||||
{
|
{
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
struct md_rdev *tmp;
|
struct md_rdev *tmp;
|
||||||
|
LIST_HEAD(delete);
|
||||||
|
|
||||||
mutex_lock(&mddev->delete_mutex);
|
if (!list_empty(&mddev->deleting))
|
||||||
if (list_empty(&mddev->deleting))
|
list_splice_init(&mddev->deleting, &delete);
|
||||||
goto out;
|
|
||||||
|
|
||||||
list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
|
|
||||||
list_del_init(&rdev->same_set);
|
|
||||||
kobject_del(&rdev->kobj);
|
|
||||||
export_rdev(rdev, mddev);
|
|
||||||
}
|
|
||||||
out:
|
|
||||||
mutex_unlock(&mddev->delete_mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
void mddev_unlock(struct mddev *mddev)
|
|
||||||
{
|
|
||||||
if (mddev->to_remove) {
|
if (mddev->to_remove) {
|
||||||
/* These cannot be removed under reconfig_mutex as
|
/* These cannot be removed under reconfig_mutex as
|
||||||
* an access to the files will try to take reconfig_mutex
|
* an access to the files will try to take reconfig_mutex
|
||||||
@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev)
|
|||||||
} else
|
} else
|
||||||
mutex_unlock(&mddev->reconfig_mutex);
|
mutex_unlock(&mddev->reconfig_mutex);
|
||||||
|
|
||||||
md_free_rdev(mddev);
|
list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
|
||||||
|
list_del_init(&rdev->same_set);
|
||||||
|
kobject_del(&rdev->kobj);
|
||||||
|
export_rdev(rdev, mddev);
|
||||||
|
}
|
||||||
|
|
||||||
md_wakeup_thread(mddev->thread);
|
md_wakeup_thread(mddev->thread);
|
||||||
wake_up(&mddev->sb_wait);
|
wake_up(&mddev->sb_wait);
|
||||||
@ -2458,7 +2450,7 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
|||||||
if (test_bit(AutoDetected, &rdev->flags))
|
if (test_bit(AutoDetected, &rdev->flags))
|
||||||
md_autodetect_dev(rdev->bdev->bd_dev);
|
md_autodetect_dev(rdev->bdev->bd_dev);
|
||||||
#endif
|
#endif
|
||||||
blkdev_put(rdev->bdev, mddev->major_version == -2 ? &claim_rdev : rdev);
|
blkdev_put(rdev->bdev, mddev->external ? &claim_rdev : rdev);
|
||||||
rdev->bdev = NULL;
|
rdev->bdev = NULL;
|
||||||
kobject_put(&rdev->kobj);
|
kobject_put(&rdev->kobj);
|
||||||
}
|
}
|
||||||
@ -2488,9 +2480,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
|
|||||||
* reconfig_mutex is held, hence it can't be called under
|
* reconfig_mutex is held, hence it can't be called under
|
||||||
* reconfig_mutex and it's delayed to mddev_unlock().
|
* reconfig_mutex and it's delayed to mddev_unlock().
|
||||||
*/
|
*/
|
||||||
mutex_lock(&mddev->delete_mutex);
|
|
||||||
list_add(&rdev->same_set, &mddev->deleting);
|
list_add(&rdev->same_set, &mddev->deleting);
|
||||||
mutex_unlock(&mddev->delete_mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void export_array(struct mddev *mddev)
|
static void export_array(struct mddev *mddev)
|
||||||
@ -6140,7 +6130,7 @@ static void md_clean(struct mddev *mddev)
|
|||||||
mddev->resync_min = 0;
|
mddev->resync_min = 0;
|
||||||
mddev->resync_max = MaxSector;
|
mddev->resync_max = MaxSector;
|
||||||
mddev->reshape_position = MaxSector;
|
mddev->reshape_position = MaxSector;
|
||||||
mddev->external = 0;
|
/* we still need mddev->external in export_rdev, do not clear it yet */
|
||||||
mddev->persistent = 0;
|
mddev->persistent = 0;
|
||||||
mddev->level = LEVEL_NONE;
|
mddev->level = LEVEL_NONE;
|
||||||
mddev->clevel[0] = 0;
|
mddev->clevel[0] = 0;
|
||||||
|
@ -531,11 +531,9 @@ struct mddev {
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Temporarily store rdev that will be finally removed when
|
* Temporarily store rdev that will be finally removed when
|
||||||
* reconfig_mutex is unlocked.
|
* reconfig_mutex is unlocked, protected by reconfig_mutex.
|
||||||
*/
|
*/
|
||||||
struct list_head deleting;
|
struct list_head deleting;
|
||||||
/* Protect the deleting list */
|
|
||||||
struct mutex delete_mutex;
|
|
||||||
|
|
||||||
bool has_superblocks:1;
|
bool has_superblocks:1;
|
||||||
bool fail_last_dev:1;
|
bool fail_last_dev:1;
|
||||||
|
@ -270,6 +270,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
|||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (conf->layout == RAID0_ORIG_LAYOUT) {
|
||||||
|
for (i = 1; i < conf->nr_strip_zones; i++) {
|
||||||
|
sector_t first_sector = conf->strip_zone[i-1].zone_end;
|
||||||
|
|
||||||
|
sector_div(first_sector, mddev->chunk_sectors);
|
||||||
|
zone = conf->strip_zone + i;
|
||||||
|
/* disk_shift is first disk index used in the zone */
|
||||||
|
zone->disk_shift = sector_div(first_sector,
|
||||||
|
zone->nb_dev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
|
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
|
||||||
*private_conf = conf;
|
*private_conf = conf;
|
||||||
|
|
||||||
@ -431,6 +443,20 @@ static int raid0_run(struct mddev *mddev)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert disk_index to the disk order in which it is read/written.
|
||||||
|
* For example, if we have 4 disks, they are numbered 0,1,2,3. If we
|
||||||
|
* write the disks starting at disk 3, then the read/write order would
|
||||||
|
* be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift()
|
||||||
|
* to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map
|
||||||
|
* to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in
|
||||||
|
* that 'output' space to understand the read/write disk ordering.
|
||||||
|
*/
|
||||||
|
static int map_disk_shift(int disk_index, int num_disks, int disk_shift)
|
||||||
|
{
|
||||||
|
return ((disk_index + num_disks - disk_shift) % num_disks);
|
||||||
|
}
|
||||||
|
|
||||||
static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
||||||
{
|
{
|
||||||
struct r0conf *conf = mddev->private;
|
struct r0conf *conf = mddev->private;
|
||||||
@ -444,7 +470,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
|||||||
sector_t end_disk_offset;
|
sector_t end_disk_offset;
|
||||||
unsigned int end_disk_index;
|
unsigned int end_disk_index;
|
||||||
unsigned int disk;
|
unsigned int disk;
|
||||||
|
sector_t orig_start, orig_end;
|
||||||
|
|
||||||
|
orig_start = start;
|
||||||
zone = find_zone(conf, &start);
|
zone = find_zone(conf, &start);
|
||||||
|
|
||||||
if (bio_end_sector(bio) > zone->zone_end) {
|
if (bio_end_sector(bio) > zone->zone_end) {
|
||||||
@ -458,6 +486,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
|||||||
} else
|
} else
|
||||||
end = bio_end_sector(bio);
|
end = bio_end_sector(bio);
|
||||||
|
|
||||||
|
orig_end = end;
|
||||||
if (zone != conf->strip_zone)
|
if (zone != conf->strip_zone)
|
||||||
end = end - zone[-1].zone_end;
|
end = end - zone[-1].zone_end;
|
||||||
|
|
||||||
@ -469,13 +498,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
|||||||
last_stripe_index = end;
|
last_stripe_index = end;
|
||||||
sector_div(last_stripe_index, stripe_size);
|
sector_div(last_stripe_index, stripe_size);
|
||||||
|
|
||||||
start_disk_index = (int)(start - first_stripe_index * stripe_size) /
|
/* In the first zone the original and alternate layouts are the same */
|
||||||
mddev->chunk_sectors;
|
if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) {
|
||||||
|
sector_div(orig_start, mddev->chunk_sectors);
|
||||||
|
start_disk_index = sector_div(orig_start, zone->nb_dev);
|
||||||
|
start_disk_index = map_disk_shift(start_disk_index,
|
||||||
|
zone->nb_dev,
|
||||||
|
zone->disk_shift);
|
||||||
|
sector_div(orig_end, mddev->chunk_sectors);
|
||||||
|
end_disk_index = sector_div(orig_end, zone->nb_dev);
|
||||||
|
end_disk_index = map_disk_shift(end_disk_index,
|
||||||
|
zone->nb_dev, zone->disk_shift);
|
||||||
|
} else {
|
||||||
|
start_disk_index = (int)(start - first_stripe_index * stripe_size) /
|
||||||
|
mddev->chunk_sectors;
|
||||||
|
end_disk_index = (int)(end - last_stripe_index * stripe_size) /
|
||||||
|
mddev->chunk_sectors;
|
||||||
|
}
|
||||||
start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
|
start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
|
||||||
mddev->chunk_sectors) +
|
mddev->chunk_sectors) +
|
||||||
first_stripe_index * mddev->chunk_sectors;
|
first_stripe_index * mddev->chunk_sectors;
|
||||||
end_disk_index = (int)(end - last_stripe_index * stripe_size) /
|
|
||||||
mddev->chunk_sectors;
|
|
||||||
end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
|
end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
|
||||||
mddev->chunk_sectors) +
|
mddev->chunk_sectors) +
|
||||||
last_stripe_index * mddev->chunk_sectors;
|
last_stripe_index * mddev->chunk_sectors;
|
||||||
@ -483,18 +525,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
|
|||||||
for (disk = 0; disk < zone->nb_dev; disk++) {
|
for (disk = 0; disk < zone->nb_dev; disk++) {
|
||||||
sector_t dev_start, dev_end;
|
sector_t dev_start, dev_end;
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
|
int compare_disk;
|
||||||
|
|
||||||
if (disk < start_disk_index)
|
compare_disk = map_disk_shift(disk, zone->nb_dev,
|
||||||
|
zone->disk_shift);
|
||||||
|
|
||||||
|
if (compare_disk < start_disk_index)
|
||||||
dev_start = (first_stripe_index + 1) *
|
dev_start = (first_stripe_index + 1) *
|
||||||
mddev->chunk_sectors;
|
mddev->chunk_sectors;
|
||||||
else if (disk > start_disk_index)
|
else if (compare_disk > start_disk_index)
|
||||||
dev_start = first_stripe_index * mddev->chunk_sectors;
|
dev_start = first_stripe_index * mddev->chunk_sectors;
|
||||||
else
|
else
|
||||||
dev_start = start_disk_offset;
|
dev_start = start_disk_offset;
|
||||||
|
|
||||||
if (disk < end_disk_index)
|
if (compare_disk < end_disk_index)
|
||||||
dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
|
dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
|
||||||
else if (disk > end_disk_index)
|
else if (compare_disk > end_disk_index)
|
||||||
dev_end = last_stripe_index * mddev->chunk_sectors;
|
dev_end = last_stripe_index * mddev->chunk_sectors;
|
||||||
else
|
else
|
||||||
dev_end = end_disk_offset;
|
dev_end = end_disk_offset;
|
||||||
|
@ -6,6 +6,7 @@ struct strip_zone {
|
|||||||
sector_t zone_end; /* Start of the next zone (in sectors) */
|
sector_t zone_end; /* Start of the next zone (in sectors) */
|
||||||
sector_t dev_start; /* Zone offset in real dev (in sectors) */
|
sector_t dev_start; /* Zone offset in real dev (in sectors) */
|
||||||
int nb_dev; /* # of devices attached to the zone */
|
int nb_dev; /* # of devices attached to the zone */
|
||||||
|
int disk_shift; /* start disk for the original layout */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Linux 3.14 (20d0189b101) made an unintended change to
|
/* Linux 3.14 (20d0189b101) made an unintended change to
|
||||||
|
@ -116,7 +116,7 @@ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
|
|||||||
|
|
||||||
static inline void raid1_submit_write(struct bio *bio)
|
static inline void raid1_submit_write(struct bio *bio)
|
||||||
{
|
{
|
||||||
struct md_rdev *rdev = (struct md_rdev *)bio->bi_bdev;
|
struct md_rdev *rdev = (void *)bio->bi_bdev;
|
||||||
|
|
||||||
bio->bi_next = NULL;
|
bio->bi_next = NULL;
|
||||||
bio_set_dev(bio, rdev->bdev);
|
bio_set_dev(bio, rdev->bdev);
|
||||||
|
@ -325,7 +325,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
|
|||||||
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
|
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
|
||||||
bio->bi_status = BLK_STS_IOERR;
|
bio->bi_status = BLK_STS_IOERR;
|
||||||
|
|
||||||
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
|
if (r10_bio->start_time)
|
||||||
bio_end_io_acct(bio, r10_bio->start_time);
|
bio_end_io_acct(bio, r10_bio->start_time);
|
||||||
bio_endio(bio);
|
bio_endio(bio);
|
||||||
/*
|
/*
|
||||||
@ -1118,7 +1118,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
|||||||
spin_lock_irq(&conf->device_lock);
|
spin_lock_irq(&conf->device_lock);
|
||||||
bio_list_merge(&conf->pending_bio_list, &plug->pending);
|
bio_list_merge(&conf->pending_bio_list, &plug->pending);
|
||||||
spin_unlock_irq(&conf->device_lock);
|
spin_unlock_irq(&conf->device_lock);
|
||||||
wake_up(&conf->wait_barrier);
|
wake_up_barrier(conf);
|
||||||
md_wakeup_thread(mddev->thread);
|
md_wakeup_thread(mddev->thread);
|
||||||
kfree(plug);
|
kfree(plug);
|
||||||
return;
|
return;
|
||||||
@ -1127,7 +1127,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
|||||||
/* we aren't scheduling, so we can do the write-out directly. */
|
/* we aren't scheduling, so we can do the write-out directly. */
|
||||||
bio = bio_list_get(&plug->pending);
|
bio = bio_list_get(&plug->pending);
|
||||||
raid1_prepare_flush_writes(mddev->bitmap);
|
raid1_prepare_flush_writes(mddev->bitmap);
|
||||||
wake_up(&conf->wait_barrier);
|
wake_up_barrier(conf);
|
||||||
|
|
||||||
while (bio) { /* submit pending writes */
|
while (bio) { /* submit pending writes */
|
||||||
struct bio *next = bio->bi_next;
|
struct bio *next = bio->bi_next;
|
||||||
|
@ -12,7 +12,7 @@ static const char * const nvme_ops[] = {
|
|||||||
[nvme_cmd_read] = "Read",
|
[nvme_cmd_read] = "Read",
|
||||||
[nvme_cmd_write_uncor] = "Write Uncorrectable",
|
[nvme_cmd_write_uncor] = "Write Uncorrectable",
|
||||||
[nvme_cmd_compare] = "Compare",
|
[nvme_cmd_compare] = "Compare",
|
||||||
[nvme_cmd_write_zeroes] = "Write Zeros",
|
[nvme_cmd_write_zeroes] = "Write Zeroes",
|
||||||
[nvme_cmd_dsm] = "Dataset Management",
|
[nvme_cmd_dsm] = "Dataset Management",
|
||||||
[nvme_cmd_verify] = "Verify",
|
[nvme_cmd_verify] = "Verify",
|
||||||
[nvme_cmd_resv_register] = "Reservation Register",
|
[nvme_cmd_resv_register] = "Reservation Register",
|
||||||
|
@ -1134,8 +1134,11 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
|
|||||||
mutex_unlock(&ctrl->scan_lock);
|
mutex_unlock(&ctrl->scan_lock);
|
||||||
}
|
}
|
||||||
if (effects & NVME_CMD_EFFECTS_CCC) {
|
if (effects & NVME_CMD_EFFECTS_CCC) {
|
||||||
dev_info(ctrl->device,
|
if (!test_and_set_bit(NVME_CTRL_DIRTY_CAPABILITY,
|
||||||
|
&ctrl->flags)) {
|
||||||
|
dev_info(ctrl->device,
|
||||||
"controller capabilities changed, reset may be required to take effect.\n");
|
"controller capabilities changed, reset may be required to take effect.\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
|
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
|
||||||
nvme_queue_scan(ctrl);
|
nvme_queue_scan(ctrl);
|
||||||
@ -3177,6 +3180,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags);
|
||||||
ctrl->identified = true;
|
ctrl->identified = true;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -505,7 +505,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
|
|||||||
{
|
{
|
||||||
struct io_uring_cmd *ioucmd = req->end_io_data;
|
struct io_uring_cmd *ioucmd = req->end_io_data;
|
||||||
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
||||||
void *cookie = READ_ONCE(ioucmd->cookie);
|
|
||||||
|
|
||||||
req->bio = pdu->bio;
|
req->bio = pdu->bio;
|
||||||
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
|
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
|
||||||
@ -518,10 +517,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
|
|||||||
* For iopoll, complete it directly.
|
* For iopoll, complete it directly.
|
||||||
* Otherwise, move the completion to task work.
|
* Otherwise, move the completion to task work.
|
||||||
*/
|
*/
|
||||||
if (cookie != NULL && blk_rq_is_poll(req))
|
if (blk_rq_is_poll(req)) {
|
||||||
|
WRITE_ONCE(ioucmd->cookie, NULL);
|
||||||
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
|
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
|
||||||
else
|
} else {
|
||||||
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
|
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
|
||||||
|
}
|
||||||
|
|
||||||
return RQ_END_IO_FREE;
|
return RQ_END_IO_FREE;
|
||||||
}
|
}
|
||||||
@ -531,7 +532,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
|
|||||||
{
|
{
|
||||||
struct io_uring_cmd *ioucmd = req->end_io_data;
|
struct io_uring_cmd *ioucmd = req->end_io_data;
|
||||||
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
|
||||||
void *cookie = READ_ONCE(ioucmd->cookie);
|
|
||||||
|
|
||||||
req->bio = pdu->bio;
|
req->bio = pdu->bio;
|
||||||
pdu->req = req;
|
pdu->req = req;
|
||||||
@ -540,10 +540,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
|
|||||||
* For iopoll, complete it directly.
|
* For iopoll, complete it directly.
|
||||||
* Otherwise, move the completion to task work.
|
* Otherwise, move the completion to task work.
|
||||||
*/
|
*/
|
||||||
if (cookie != NULL && blk_rq_is_poll(req))
|
if (blk_rq_is_poll(req)) {
|
||||||
|
WRITE_ONCE(ioucmd->cookie, NULL);
|
||||||
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
|
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
|
||||||
else
|
} else {
|
||||||
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
|
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
|
||||||
|
}
|
||||||
|
|
||||||
return RQ_END_IO_NONE;
|
return RQ_END_IO_NONE;
|
||||||
}
|
}
|
||||||
@@ -599,7 +601,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	if (issue_flags & IO_URING_F_IOPOLL)
 		rq_flags |= REQ_POLLED;
 
-retry:
 	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -613,17 +614,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			return ret;
 	}
 
-	if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
-		if (unlikely(!req->bio)) {
-			/* we can't poll this, so alloc regular req instead */
-			blk_mq_free_request(req);
-			rq_flags &= ~REQ_POLLED;
-			goto retry;
-		} else {
-			WRITE_ONCE(ioucmd->cookie, req->bio);
-			req->bio->bi_opf |= REQ_POLLED;
-		}
+	if (blk_rq_is_poll(req)) {
+		ioucmd->flags |= IORING_URING_CMD_POLLED;
+		WRITE_ONCE(ioucmd->cookie, req);
 	}
+
 	/* to free bio on completion, as req->bio will be null at that time */
 	pdu->bio = req->bio;
 	pdu->meta_len = d.metadata_len;
@@ -785,18 +780,16 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 				 struct io_comp_batch *iob,
 				 unsigned int poll_flags)
 {
-	struct bio *bio;
+	struct request *req;
 	int ret = 0;
-	struct nvme_ns *ns;
-	struct request_queue *q;
+
+	if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
+		return 0;
 
 	rcu_read_lock();
-	bio = READ_ONCE(ioucmd->cookie);
-	ns = container_of(file_inode(ioucmd->file)->i_cdev,
-			struct nvme_ns, cdev);
-	q = ns->queue;
-	if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
-		ret = bio_poll(bio, iob, poll_flags);
+	req = READ_ONCE(ioucmd->cookie);
+	if (req && blk_rq_is_poll(req))
+		ret = blk_rq_poll(req, iob, poll_flags);
 	rcu_read_unlock();
 	return ret;
 }
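Taken together, the uring-cmd hunks above rework the polling handshake: at issue time the request itself is stashed in ioucmd->cookie and the command is tagged IORING_URING_CMD_POLLED, and the ->uring_cmd_iopoll() hook then polls that request through the new blk_rq_poll() helper instead of chasing a bio. A condensed, annotated sketch of the two halves; the example_* wrappers are illustrative only, while the calls inside them come from the hunks above:

#include <linux/blk-mq.h>
#include <linux/io_uring.h>
#include <linux/rcupdate.h>

/* Issue side: run once the passthrough request has been set up. */
static void example_arm_iopoll(struct io_uring_cmd *ioucmd, struct request *req)
{
	if (blk_rq_is_poll(req)) {
		ioucmd->flags |= IORING_URING_CMD_POLLED;
		WRITE_ONCE(ioucmd->cookie, req);
	}
}

/* ->uring_cmd_iopoll() side: poll the stashed request directly. */
static int example_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
				    struct io_comp_batch *iob,
				    unsigned int poll_flags)
{
	struct request *req;
	int ret = 0;

	if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
		return 0;

	rcu_read_lock();
	/* The completion handler clears the cookie, hence READ_ONCE() + check. */
	req = READ_ONCE(ioucmd->cookie);
	if (req && blk_rq_is_poll(req))
		ret = blk_rq_poll(req, iob, poll_flags);
	rcu_read_unlock();
	return ret;
}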
@@ -890,31 +883,6 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return ret;
 }
-
-int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
-		struct io_comp_batch *iob,
-		unsigned int poll_flags)
-{
-	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
-	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
-	int srcu_idx = srcu_read_lock(&head->srcu);
-	struct nvme_ns *ns = nvme_find_path(head);
-	struct bio *bio;
-	int ret = 0;
-	struct request_queue *q;
-
-	if (ns) {
-		rcu_read_lock();
-		bio = READ_ONCE(ioucmd->cookie);
-		q = ns->queue;
-		if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio
-				&& bio->bi_bdev)
-			ret = bio_poll(bio, iob, poll_flags);
-		rcu_read_unlock();
-	}
-	srcu_read_unlock(&head->srcu, srcu_idx);
-	return ret;
-}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
@@ -106,6 +106,14 @@ void nvme_failover_req(struct request *req)
 			bio->bi_opf &= ~REQ_POLLED;
 			bio->bi_cookie = BLK_QC_T_NONE;
 		}
+		/*
+		 * The alternate request queue that we may end up submitting
+		 * the bio to may be frozen temporarily, in this case REQ_NOWAIT
+		 * will fail the I/O immediately with EAGAIN to the issuer.
+		 * We are not in the issuer context which cannot block. Clear
+		 * the flag to avoid spurious EAGAIN I/O failures.
+		 */
+		bio->bi_opf &= ~REQ_NOWAIT;
 	}
 	blk_steal_bios(&ns->head->requeue_list, req);
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
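The failover hunk above is the core of the EAGAIN fix: requeued bios are re-submitted from the requeue work rather than from the original issuer, so issuer-only hints have to be dropped first. A condensed sketch of the per-bio sanitizing the loop ends up doing (the helper name is made up):

#include <linux/bio.h>

static void example_sanitize_requeued_bio(struct bio *bio)
{
	/* Polling state only makes sense in the original submitter's context. */
	if (bio->bi_opf & REQ_POLLED) {
		bio->bi_opf &= ~REQ_POLLED;
		bio->bi_cookie = BLK_QC_T_NONE;
	}
	/*
	 * The requeue work can block, and the alternate path's queue may be
	 * temporarily frozen; keeping REQ_NOWAIT here would surface as a
	 * spurious EAGAIN to the issuer.
	 */
	bio->bi_opf &= ~REQ_NOWAIT;
}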
@@ -470,7 +478,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
 	.unlocked_ioctl	= nvme_ns_head_chr_ioctl,
 	.compat_ioctl	= compat_ptr_ioctl,
 	.uring_cmd	= nvme_ns_head_chr_uring_cmd,
-	.uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll,
+	.uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
 };
 
 static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
@@ -250,6 +250,7 @@ enum nvme_ctrl_flags {
 	NVME_CTRL_STARTED_ONCE		= 2,
 	NVME_CTRL_STOPPED		= 3,
 	NVME_CTRL_SKIP_ID_CNS_CS	= 4,
+	NVME_CTRL_DIRTY_CAPABILITY	= 5,
 };
 
 struct nvme_ctrl {
@@ -856,8 +857,6 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		unsigned long arg);
 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 		struct io_comp_batch *iob, unsigned int poll_flags);
-int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
-		struct io_comp_batch *iob, unsigned int poll_flags);
 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
@@ -2690,7 +2690,8 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
 		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
 			 dev->ctrl.state);
-		return;
+		result = -ENODEV;
+		goto out;
 	}
 
 	/*
@@ -2777,7 +2778,9 @@ static void nvme_reset_work(struct work_struct *work)
 		 result);
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 	nvme_dev_disable(dev, true);
+	nvme_sync_queues(&dev->ctrl);
 	nvme_mark_namespaces_dead(&dev->ctrl);
+	nvme_unquiesce_io_queues(&dev->ctrl);
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
 }
 
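The two nvme_reset_work() hunks above route the early state-check bail-out through the common error path and tighten the ordering of the failure teardown. A condensed, annotated sketch of the resulting failure sequence; the wrapper and its signature are illustrative, the callees and their order are taken from the hunk, and the comments give the rationale as I read it:

static void example_fail_reset(struct nvme_dev *dev)
{
	/* the real code first logs the failing status via dev_warn() */
	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
	nvme_dev_disable(dev, true);		/* shut the device down */
	nvme_sync_queues(&dev->ctrl);		/* let pending timeout work drain */
	nvme_mark_namespaces_dead(&dev->ctrl);	/* fail outstanding I/O */
	nvme_unquiesce_io_queues(&dev->ctrl);	/* don't strand requests on quiesced queues */
	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
}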
@@ -79,8 +79,8 @@ struct nvmet_ns {
 	struct completion	disable_done;
 	mempool_t		*bvec_pool;
 
-	int			use_p2pmem;
 	struct pci_dev		*p2p_dev;
+	int			use_p2pmem;
 	int			pi_type;
 	int			metadata_size;
 	u8			csi;
@@ -715,6 +715,8 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_free_request(struct request *rq);
+int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
+		unsigned int poll_flags);
 
 bool blk_mq_queue_inflight(struct request_queue *q);
 
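blk_rq_poll() above is the new request-level polling export that the nvme iopoll hook uses earlier in this series. A hypothetical synchronous caller could also spin on it until the request's end_io handler signals completion, roughly like the sketch below; everything named example_*, the completion-based signalling, and the NULL/0 arguments are assumptions, not part of the patch:

#include <linux/blk-mq.h>
#include <linux/completion.h>
#include <linux/sched.h>

/*
 * Assumes the caller still owns rq (its end_io returns RQ_END_IO_NONE) and
 * that the end_io handler completes 'done'.
 */
static void example_poll_until_done(struct request *rq, struct completion *done)
{
	if (!blk_rq_is_poll(rq)) {
		wait_for_completion(done);	/* not a polled request */
		return;
	}

	do {
		blk_rq_poll(rq, NULL, 0);	/* no completion batch, no flags */
		cond_resched();
	} while (!completion_done(done));
}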
@@ -852,7 +854,11 @@ static inline bool blk_mq_add_to_batch(struct request *req,
 				       struct io_comp_batch *iob, int ioerror,
 				       void (*complete)(struct io_comp_batch *))
 {
-	if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror ||
+	/*
+	 * blk_mq_end_request_batch() can't end request allocated from
+	 * sched tags
+	 */
+	if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror ||
 			(req->end_io && !blk_rq_is_passthrough(req)))
 		return false;
 
@@ -244,8 +244,10 @@ enum io_uring_op {
  * sqe->uring_cmd_flags
  * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
+ * IORING_URING_CMD_POLLED	driver use only
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
+#define IORING_URING_CMD_POLLED	(1U << 31)
 
 
 /*