for-5.5/block-20191121

Merge tag 'for-5.5/block-20191121' of git://git.kernel.dk/linux-block

Pull core block updates from Jens Axboe:
 "Due to more granular branches, this one is small and will be followed
  with other core branches that add specific features. I meant to just
  have a core and drivers branch, but due to external dependencies we
  ended up adding a few more that are also core.

  The changes are:

   - Fixes and improvements for the zoned device support (Ajay, Damien)
   - sed-opal table writing and datastore UID (Revanth)
   - blk-cgroup (and bfq) blk-cgroup stat fixes (Tejun)
   - Improvements to the block stats tracking (Pavel)
   - Fix for overrunning sysfs buffer for large number of CPUs (Ming)
   - Optimization for small IO (Ming, Christoph)
   - Fix typo in RWH lifetime hint (Eugene)
   - Dead code removal and documentation (Bart)
   - Reduction in memory usage for queue and tag set (Bart)
   - Kerneldoc header documentation (André)
   - Device/partition revalidation fixes (Jan)
   - Stats tracking for flush requests (Konstantin)
   - Various other little fixes here and there (et al)"

* tag 'for-5.5/block-20191121' of git://git.kernel.dk/linux-block: (48 commits)
  Revert "block: split bio if the only bvec's length is > SZ_4K"
  block: add iostat counters for flush requests
  block,bfq: Skip tracing hooks if possible
  block: sed-opal: Introduce SUM_SET_LIST parameter and append it using 'add_token_u64'
  blk-cgroup: cgroup_rstat_updated() shouldn't be called on cgroup1
  block: Don't disable interrupts in trigger_softirq()
  sbitmap: Delete sbitmap_any_bit_clear()
  blk-mq: Delete blk_mq_has_free_tags() and blk_mq_can_queue()
  block: split bio if the only bvec's length is > SZ_4K
  block: still try to split bio if the bvec crosses pages
  blk-cgroup: separate out blkg_rwstat under CONFIG_BLK_CGROUP_RWSTAT
  blk-cgroup: reimplement basic IO stats using cgroup rstat
  blk-cgroup: remove now unused blkg_print_stat_{bytes|ios}_recursive()
  blk-throtl: stop using blkg->stat_bytes and ->stat_ios
  bfq-iosched: stop using blkg->stat_bytes and ->stat_ios
  bfq-iosched: relocate bfqg_*rwstat*() helpers
  block: add zone open, close and finish ioctl support
  block: add zone open, close and finish operations
  block: Simplify REQ_OP_ZONE_RESET_ALL handling
  block: Remove REQ_OP_ZONE_RESET plugging
  ...
commit ff6814b078
@@ -29,4 +29,9 @@ Description:
		17 - sectors discarded
		18 - time spent discarding

		Kernel 5.5+ appends two more fields for flush requests:

		19 - flush requests completed successfully
		20 - time spent flushing

		For more details refer to Documentation/admin-guide/iostats.rst
@@ -15,6 +15,12 @@ Description:
		 9 - I/Os currently in progress
		10 - time spent doing I/Os (ms)
		11 - weighted time spent doing I/Os (ms)
		12 - discards completed
		13 - discards merged
		14 - sectors discarded
		15 - time spent discarding (ms)
		16 - flush requests completed
		17 - time spent flushing (ms)
		For more details refer to Documentation/admin-guide/iostats.rst
@@ -121,6 +121,15 @@ Field 15 -- # of milliseconds spent discarding
    This is the total number of milliseconds spent by all discards (as
    measured from __make_request() to end_that_request_last()).

Field 16 -- # of flush requests completed
    This is the total number of flush requests completed successfully.

    Block layer combines flush requests and executes at most one at a time.
    This counts flush requests executed by disk. Not tracked for partitions.

Field 17 -- # of milliseconds spent flushing
    This is the total number of milliseconds spent by all flush requests.

To avoid introducing performance bottlenecks, no locks are held while
modifying these counters.  This implies that minor inaccuracies may be
introduced when changes collide, so (for instance) adding up all the
@@ -41,6 +41,8 @@ discard I/Os    requests      number of discard I/Os processed
discard merges  requests      number of discard I/Os merged with in-queue I/O
discard sectors sectors       number of sectors discarded
discard ticks   milliseconds  total wait time for discard requests
flush I/Os      requests      number of flush I/Os processed
flush ticks     milliseconds  total wait time for flush requests
=============== ============= =================================================

read I/Os, write I/Os, discard I/Os
@@ -48,6 +50,14 @@ read I/Os, write I/Os, discard I/Os

These values increment when an I/O request completes.

flush I/Os
==========

These values increment when a flush I/O request completes.

Block layer combines flush requests and executes at most one at a time.
This counts flush requests executed by disk. Not tracked for partitions.

read merges, write merges, discard merges
=========================================
@@ -62,8 +72,8 @@ discarded from this block device. The "sectors" in question are the
standard UNIX 512-byte sectors, not any device- or filesystem-specific
block size.  The counters are incremented when the I/O completes.

read ticks, write ticks, discard ticks
======================================
read ticks, write ticks, discard ticks, flush ticks
===================================================

These values count the number of milliseconds that I/O requests have
waited on this block device. If there are multiple I/O requests waiting,
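The stat documentation above defines where the new flush counters land in the per-disk stat file. As a quick illustration (not part of this patch set), here is a hedged userspace sketch that reads /sys/block/<dev>/stat on a 5.5+ kernel and prints the two appended fields; the default device name "sda" and the 17-field layout are assumptions taken from the documentation above.

/*
 * Hedged sketch, not part of the patch set: read the per-disk stat file
 * and print the two flush fields this series appends (field 16 = flush
 * requests completed, field 17 = milliseconds spent flushing, per the
 * stat documentation above).
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "sda";
	unsigned long long f[17];
	char path[256];
	FILE *fp;
	int n = 0;

	snprintf(path, sizeof(path), "/sys/block/%s/stat", dev);
	fp = fopen(path, "r");
	if (!fp) {
		perror(path);
		return 1;
	}
	while (n < 17 && fscanf(fp, "%llu", &f[n]) == 1)
		n++;
	fclose(fp);

	if (n < 17) {
		/* pre-5.5 kernels expose only 11 or 15 fields */
		fprintf(stderr, "%s: %d fields, no flush stats\n", path, n);
		return 1;
	}
	printf("%s: flushes completed=%llu, time flushing=%llu ms\n",
	       dev, f[15], f[16]);
	return 0;
}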
@@ -32,6 +32,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_SCSI_REQUEST
	bool

config BLK_CGROUP_RWSTAT
	bool

config BLK_DEV_BSG
	bool "Block layer SG support v4"
	default y
@@ -86,6 +89,7 @@ config BLK_DEV_ZONED
config BLK_DEV_THROTTLING
	bool "Block layer bio throttling support"
	depends on BLK_CGROUP=y
	select BLK_CGROUP_RWSTAT
	---help---
	  Block layer bio throttling support. It can be used to limit
	  the IO rate to a device. IO rate policies are per cgroup and
@@ -31,6 +31,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
	bool "BFQ hierarchical scheduling support"
	depends on IOSCHED_BFQ && BLK_CGROUP
	select BLK_CGROUP_RWSTAT
	---help---

	  Enable hierarchical scheduling in BFQ, using the blkio
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT)	+= blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
@ -347,6 +347,14 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg)
|
||||
bfqg_put(bfqg);
|
||||
}
|
||||
|
||||
void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
|
||||
|
||||
blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
|
||||
blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
|
||||
}
|
||||
|
||||
/* @stats = 0 */
|
||||
static void bfqg_stats_reset(struct bfqg_stats *stats)
|
||||
{
|
||||
@ -431,6 +439,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
|
||||
|
||||
static void bfqg_stats_exit(struct bfqg_stats *stats)
|
||||
{
|
||||
blkg_rwstat_exit(&stats->bytes);
|
||||
blkg_rwstat_exit(&stats->ios);
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
blkg_rwstat_exit(&stats->merged);
|
||||
blkg_rwstat_exit(&stats->service_time);
|
||||
@ -448,6 +458,10 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
|
||||
|
||||
static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
|
||||
{
|
||||
if (blkg_rwstat_init(&stats->bytes, gfp) ||
|
||||
blkg_rwstat_init(&stats->ios, gfp))
|
||||
return -ENOMEM;
|
||||
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
if (blkg_rwstat_init(&stats->merged, gfp) ||
|
||||
blkg_rwstat_init(&stats->service_time, gfp) ||
|
||||
@ -1057,6 +1071,30 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
|
||||
return bfq_io_set_device_weight(of, buf, nbytes, off);
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
|
||||
&blkcg_policy_bfq, seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkg_rwstat_sample sum;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
|
||||
return __blkg_prfill_rwstat(sf, pd, &sum);
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
|
||||
seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
static int bfqg_print_stat(struct seq_file *sf, void *v)
|
||||
{
|
||||
@ -1065,13 +1103,6 @@ static int bfqg_print_stat(struct seq_file *sf, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
|
||||
&blkcg_policy_bfq, seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
@ -1097,15 +1128,6 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
|
||||
return __blkg_prfill_u64(sf, pd, sum);
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkg_rwstat_sample sum;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
|
||||
return __blkg_prfill_rwstat(sf, pd, &sum);
|
||||
}
|
||||
|
||||
static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
@ -1114,18 +1136,11 @@ static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
|
||||
seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
|
||||
struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
|
||||
u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);
|
||||
|
||||
return __blkg_prfill_u64(sf, pd, sum >> 9);
|
||||
}
|
||||
@ -1142,8 +1157,8 @@ static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
|
||||
{
|
||||
struct blkg_rwstat_sample tmp;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd->blkg, NULL,
|
||||
offsetof(struct blkcg_gq, stat_bytes), &tmp);
|
||||
blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
|
||||
offsetof(struct bfq_group, stats.bytes), &tmp);
|
||||
|
||||
return __blkg_prfill_u64(sf, pd,
|
||||
(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
|
||||
@ -1226,13 +1241,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
|
||||
/* statistics, covers only the tasks in the bfqg */
|
||||
{
|
||||
.name = "bfq.io_service_bytes",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_bytes,
|
||||
.private = offsetof(struct bfq_group, stats.bytes),
|
||||
.seq_show = bfqg_print_rwstat,
|
||||
},
|
||||
{
|
||||
.name = "bfq.io_serviced",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_ios,
|
||||
.private = offsetof(struct bfq_group, stats.ios),
|
||||
.seq_show = bfqg_print_rwstat,
|
||||
},
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
{
|
||||
@ -1269,13 +1284,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
|
||||
/* the same statistics which cover the bfqg and its descendants */
|
||||
{
|
||||
.name = "bfq.io_service_bytes_recursive",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_bytes_recursive,
|
||||
.private = offsetof(struct bfq_group, stats.bytes),
|
||||
.seq_show = bfqg_print_rwstat_recursive,
|
||||
},
|
||||
{
|
||||
.name = "bfq.io_serviced_recursive",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_ios_recursive,
|
||||
.private = offsetof(struct bfq_group, stats.ios),
|
||||
.seq_show = bfqg_print_rwstat_recursive,
|
||||
},
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
{
|
||||
|
@@ -5484,6 +5484,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
	bool idle_timer_disabled = false;
	unsigned int cmd_flags;

#ifdef CONFIG_BFQ_GROUP_IOSCHED
	if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
		bfqg_stats_update_legacy_io(q, rq);
#endif
	spin_lock_irq(&bfqd->lock);
	if (blk_mq_sched_try_insert_merge(q, rq)) {
		spin_unlock_irq(&bfqd->lock);
@ -10,6 +10,8 @@
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
|
||||
#define BFQ_IOPRIO_CLASSES 3
|
||||
#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
|
||||
|
||||
@ -809,6 +811,9 @@ struct bfq_stat {
|
||||
};
|
||||
|
||||
struct bfqg_stats {
|
||||
/* basic stats */
|
||||
struct blkg_rwstat bytes;
|
||||
struct blkg_rwstat ios;
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
/* number of ios merged */
|
||||
struct blkg_rwstat merged;
|
||||
@ -956,6 +961,7 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
|
||||
|
||||
/* ---------------- cgroups-support interface ---------------- */
|
||||
|
||||
void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
|
||||
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
|
||||
unsigned int op);
|
||||
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
|
||||
@ -1062,6 +1068,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
|
||||
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
|
||||
char pid_str[MAX_PID_STR_LENGTH]; \
|
||||
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
|
||||
break; \
|
||||
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
|
||||
blk_add_cgroup_trace_msg((bfqd)->queue, \
|
||||
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
|
||||
@ -1078,6 +1086,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
|
||||
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
|
||||
char pid_str[MAX_PID_STR_LENGTH]; \
|
||||
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
|
||||
break; \
|
||||
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
|
||||
blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \
|
||||
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
|
||||
|
129
block/blk-cgroup-rwstat.c
Normal file
129
block/blk-cgroup-rwstat.c
Normal file
@ -0,0 +1,129 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
|
||||
* Do not use in new code.
|
||||
*/
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
|
||||
int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++) {
|
||||
ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
|
||||
if (ret) {
|
||||
while (--i >= 0)
|
||||
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
|
||||
return ret;
|
||||
}
|
||||
atomic64_set(&rwstat->aux_cnt[i], 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_init);
|
||||
|
||||
void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_exit);
|
||||
|
||||
/**
|
||||
* __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @rwstat: rwstat to print
|
||||
*
|
||||
* Print @rwstat to @sf for the device assocaited with @pd.
|
||||
*/
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat)
|
||||
{
|
||||
static const char *rwstr[] = {
|
||||
[BLKG_RWSTAT_READ] = "Read",
|
||||
[BLKG_RWSTAT_WRITE] = "Write",
|
||||
[BLKG_RWSTAT_SYNC] = "Sync",
|
||||
[BLKG_RWSTAT_ASYNC] = "Async",
|
||||
[BLKG_RWSTAT_DISCARD] = "Discard",
|
||||
};
|
||||
const char *dname = blkg_dev_name(pd->blkg);
|
||||
u64 v;
|
||||
int i;
|
||||
|
||||
if (!dname)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
|
||||
rwstat->cnt[i]);
|
||||
|
||||
v = rwstat->cnt[BLKG_RWSTAT_READ] +
|
||||
rwstat->cnt[BLKG_RWSTAT_WRITE] +
|
||||
rwstat->cnt[BLKG_RWSTAT_DISCARD];
|
||||
seq_printf(sf, "%s Total %llu\n", dname, v);
|
||||
return v;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
|
||||
|
||||
/**
|
||||
* blkg_prfill_rwstat - prfill callback for blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @off: offset to the blkg_rwstat in @pd
|
||||
*
|
||||
* prfill callback for printing a blkg_rwstat.
|
||||
*/
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat = { };
|
||||
|
||||
blkg_rwstat_read((void *)pd + off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
|
||||
|
||||
/**
|
||||
* blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
|
||||
* @blkg: blkg of interest
|
||||
* @pol: blkcg_policy which contains the blkg_rwstat
|
||||
* @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
|
||||
* @sum: blkg_rwstat_sample structure containing the results
|
||||
*
|
||||
* Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
|
||||
* online descendants and their aux counts. The caller must be holding the
|
||||
* queue lock for online tests.
|
||||
*
|
||||
* If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
|
||||
* is at @off bytes into @blkg's blkg_policy_data of the policy.
|
||||
*/
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum)
|
||||
{
|
||||
struct blkcg_gq *pos_blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
|
||||
struct blkg_rwstat *rwstat;
|
||||
|
||||
if (!pos_blkg->online)
|
||||
continue;
|
||||
|
||||
if (pol)
|
||||
rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
|
||||
else
|
||||
rwstat = (void *)pos_blkg + off;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
|
149
block/blk-cgroup-rwstat.h
Normal file
149
block/blk-cgroup-rwstat.h
Normal file
@ -0,0 +1,149 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
|
||||
* Do not use in new code.
|
||||
*/
|
||||
#ifndef _BLK_CGROUP_RWSTAT_H
|
||||
#define _BLK_CGROUP_RWSTAT_H
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
enum blkg_rwstat_type {
|
||||
BLKG_RWSTAT_READ,
|
||||
BLKG_RWSTAT_WRITE,
|
||||
BLKG_RWSTAT_SYNC,
|
||||
BLKG_RWSTAT_ASYNC,
|
||||
BLKG_RWSTAT_DISCARD,
|
||||
|
||||
BLKG_RWSTAT_NR,
|
||||
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
|
||||
};
|
||||
|
||||
/*
|
||||
* blkg_[rw]stat->aux_cnt is excluded for local stats but included for
|
||||
* recursive. Used to carry stats of dead children.
|
||||
*/
|
||||
struct blkg_rwstat {
|
||||
struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
|
||||
atomic64_t aux_cnt[BLKG_RWSTAT_NR];
|
||||
};
|
||||
|
||||
struct blkg_rwstat_sample {
|
||||
u64 cnt[BLKG_RWSTAT_NR];
|
||||
};
|
||||
|
||||
static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
|
||||
unsigned int idx)
|
||||
{
|
||||
return atomic64_read(&rwstat->aux_cnt[idx]) +
|
||||
percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
|
||||
}
|
||||
|
||||
int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp);
|
||||
void blkg_rwstat_exit(struct blkg_rwstat *rwstat);
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat);
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off);
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum);
|
||||
|
||||
|
||||
/**
|
||||
* blkg_rwstat_add - add a value to a blkg_rwstat
|
||||
* @rwstat: target blkg_rwstat
|
||||
* @op: REQ_OP and flags
|
||||
* @val: value to add
|
||||
*
|
||||
* Add @val to @rwstat. The counters are chosen according to @rw. The
|
||||
* caller is responsible for synchronizing calls to this function.
|
||||
*/
|
||||
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
|
||||
unsigned int op, uint64_t val)
|
||||
{
|
||||
struct percpu_counter *cnt;
|
||||
|
||||
if (op_is_discard(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
|
||||
else if (op_is_write(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
|
||||
else
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
|
||||
|
||||
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
|
||||
|
||||
if (op_is_sync(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
|
||||
else
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
|
||||
|
||||
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_read - read the current values of a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to read
|
||||
*
|
||||
* Read the current snapshot of @rwstat and return it in the aux counts.
|
||||
*/
|
||||
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
|
||||
struct blkg_rwstat_sample *result)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
result->cnt[i] =
|
||||
percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_total - read the total count of a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to read
|
||||
*
|
||||
* Return the total count of @rwstat regardless of the IO direction. This
|
||||
* function can be called without synchronization and takes care of u64
|
||||
* atomicity.
|
||||
*/
|
||||
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
struct blkg_rwstat_sample tmp = { };
|
||||
|
||||
blkg_rwstat_read(rwstat, &tmp);
|
||||
return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_reset - reset a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to reset
|
||||
*/
|
||||
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++) {
|
||||
percpu_counter_set(&rwstat->cpu_cnt[i], 0);
|
||||
atomic64_set(&rwstat->aux_cnt[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
|
||||
* @to: the destination blkg_rwstat
|
||||
* @from: the source
|
||||
*
|
||||
* Add @from's count including the aux one to @to's aux count.
|
||||
*/
|
||||
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
|
||||
struct blkg_rwstat *from)
|
||||
{
|
||||
u64 sum[BLKG_RWSTAT_NR];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
|
||||
&to->aux_cnt[i]);
|
||||
}
|
||||
#endif /* _BLK_CGROUP_RWSTAT_H */
|
@ -80,8 +80,7 @@ static void blkg_free(struct blkcg_gq *blkg)
|
||||
if (blkg->pd[i])
|
||||
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
|
||||
|
||||
blkg_rwstat_exit(&blkg->stat_ios);
|
||||
blkg_rwstat_exit(&blkg->stat_bytes);
|
||||
free_percpu(blkg->iostat_cpu);
|
||||
percpu_ref_exit(&blkg->refcnt);
|
||||
kfree(blkg);
|
||||
}
|
||||
@ -146,7 +145,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
int i;
|
||||
int i, cpu;
|
||||
|
||||
/* alloc and init base part */
|
||||
blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
|
||||
@ -156,8 +155,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
|
||||
goto err_free;
|
||||
|
||||
if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
|
||||
blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
|
||||
blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
|
||||
if (!blkg->iostat_cpu)
|
||||
goto err_free;
|
||||
|
||||
blkg->q = q;
|
||||
@ -167,6 +166,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
|
||||
blkg->blkcg = blkcg;
|
||||
|
||||
u64_stats_init(&blkg->iostat.sync);
|
||||
for_each_possible_cpu(cpu)
|
||||
u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
struct blkg_policy_data *pd;
|
||||
@ -393,7 +396,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct blkcg *blkcg = blkg->blkcg;
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
@ -410,11 +412,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
}
|
||||
|
||||
if (parent) {
|
||||
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
|
||||
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
|
||||
}
|
||||
|
||||
blkg->online = false;
|
||||
|
||||
radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
|
||||
@ -464,7 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
int i;
|
||||
int i, cpu;
|
||||
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
@ -475,8 +472,12 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
|
||||
* anyway. If you get hit by a race, retry.
|
||||
*/
|
||||
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
blkg_rwstat_reset(&blkg->stat_bytes);
|
||||
blkg_rwstat_reset(&blkg->stat_ios);
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct blkg_iostat_set *bis =
|
||||
per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
memset(bis, 0, sizeof(*bis));
|
||||
}
|
||||
memset(&blkg->iostat, 0, sizeof(blkg->iostat));
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
@ -560,186 +561,6 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
|
||||
|
||||
/**
|
||||
* __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @rwstat: rwstat to print
|
||||
*
|
||||
* Print @rwstat to @sf for the device assocaited with @pd.
|
||||
*/
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat)
|
||||
{
|
||||
static const char *rwstr[] = {
|
||||
[BLKG_RWSTAT_READ] = "Read",
|
||||
[BLKG_RWSTAT_WRITE] = "Write",
|
||||
[BLKG_RWSTAT_SYNC] = "Sync",
|
||||
[BLKG_RWSTAT_ASYNC] = "Async",
|
||||
[BLKG_RWSTAT_DISCARD] = "Discard",
|
||||
};
|
||||
const char *dname = blkg_dev_name(pd->blkg);
|
||||
u64 v;
|
||||
int i;
|
||||
|
||||
if (!dname)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
|
||||
rwstat->cnt[i]);
|
||||
|
||||
v = rwstat->cnt[BLKG_RWSTAT_READ] +
|
||||
rwstat->cnt[BLKG_RWSTAT_WRITE] +
|
||||
rwstat->cnt[BLKG_RWSTAT_DISCARD];
|
||||
seq_printf(sf, "%s Total %llu\n", dname, v);
|
||||
return v;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
|
||||
|
||||
/**
|
||||
* blkg_prfill_rwstat - prfill callback for blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @off: offset to the blkg_rwstat in @pd
|
||||
*
|
||||
* prfill callback for printing a blkg_rwstat.
|
||||
*/
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat = { };
|
||||
|
||||
blkg_rwstat_read((void *)pd + off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
|
||||
|
||||
static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat = { };
|
||||
|
||||
blkg_rwstat_read((void *)pd->blkg + off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*
|
||||
* To be used as cftype->seq_show to print blkg->stat_bytes.
|
||||
* cftype->private must be set to the blkcg_policy.
|
||||
*/
|
||||
int blkg_print_stat_bytes(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_bytes), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
|
||||
|
||||
/**
|
||||
* blkg_print_stat_bytes - seq_show callback for blkg->stat_ios
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*
|
||||
* To be used as cftype->seq_show to print blkg->stat_ios. cftype->private
|
||||
* must be set to the blkcg_policy.
|
||||
*/
|
||||
int blkg_print_stat_ios(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_ios), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
|
||||
|
||||
static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*/
|
||||
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field_recursive,
|
||||
(void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_bytes), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
|
||||
|
||||
/**
|
||||
* blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*/
|
||||
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field_recursive,
|
||||
(void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_ios), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
|
||||
|
||||
/**
|
||||
* blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
|
||||
* @blkg: blkg of interest
|
||||
* @pol: blkcg_policy which contains the blkg_rwstat
|
||||
* @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
|
||||
* @sum: blkg_rwstat_sample structure containing the results
|
||||
*
|
||||
* Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
|
||||
* online descendants and their aux counts. The caller must be holding the
|
||||
* queue lock for online tests.
|
||||
*
|
||||
* If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
|
||||
* is at @off bytes into @blkg's blkg_policy_data of the policy.
|
||||
*/
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum)
|
||||
{
|
||||
struct blkcg_gq *pos_blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
|
||||
struct blkg_rwstat *rwstat;
|
||||
|
||||
if (!pos_blkg->online)
|
||||
continue;
|
||||
|
||||
if (pol)
|
||||
rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
|
||||
else
|
||||
rwstat = (void *)pos_blkg + off;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
|
||||
|
||||
/* Performs queue bypass and policy enabled checks then looks up blkg. */
|
||||
static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
|
||||
const struct blkcg_policy *pol,
|
||||
@ -923,16 +744,18 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
cgroup_rstat_flush(blkcg->css.cgroup);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct blkg_iostat_set *bis = &blkg->iostat;
|
||||
const char *dname;
|
||||
char *buf;
|
||||
struct blkg_rwstat_sample rwstat;
|
||||
u64 rbytes, wbytes, rios, wios, dbytes, dios;
|
||||
size_t size = seq_get_buf(sf, &buf), off = 0;
|
||||
int i;
|
||||
bool has_stats = false;
|
||||
unsigned seq;
|
||||
|
||||
spin_lock_irq(&blkg->q->queue_lock);
|
||||
|
||||
@ -951,17 +774,16 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
*/
|
||||
off += scnprintf(buf+off, size-off, "%s ", dname);
|
||||
|
||||
blkg_rwstat_recursive_sum(blkg, NULL,
|
||||
offsetof(struct blkcg_gq, stat_bytes), &rwstat);
|
||||
rbytes = rwstat.cnt[BLKG_RWSTAT_READ];
|
||||
wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE];
|
||||
dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD];
|
||||
do {
|
||||
seq = u64_stats_fetch_begin(&bis->sync);
|
||||
|
||||
blkg_rwstat_recursive_sum(blkg, NULL,
|
||||
offsetof(struct blkcg_gq, stat_ios), &rwstat);
|
||||
rios = rwstat.cnt[BLKG_RWSTAT_READ];
|
||||
wios = rwstat.cnt[BLKG_RWSTAT_WRITE];
|
||||
dios = rwstat.cnt[BLKG_RWSTAT_DISCARD];
|
||||
rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
|
||||
wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
|
||||
dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
|
||||
rios = bis->cur.ios[BLKG_IOSTAT_READ];
|
||||
wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
|
||||
dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
|
||||
} while (u64_stats_fetch_retry(&bis->sync, seq));
|
||||
|
||||
if (rbytes || wbytes || rios || wios) {
|
||||
has_stats = true;
|
||||
@ -1297,6 +1119,77 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] = src->bytes[i];
|
||||
dst->ios[i] = src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] += src->bytes[i];
|
||||
dst->ios[i] += src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] -= src->bytes[i];
|
||||
dst->ios[i] -= src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
struct blkg_iostat cur, delta;
|
||||
unsigned seq;
|
||||
|
||||
/* fetch the current per-cpu values */
|
||||
do {
|
||||
seq = u64_stats_fetch_begin(&bisc->sync);
|
||||
blkg_iostat_set(&cur, &bisc->cur);
|
||||
} while (u64_stats_fetch_retry(&bisc->sync, seq));
|
||||
|
||||
/* propagate percpu delta to global */
|
||||
u64_stats_update_begin(&blkg->iostat.sync);
|
||||
blkg_iostat_set(&delta, &cur);
|
||||
blkg_iostat_sub(&delta, &bisc->last);
|
||||
blkg_iostat_add(&blkg->iostat.cur, &delta);
|
||||
blkg_iostat_add(&bisc->last, &delta);
|
||||
u64_stats_update_end(&blkg->iostat.sync);
|
||||
|
||||
/* propagate global delta to parent */
|
||||
if (parent) {
|
||||
u64_stats_update_begin(&parent->iostat.sync);
|
||||
blkg_iostat_set(&delta, &blkg->iostat.cur);
|
||||
blkg_iostat_sub(&delta, &blkg->iostat.last);
|
||||
blkg_iostat_add(&parent->iostat.cur, &delta);
|
||||
blkg_iostat_add(&blkg->iostat.last, &delta);
|
||||
u64_stats_update_end(&parent->iostat.sync);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
{
|
||||
int i;
|
||||
@ -1329,6 +1222,7 @@ struct cgroup_subsys io_cgrp_subsys = {
|
||||
.css_offline = blkcg_css_offline,
|
||||
.css_free = blkcg_css_free,
|
||||
.can_attach = blkcg_can_attach,
|
||||
.css_rstat_flush = blkcg_rstat_flush,
|
||||
.bind = blkcg_bind,
|
||||
.dfl_cftypes = blkcg_files,
|
||||
.legacy_cftypes = blkcg_legacy_files,
|
||||
|
@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
	REQ_OP_NAME(SECURE_ERASE),
	REQ_OP_NAME(ZONE_RESET),
	REQ_OP_NAME(ZONE_RESET_ALL),
	REQ_OP_NAME(ZONE_OPEN),
	REQ_OP_NAME(ZONE_CLOSE),
	REQ_OP_NAME(ZONE_FINISH),
	REQ_OP_NAME(WRITE_SAME),
	REQ_OP_NAME(WRITE_ZEROES),
	REQ_OP_NAME(SCSI_IN),
@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 */
void blk_cleanup_queue(struct request_queue *q)
{
	WARN_ON_ONCE(blk_queue_registered(q));

	/* mark @q DYING, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	blk_set_queue_dying(q);

	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
@@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio)
		goto out;

	/*
	 * Zone reset does not include bi_size so bio_sectors() is always 0.
	 * Include a test for the reset op code and perform the remap if needed.
	 * Zone management bios do not have a sector count but they do have
	 * a start sector filled out and need to be remapped.
	 */
	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
	if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
@@ -936,6 +939,9 @@ generic_make_request_checks(bio)
			goto not_supported;
		break;
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		if (!blk_queue_is_zoned(q))
			goto not_supported;
		break;
@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
	rq->rq_disk = bd_disk;
	rq->end_io = done;

	blk_account_io_start(rq, true);

	/*
	 * don't check dying flag for MQ because the request won't
	 * be reused after dying flag is set
@@ -136,6 +136,17 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
	blk_mq_add_to_requeue_list(rq, add_front, true);
}

static void blk_account_io_flush(struct request *rq)
{
	struct hd_struct *part = &rq->rq_disk->part0;

	part_stat_lock();
	part_stat_inc(part, ios[STAT_FLUSH]);
	part_stat_add(part, nsecs[STAT_FLUSH],
		      ktime_get_ns() - rq->start_time_ns);
	part_stat_unlock();
}

/**
 * blk_flush_complete_seq - complete flush sequence
 * @rq: PREFLUSH/FUA request being sequenced
@@ -185,7 +196,7 @@ static void blk_flush_complete_seq(struct request *rq,

	case REQ_FSEQ_DONE:
		/*
		 * @rq was previously adjusted by blk_flush_issue() for
		 * @rq was previously adjusted by blk_insert_flush() for
		 * flush sequencing and may already have gone through the
		 * flush data request completion path. Restore @rq for
		 * normal completion and end it.
@@ -212,6 +223,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
	struct blk_mq_hw_ctx *hctx;

	blk_account_io_flush(flush_rq);

	/* release the tag's ownership to the req cloned from */
	spin_lock_irqsave(&fq->mq_flush_lock, flags);
@@ -293,7 +293,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
void __blk_queue_split(struct request_queue *q, struct bio **bio,
		unsigned int *nr_segs)
{
	struct bio *split;
	struct bio *split = NULL;

	switch (bio_op(*bio)) {
	case REQ_OP_DISCARD:
@@ -309,6 +309,21 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
				nr_segs);
		break;
	default:
		/*
		 * All drivers must accept single-segments bios that are <=
		 * PAGE_SIZE.  This is a quick and dirty check that relies on
		 * the fact that bi_io_vec[0] is always valid if a bio has data.
		 * The check might lead to occasional false negatives when bios
		 * are cloned, but compared to the performance impact of cloned
		 * bios themselves the loop below doesn't matter anyway.
		 */
		if (!q->limits.chunk_sectors &&
		    (*bio)->bi_vcnt == 1 &&
		    ((*bio)->bi_io_vec[0].bv_len +
		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
			*nr_segs = 1;
			break;
		}
		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
		break;
	}
@ -74,10 +74,8 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
|
||||
if (!entry->show)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->show(ctx, page);
|
||||
res = entry->show(ctx, page);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@ -97,10 +95,8 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
|
||||
if (!entry->store)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->store(ctx, page, length);
|
||||
res = entry->store(ctx, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@ -120,10 +116,8 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
|
||||
if (!entry->show)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->show(hctx, page);
|
||||
res = entry->show(hctx, page);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@ -144,10 +138,8 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
|
||||
if (!entry->store)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->store(hctx, page, length);
|
||||
res = entry->store(hctx, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
|
||||
{
|
||||
const size_t size = PAGE_SIZE - 1;
|
||||
unsigned int i, first = 1;
|
||||
ssize_t ret = 0;
|
||||
int ret = 0, pos = 0;
|
||||
|
||||
for_each_cpu(i, hctx->cpumask) {
|
||||
if (first)
|
||||
ret += sprintf(ret + page, "%u", i);
|
||||
ret = snprintf(pos + page, size - pos, "%u", i);
|
||||
else
|
||||
ret += sprintf(ret + page, ", %u", i);
|
||||
ret = snprintf(pos + page, size - pos, ", %u", i);
|
||||
|
||||
if (ret >= size - pos)
|
||||
break;
|
||||
|
||||
first = 0;
|
||||
pos += ret;
|
||||
}
|
||||
|
||||
ret += sprintf(ret + page, "\n");
|
||||
return ret;
|
||||
ret = snprintf(pos + page, size + 1 - pos, "\n");
|
||||
return pos + ret;
|
||||
}
|
||||
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
|
||||
|
@@ -15,14 +15,6 @@
#include "blk-mq.h"
#include "blk-mq-tag.h"

bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
	if (!tags)
		return true;

	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before try to allocate driver tag, then even if fail
@@ -28,7 +28,6 @@ extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
			   struct blk_mq_ctx *ctx, unsigned int tag);
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
				   struct blk_mq_tags **tags,
				   unsigned int depth, bool can_grow);
148
block/blk-mq.c
148
block/blk-mq.c
@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
struct mq_inflight {
|
||||
struct hd_struct *part;
|
||||
unsigned int *inflight;
|
||||
unsigned int inflight[2];
|
||||
};
|
||||
|
||||
static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
|
||||
@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
|
||||
{
|
||||
struct mq_inflight *mi = priv;
|
||||
|
||||
/*
|
||||
* index[0] counts the specific partition that was asked for.
|
||||
*/
|
||||
if (rq->part == mi->part)
|
||||
mi->inflight[0]++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
|
||||
{
|
||||
unsigned inflight[2];
|
||||
struct mq_inflight mi = { .part = part, .inflight = inflight, };
|
||||
|
||||
inflight[0] = inflight[1] = 0;
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
|
||||
|
||||
return inflight[0];
|
||||
}
|
||||
|
||||
static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq, void *priv,
|
||||
bool reserved)
|
||||
{
|
||||
struct mq_inflight *mi = priv;
|
||||
|
||||
if (rq->part == mi->part)
|
||||
mi->inflight[rq_data_dir(rq)]++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
|
||||
{
|
||||
struct mq_inflight mi = { .part = part };
|
||||
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
|
||||
|
||||
return mi.inflight[0] + mi.inflight[1];
|
||||
}
|
||||
|
||||
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
|
||||
unsigned int inflight[2])
|
||||
{
|
||||
struct mq_inflight mi = { .part = part, .inflight = inflight, };
|
||||
struct mq_inflight mi = { .part = part };
|
||||
|
||||
inflight[0] = inflight[1] = 0;
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
|
||||
inflight[0] = mi.inflight[0];
|
||||
inflight[1] = mi.inflight[1];
|
||||
}
|
||||
|
||||
void blk_freeze_queue_start(struct request_queue *q)
|
||||
@ -276,12 +260,6 @@ void blk_mq_wake_waiters(struct request_queue *q)
|
||||
blk_mq_tag_wakeup_all(hctx->tags, true);
|
||||
}
|
||||
|
||||
bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
return blk_mq_has_free_tags(hctx->tags);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_can_queue);
|
||||
|
||||
/*
|
||||
* Only need start/end time stamping if we have iostat or
|
||||
* blk stats enabled, or using an IO scheduler.
|
||||
@ -663,18 +641,6 @@ bool blk_mq_complete_request(struct request *rq)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_complete_request);
|
||||
|
||||
int blk_mq_request_started(struct request *rq)
|
||||
{
|
||||
return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_request_started);
|
||||
|
||||
int blk_mq_request_completed(struct request *rq)
|
||||
{
|
||||
return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_request_completed);
|
||||
|
||||
void blk_mq_start_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
@ -1064,7 +1030,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
|
||||
bool shared;
|
||||
|
||||
if (rq->tag != -1)
|
||||
goto done;
|
||||
return true;
|
||||
|
||||
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
|
||||
data.flags |= BLK_MQ_REQ_RESERVED;
|
||||
@ -1079,7 +1045,6 @@ bool blk_mq_get_driver_tag(struct request *rq)
|
||||
data.hctx->tags->rqs[rq->tag] = rq;
|
||||
}
|
||||
|
||||
done:
|
||||
return rq->tag != -1;
|
||||
}
|
||||
|
||||
@ -1486,7 +1451,7 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
|
||||
|
||||
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
{
|
||||
int srcu_idx;
|
||||
bool need_run;
|
||||
@ -1504,12 +1469,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
|
||||
blk_mq_hctx_has_pending(hctx);
|
||||
hctx_unlock(hctx, srcu_idx);
|
||||
|
||||
if (need_run) {
|
||||
if (need_run)
|
||||
__blk_mq_delay_run_hw_queue(hctx, async, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_run_hw_queue);
|
||||
|
||||
@ -2789,6 +2750,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
||||
int i, j, end;
|
||||
struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
|
||||
|
||||
if (q->nr_hw_queues < set->nr_hw_queues) {
|
||||
struct blk_mq_hw_ctx **new_hctxs;
|
||||
|
||||
new_hctxs = kcalloc_node(set->nr_hw_queues,
|
||||
sizeof(*new_hctxs), GFP_KERNEL,
|
||||
set->numa_node);
|
||||
if (!new_hctxs)
|
||||
return;
if (hctxs)
memcpy(new_hctxs, hctxs, q->nr_hw_queues *
sizeof(*hctxs));
q->queue_hw_ctx = new_hctxs;
q->nr_hw_queues = set->nr_hw_queues;
kfree(hctxs);
hctxs = new_hctxs;
}

/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2844,19 +2822,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}

/*
* Maximum number of hardware queues we support. For single sets, we'll never
* have more than the CPUs (software queues). For multiple sets, the tag_set
* user may have set ->nr_hw_queues larger.
*/
static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
{
if (set->nr_maps == 1)
return nr_cpu_ids;

return max(set->nr_hw_queues, nr_cpu_ids);
}

struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init)
@@ -2876,12 +2841,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);

q->nr_queues = nr_hw_queues(set);
q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
GFP_KERNEL, set->numa_node);
if (!q->queue_hw_ctx)
goto err_sys_init;

INIT_LIST_HEAD(&q->unused_hctx_list);
spin_lock_init(&q->unused_hctx_lock);

@@ -2929,7 +2888,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
kfree(q->queue_hw_ctx);
q->nr_hw_queues = 0;
err_sys_init:
blk_mq_sysfs_deinit(q);
err_poll:
blk_stat_free_callback(q->poll_cb);
@@ -3030,6 +2988,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
}
}

static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
int cur_nr_hw_queues, int new_nr_hw_queues)
{
struct blk_mq_tags **new_tags;

if (cur_nr_hw_queues >= new_nr_hw_queues)
return 0;

new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!new_tags)
return -ENOMEM;

if (set->tags)
memcpy(new_tags, set->tags, cur_nr_hw_queues *
sizeof(*set->tags));
kfree(set->tags);
set->tags = new_tags;
set->nr_hw_queues = new_nr_hw_queues;

return 0;
}

/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
@@ -3083,9 +3064,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
set->nr_hw_queues = nr_cpu_ids;

set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
return -ENOMEM;

ret = -ENOMEM;
@@ -3126,7 +3105,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
int i, j;

for (i = 0; i < nr_hw_queues(set); i++)
for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i);

for (j = 0; j < set->nr_maps; j++) {
@@ -3270,10 +3249,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
/*
* Sync with blk_mq_queue_tag_busy_iter.
*/
synchronize_rcu();
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
@@ -3288,6 +3263,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_sysfs_unregister(q);
}

if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
0)
goto reregister;

prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
@@ -3304,6 +3283,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_map_swqueue(q);
}

reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);

@@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);

void blk_mq_release(struct request_queue *q);

/**
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
* @rq: target request.
*/
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
return READ_ONCE(rq->state);
}

static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
unsigned int cpu)
{

@@ -42,17 +42,13 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
static void trigger_softirq(void *data)
{
struct request *rq = data;
unsigned long flags;
struct list_head *list;

local_irq_save(flags);
list = this_cpu_ptr(&blk_cpu_done);
list_add_tail(&rq->ipi_list, list);

if (list->next == &rq->ipi_list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);

local_irq_restore(flags);
}

/*

@@ -53,7 +53,7 @@ void blk_stat_add(struct request *rq, u64 now)
struct request_queue *q = rq->q;
struct blk_stat_callback *cb;
struct blk_rq_stat *stat;
int bucket;
int bucket, cpu;
u64 value;

value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
@@ -61,6 +61,7 @@ void blk_stat_add(struct request *rq, u64 now)
blk_throtl_stat_add(rq, value);

rcu_read_lock();
cpu = get_cpu();
list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
if (!blk_stat_is_active(cb))
continue;
@@ -69,10 +70,10 @@ void blk_stat_add(struct request *rq, u64 now)
if (bucket < 0)
continue;

stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
blk_rq_stat_add(stat, value);
put_cpu_ptr(cb->cpu_stat);
}
put_cpu();
rcu_read_unlock();
}
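Editor's note: the blk_stat_add() hunk above replaces a get_cpu_ptr() call inside the callback loop with a single get_cpu()/per_cpu_ptr()/put_cpu() sequence, so every callback accounts on the same CPU and preemption is toggled once per call rather than once per callback. A minimal sketch of that pattern follows; the counter and function are invented for illustration and this is not the kernel's actual code.

/* Sketch only: the get_cpu()/per_cpu_ptr()/put_cpu() accounting pattern. */
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/types.h>

static DEFINE_PER_CPU(u64, demo_counter);	/* hypothetical per-CPU counter */

static void demo_account(u64 value)
{
	int cpu = get_cpu();		/* pin this CPU, disable preemption */
	u64 *ctr;

	/* every access below resolves against the same, pinned CPU */
	ctr = per_cpu_ptr(&demo_counter, cpu);
	*ctr += value;

	put_cpu();			/* re-enable preemption */
}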
@@ -801,10 +801,6 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
res = entry->show(q, page);
mutex_unlock(&q->sysfs_lock);
return res;
@@ -823,10 +819,6 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,

q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
res = entry->store(q, page, length);
mutex_unlock(&q->sysfs_lock);
return res;

@@ -12,6 +12,7 @@
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"

/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;
@@ -176,6 +177,9 @@ struct throtl_grp {
unsigned int bio_cnt; /* total bios */
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;

struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
};

/* We measure latency for request size from <= 4k to >= 1M */
@@ -489,6 +493,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
if (!tg)
return NULL;

if (blkg_rwstat_init(&tg->stat_bytes, gfp))
goto err_free_tg;

if (blkg_rwstat_init(&tg->stat_ios, gfp))
goto err_exit_stat_bytes;

throtl_service_queue_init(&tg->service_queue);

for (rw = READ; rw <= WRITE; rw++) {
@@ -513,6 +523,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;

return &tg->pd;

err_exit_stat_bytes:
blkg_rwstat_exit(&tg->stat_bytes);
err_free_tg:
kfree(tg);
return NULL;
}

static void throtl_pd_init(struct blkg_policy_data *pd)
@@ -611,6 +627,8 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
struct throtl_grp *tg = pd_to_tg(pd);

del_timer_sync(&tg->service_queue.pending_timer);
blkg_rwstat_exit(&tg->stat_bytes);
blkg_rwstat_exit(&tg->stat_ios);
kfree(tg);
}

@@ -1464,6 +1482,32 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
return tg_set_conf(of, buf, nbytes, off, false);
}

static int tg_print_rwstat(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
blkg_prfill_rwstat, &blkcg_policy_throtl,
seq_cft(sf)->private, true);
return 0;
}

static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off)
{
struct blkg_rwstat_sample sum;

blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
&sum);
return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
seq_cft(sf)->private, true);
return 0;
}

static struct cftype throtl_legacy_files[] = {
{
.name = "throttle.read_bps_device",
@@ -1491,23 +1535,23 @@ static struct cftype throtl_legacy_files[] = {
},
{
.name = "throttle.io_service_bytes",
.private = (unsigned long)&blkcg_policy_throtl,
.seq_show = blkg_print_stat_bytes,
.private = offsetof(struct throtl_grp, stat_bytes),
.seq_show = tg_print_rwstat,
},
{
.name = "throttle.io_service_bytes_recursive",
.private = (unsigned long)&blkcg_policy_throtl,
.seq_show = blkg_print_stat_bytes_recursive,
.private = offsetof(struct throtl_grp, stat_bytes),
.seq_show = tg_print_rwstat_recursive,
},
{
.name = "throttle.io_serviced",
.private = (unsigned long)&blkcg_policy_throtl,
.seq_show = blkg_print_stat_ios,
.private = offsetof(struct throtl_grp, stat_ios),
.seq_show = tg_print_rwstat,
},
{
.name = "throttle.io_serviced_recursive",
.private = (unsigned long)&blkcg_policy_throtl,
.seq_show = blkg_print_stat_ios_recursive,
.private = offsetof(struct throtl_grp, stat_ios),
.seq_show = tg_print_rwstat_recursive,
},
{ } /* terminate */
};
@@ -2127,7 +2171,16 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
WARN_ON_ONCE(!rcu_read_lock_held());

/* see throtl_charge_bio() */
if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
if (bio_flagged(bio, BIO_THROTTLED))
goto out;

if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
bio->bi_iter.bi_size);
blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
}

if (!tg->has_rules[rw])
goto out;

spin_lock_irq(&q->queue_lock);

@@ -202,32 +202,14 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);

/*
* Special case of zone reset operation to reset all zones in one command,
* useful for applications like mkfs.
*/
static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
{
struct bio *bio = bio_alloc(gfp_mask, 0);
int ret;

/* across the zones operations, don't need any sectors */
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);

ret = submit_bio_wait(bio);
bio_put(bio);

return ret;
}

static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
sector_t sector,
sector_t nr_sectors)
{
if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
return false;

if (nr_sectors != part_nr_sects_read(bdev->bd_part))
if (sector || nr_sectors != part_nr_sects_read(bdev->bd_part))
return false;
/*
* REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
@@ -239,26 +221,29 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
}

/**
* blkdev_reset_zones - Reset zones write pointer
* blkdev_zone_mgmt - Execute a zone management operation on a range of zones
* @bdev: Target block device
* @sector: Start sector of the first zone to reset
* @nr_sectors: Number of sectors, at least the length of one zone
* @op: Operation to be performed on the zones
* @sector: Start sector of the first zone to operate on
* @nr_sectors: Number of sectors, should be at least the length of one zone and
* must be zone size aligned.
* @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
* Reset the write pointer of the zones contained in the range
* Perform the specified operation on the range of zones specified by
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
* is valid, but the specified range should not contain conventional zones.
* The operation to execute on each zone can be a zone reset, open, close
* or finish request.
*/
int blkdev_reset_zones(struct block_device *bdev,
sector_t sector, sector_t nr_sectors,
gfp_t gfp_mask)
int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sector, sector_t nr_sectors,
gfp_t gfp_mask)
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors;
sector_t zone_sectors = blk_queue_zone_sectors(q);
sector_t end_sector = sector + nr_sectors;
struct bio *bio = NULL;
struct blk_plug plug;
int ret;

if (!blk_queue_is_zoned(q))
@@ -267,15 +252,14 @@ int blkdev_reset_zones(struct block_device *bdev,
if (bdev_read_only(bdev))
return -EPERM;

if (!op_is_zone_mgmt(op))
return -EOPNOTSUPP;

if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
/* Out of range */
return -EINVAL;

if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
return __blkdev_reset_all_zones(bdev, gfp_mask);

/* Check alignment (handle eventual smaller last zone) */
zone_sectors = blk_queue_zone_sectors(q);
if (sector & (zone_sectors - 1))
return -EINVAL;

@@ -283,29 +267,34 @@ int blkdev_reset_zones(struct block_device *bdev,
end_sector != bdev->bd_part->nr_sects)
return -EINVAL;

blk_start_plug(&plug);
while (sector < end_sector) {

bio = blk_next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

/*
* Special case for the zone reset operation that reset all
* zones, this is useful for applications like mkfs.
*/
if (op == REQ_OP_ZONE_RESET &&
blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
break;
}

bio->bi_opf = op;
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;

/* This may take a while, so be nice to others */
cond_resched();

}

ret = submit_bio_wait(bio);
bio_put(bio);

blk_finish_plug(&plug);

return ret;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);

/*
* BLKREPORTZONE ioctl processing.
@@ -368,15 +357,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
}

/*
* BLKRESETZONE ioctl processing.
* BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
* Called from blkdev_ioctl.
*/
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct request_queue *q;
struct blk_zone_range zrange;
enum req_opf op;

if (!argp)
return -EINVAL;
@@ -397,8 +387,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
return -EFAULT;

return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
switch (cmd) {
case BLKRESETZONE:
op = REQ_OP_ZONE_RESET;
break;
case BLKOPENZONE:
op = REQ_OP_ZONE_OPEN;
break;
case BLKCLOSEZONE:
op = REQ_OP_ZONE_CLOSE;
break;
case BLKFINISHZONE:
op = REQ_OP_ZONE_FINISH;
break;
default:
return -ENOTTY;
}

return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
}
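Editor's note: with the BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctls added above, userspace drives zone state transitions through the same struct blk_zone_range already used by BLKRESETZONE. A hedged userspace sketch follows; it assumes a zoned block device and linux/blkzoned.h from a kernel carrying this series, and the device path and sector arguments are caller-supplied.

/* Userspace sketch: open (or close/finish/reset) a range of zones. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(int argc, char **argv)
{
	struct blk_zone_range range;
	int fd;

	if (argc != 4) {
		fprintf(stderr, "usage: %s <dev> <sector> <nr_sectors>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* sector/nr_sectors must be zone aligned, as enforced by blkdev_zone_mgmt() */
	range.sector = strtoull(argv[2], NULL, 0);
	range.nr_sectors = strtoull(argv[3], NULL, 0);

	/* BLKCLOSEZONE, BLKFINISHZONE and BLKRESETZONE take the same argument */
	if (ioctl(fd, BLKOPENZONE, &range) < 0)
		perror("BLKOPENZONE");

	return 0;
}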
static inline unsigned long *blk_alloc_zone_bitmap(int node,

@@ -242,14 +242,11 @@ int blk_dev_init(void);
* Contribute to IO statistics IFF:
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request
* b) the queue had IO stats enabled when this request was started
*/
static inline bool blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->rq_flags & RQF_IO_STAT) &&
!blk_rq_is_passthrough(rq);
return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
}

static inline void req_set_nomerge(struct request_queue *q, struct request *req)

@@ -832,3 +832,12 @@ struct request *elv_rb_latter_request(struct request_queue *q,
return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);

static int __init elevator_setup(char *str)
{
pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
"Please use sysfs to set IO scheduler for individual devices.\n");
return 1;
}

__setup("elevator=", elevator_setup);

@@ -1385,7 +1385,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"%lu %lu %lu %u "
"%lu %lu %lu %u "
"%u %u %u "
"%lu %lu %lu %u\n",
"%lu %lu %lu %u "
"%lu %u"
"\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[STAT_READ]),
@@ -1402,7 +1404,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_read(hd, ios[STAT_DISCARD]),
part_stat_read(hd, merges[STAT_DISCARD]),
part_stat_read(hd, sectors[STAT_DISCARD]),
(unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
(unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
part_stat_read(hd, ios[STAT_FLUSH]),
(unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
);
}
disk_part_iter_exit(&piter);

@@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
case BLKOPENZONE:
case BLKCLOSEZONE:
case BLKFINISHZONE:
return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:

@@ -76,7 +76,6 @@ enum opal_response_token {
* Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Section: 6.3 Assigned UIDs
*/
#define OPAL_UID_LENGTH 8
#define OPAL_METHOD_LENGTH 8
#define OPAL_MSID_KEYLEN 15
#define OPAL_UID_LENGTH_HALF 4
@@ -108,6 +107,7 @@ enum opal_uid {
OPAL_C_PIN_TABLE,
OPAL_LOCKING_INFO_TABLE,
OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
OPAL_DATASTORE,
/* C_PIN_TABLE object ID's */
OPAL_C_PIN_MSID,
OPAL_C_PIN_SID,
@@ -205,6 +205,10 @@ enum opal_lockingstate {
OPAL_LOCKING_LOCKED = 0x03,
};

enum opal_parameter {
OPAL_SUM_SET_LIST = 0x060000,
};

/* Packets derived from:
* TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Secion: 3.2.3 ComPackets, Packets & Subpackets

@@ -127,7 +127,8 @@ ssize_t part_stat_show(struct device *dev,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
"%8u %8u %8u "
"%8lu %8lu %8llu %8u"
"%8lu %8lu %8llu %8u "
"%8lu %8u"
"\n",
part_stat_read(p, ios[STAT_READ]),
part_stat_read(p, merges[STAT_READ]),
@@ -143,7 +144,9 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, ios[STAT_DISCARD]),
part_stat_read(p, merges[STAT_DISCARD]),
(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
(unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
(unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
part_stat_read(p, ios[STAT_FLUSH]),
(unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
}

ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
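Editor's note: after the two stat hunks above, /sys/block/<dev>/stat and /proc/diskstats each gain two trailing fields, the number of flush requests completed and the milliseconds spent flushing. The sketch below reads the sysfs file and prints them; the device name is only an example, and the 17-field layout assumes a kernel carrying this series.

/* Userspace sketch: print the two flush fields appended to <dev>/stat. */
#include <stdio.h>

int main(void)
{
	unsigned long long f[17] = { 0 };
	FILE *fp = fopen("/sys/block/sda/stat", "r");	/* example device */
	int n;

	if (!fp)
		return 1;

	n = fscanf(fp,
		   "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu"
		   " %llu %llu %llu %llu %llu %llu",
		   &f[0], &f[1], &f[2], &f[3], &f[4], &f[5], &f[6], &f[7],
		   &f[8], &f[9], &f[10], &f[11], &f[12], &f[13], &f[14],
		   &f[15], &f[16]);
	fclose(fp);

	/* With this series the file carries 17 fields; the last two are flush stats. */
	if (n == 17)
		printf("flush requests: %llu, time flushing (ms): %llu\n",
		       f[15], f[16]);
	return 0;
}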
block/sed-opal.c

@@ -149,6 +149,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 },
[OPAL_ENTERPRISE_LOCKING_INFO_TABLE] =
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
[OPAL_DATASTORE] =
{ 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 },

/* C_PIN_TABLE object ID's */
[OPAL_C_PIN_MSID] =
@@ -1139,11 +1141,11 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
*
* the result is provided in dev->resp->tok[4]
*/
static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
static int generic_get_table_info(struct opal_dev *dev, const u8 *table_uid,
u64 column)
{
u8 uid[OPAL_UID_LENGTH];
const unsigned int half = OPAL_UID_LENGTH/2;
const unsigned int half = OPAL_UID_LENGTH_HALF;

/* sed-opal UIDs can be split in two halves:
* first: actual table index
@@ -1152,7 +1154,7 @@ static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
* first part of the target table as relative index into that table
*/
memcpy(uid, opaluid[OPAL_TABLE_TABLE], half);
memcpy(uid+half, opaluid[table], half);
memcpy(uid + half, table_uid, half);

return generic_get_column(dev, uid, column);
}
@@ -1221,6 +1223,75 @@ static int get_active_key(struct opal_dev *dev, void *data)
return get_active_key_cont(dev);
}

static int generic_table_write_data(struct opal_dev *dev, const u64 data,
u64 offset, u64 size, const u8 *uid)
{
const u8 __user *src = (u8 __user *)(uintptr_t)data;
u8 *dst;
u64 len;
size_t off = 0;
int err;

/* do we fit in the available space? */
err = generic_get_table_info(dev, uid, OPAL_TABLE_ROWS);
if (err) {
pr_debug("Couldn't get the table size\n");
return err;
}

len = response_get_u64(&dev->parsed, 4);
if (size > len || offset > len - size) {
pr_debug("Does not fit in the table (%llu vs. %llu)\n",
offset + size, len);
return -ENOSPC;
}

/* do the actual transmission(s) */
while (off < size) {
err = cmd_start(dev, uid, opalmethod[OPAL_SET]);
add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_WHERE);
add_token_u64(&err, dev, offset + off);
add_token_u8(&err, dev, OPAL_ENDNAME);

add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_VALUES);

/*
* The bytestring header is either 1 or 2 bytes, so assume 2.
* There also needs to be enough space to accommodate the
* trailing OPAL_ENDNAME (1 byte) and tokens added by
* cmd_finalize.
*/
len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
(size_t)(size - off));
pr_debug("Write bytes %zu+%llu/%llu\n", off, len, size);

dst = add_bytestring_header(&err, dev, len);
if (!dst)
break;

if (copy_from_user(dst, src + off, len)) {
err = -EFAULT;
break;
}

dev->pos += len;

add_token_u8(&err, dev, OPAL_ENDNAME);
if (err)
break;

err = finalize_and_send(dev, parse_and_check_status);
if (err)
break;

off += len;
}

return err;
}

static int generic_lr_enable_disable(struct opal_dev *dev,
u8 *uid, bool rle, bool wle,
bool rl, bool wl)
@@ -1583,68 +1654,9 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
static int write_shadow_mbr(struct opal_dev *dev, void *data)
{
struct opal_shadow_mbr *shadow = data;
const u8 __user *src;
u8 *dst;
size_t off = 0;
u64 len;
int err = 0;

/* do we fit in the available shadow mbr space? */
err = generic_get_table_info(dev, OPAL_MBR, OPAL_TABLE_ROWS);
if (err) {
pr_debug("MBR: could not get shadow size\n");
return err;
}

len = response_get_u64(&dev->parsed, 4);
if (shadow->size > len || shadow->offset > len - shadow->size) {
pr_debug("MBR: does not fit in shadow (%llu vs. %llu)\n",
shadow->offset + shadow->size, len);
return -ENOSPC;
}

/* do the actual transmission(s) */
src = (u8 __user *)(uintptr_t)shadow->data;
while (off < shadow->size) {
err = cmd_start(dev, opaluid[OPAL_MBR], opalmethod[OPAL_SET]);
add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_WHERE);
add_token_u64(&err, dev, shadow->offset + off);
add_token_u8(&err, dev, OPAL_ENDNAME);

add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_VALUES);

/*
* The bytestring header is either 1 or 2 bytes, so assume 2.
* There also needs to be enough space to accommodate the
* trailing OPAL_ENDNAME (1 byte) and tokens added by
* cmd_finalize.
*/
len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
(size_t)(shadow->size - off));
pr_debug("MBR: write bytes %zu+%llu/%llu\n",
off, len, shadow->size);

dst = add_bytestring_header(&err, dev, len);
if (!dst)
break;
if (copy_from_user(dst, src + off, len))
err = -EFAULT;
dev->pos += len;

add_token_u8(&err, dev, OPAL_ENDNAME);
if (err)
break;

err = finalize_and_send(dev, parse_and_check_status);
if (err)
break;

off += len;
}

return err;
return generic_table_write_data(dev, shadow->data, shadow->offset,
shadow->size, opaluid[OPAL_MBR]);
}

static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
@@ -1874,7 +1886,6 @@ static int activate_lsp(struct opal_dev *dev, void *data)
{
struct opal_lr_act *opal_act = data;
u8 user_lr[OPAL_UID_LENGTH];
u8 uint_3 = 0x83;
int err, i;

err = cmd_start(dev, opaluid[OPAL_LOCKINGSP_UID],
@@ -1887,10 +1898,7 @@ static int activate_lsp(struct opal_dev *dev, void *data)
return err;

add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, uint_3);
add_token_u8(&err, dev, 6);
add_token_u8(&err, dev, 0);
add_token_u8(&err, dev, 0);
add_token_u64(&err, dev, OPAL_SUM_SET_LIST);

add_token_u8(&err, dev, OPAL_STARTLIST);
add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
@@ -1957,6 +1965,113 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
return 0;
}

static int write_table_data(struct opal_dev *dev, void *data)
{
struct opal_read_write_table *write_tbl = data;

return generic_table_write_data(dev, write_tbl->data, write_tbl->offset,
write_tbl->size, write_tbl->table_uid);
}

static int read_table_data_cont(struct opal_dev *dev)
{
int err;
const char *data_read;

err = parse_and_check_status(dev);
if (err)
return err;

dev->prev_d_len = response_get_string(&dev->parsed, 1, &data_read);
dev->prev_data = (void *)data_read;
if (!dev->prev_data) {
pr_debug("%s: Couldn't read data from the table.\n", __func__);
return OPAL_INVAL_PARAM;
}

return 0;
}

/*
* IO_BUFFER_LENGTH = 2048
* sizeof(header) = 56
* No. of Token Bytes in the Response = 11
* MAX size of data that can be carried in response buffer
* at a time is : 2048 - (56 + 11) = 1981 = 0x7BD.
*/
#define OPAL_MAX_READ_TABLE (0x7BD)

static int read_table_data(struct opal_dev *dev, void *data)
{
struct opal_read_write_table *read_tbl = data;
int err;
size_t off = 0, max_read_size = OPAL_MAX_READ_TABLE;
u64 table_len, len;
u64 offset = read_tbl->offset, read_size = read_tbl->size - 1;
u8 __user *dst;

err = generic_get_table_info(dev, read_tbl->table_uid, OPAL_TABLE_ROWS);
if (err) {
pr_debug("Couldn't get the table size\n");
return err;
}

table_len = response_get_u64(&dev->parsed, 4);

/* Check if the user is trying to read from the table limits */
if (read_size > table_len || offset > table_len - read_size) {
pr_debug("Read size exceeds the Table size limits (%llu vs. %llu)\n",
offset + read_size, table_len);
return -EINVAL;
}

while (off < read_size) {
err = cmd_start(dev, read_tbl->table_uid, opalmethod[OPAL_GET]);

add_token_u8(&err, dev, OPAL_STARTLIST);
add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_STARTROW);
add_token_u64(&err, dev, offset + off); /* start row value */
add_token_u8(&err, dev, OPAL_ENDNAME);

add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_ENDROW);

len = min(max_read_size, (size_t)(read_size - off));
add_token_u64(&err, dev, offset + off + len); /* end row value */
add_token_u8(&err, dev, OPAL_ENDNAME);
add_token_u8(&err, dev, OPAL_ENDLIST);

if (err) {
pr_debug("Error building read table data command.\n");
break;
}

err = finalize_and_send(dev, read_table_data_cont);
if (err)
break;

/* len+1: This includes the NULL terminator at the end*/
if (dev->prev_d_len > len + 1) {
err = -EOVERFLOW;
break;
}

dst = (u8 __user *)(uintptr_t)read_tbl->data;
if (copy_to_user(dst + off, dev->prev_data, dev->prev_d_len)) {
pr_debug("Error copying data to userspace\n");
err = -EFAULT;
break;
}
dev->prev_data = NULL;

off += len;
}

return err;
}

static int end_opal_session(struct opal_dev *dev, void *data)
{
int err = 0;
@@ -2443,6 +2558,68 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
}
EXPORT_SYMBOL(opal_unlock_from_suspend);

static int opal_read_table(struct opal_dev *dev,
struct opal_read_write_table *rw_tbl)
{
const struct opal_step read_table_steps[] = {
{ start_admin1LSP_opal_session, &rw_tbl->key },
{ read_table_data, rw_tbl },
{ end_opal_session, }
};
int ret = 0;

if (!rw_tbl->size)
return ret;

return execute_steps(dev, read_table_steps,
ARRAY_SIZE(read_table_steps));
}

static int opal_write_table(struct opal_dev *dev,
struct opal_read_write_table *rw_tbl)
{
const struct opal_step write_table_steps[] = {
{ start_admin1LSP_opal_session, &rw_tbl->key },
{ write_table_data, rw_tbl },
{ end_opal_session, }
};
int ret = 0;

if (!rw_tbl->size)
return ret;

return execute_steps(dev, write_table_steps,
ARRAY_SIZE(write_table_steps));
}

static int opal_generic_read_write_table(struct opal_dev *dev,
struct opal_read_write_table *rw_tbl)
{
int ret, bit_set;

mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);

bit_set = fls64(rw_tbl->flags) - 1;
switch (bit_set) {
case OPAL_READ_TABLE:
ret = opal_read_table(dev, rw_tbl);
break;
case OPAL_WRITE_TABLE:
ret = opal_write_table(dev, rw_tbl);
break;
default:
pr_debug("Invalid bit set in the flag (%016llx).\n",
rw_tbl->flags);
ret = -EINVAL;
break;
}

mutex_unlock(&dev->dev_lock);

return ret;
}

int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
{
void *p;
@@ -2505,6 +2682,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
case IOC_OPAL_PSID_REVERT_TPR:
ret = opal_reverttper(dev, p, true);
break;
case IOC_OPAL_GENERIC_TABLE_RW:
ret = opal_generic_read_write_table(dev, p);
break;
default:
break;
}
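Editor's note: the IOC_OPAL_GENERIC_TABLE_RW path added above is driven from userspace through struct opal_read_write_table. The sketch below is hypothetical: the field names mirror the kernel code in this diff (key, table_uid, data, offset, size, flags), but the exact uapi layout and any flag macros should be checked against include/uapi/linux/sed-opal.h on a 5.5+ kernel before relying on it.

/* Hypothetical userspace sketch: write a buffer into the OPAL datastore table. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

static int opal_write_datastore(int fd, const char *admin1_pw,
				const unsigned char *table_uid /* 8 bytes */,
				const void *buf, unsigned long long len)
{
	/* assumed flag bit: the kernel matches fls64(flags)-1 against OPAL_WRITE_TABLE */
	struct opal_read_write_table rw = {
		.data  = (unsigned long long)(uintptr_t)buf,
		.offset = 0,
		.size  = len,
		.flags = 1ULL << OPAL_WRITE_TABLE,
	};

	memcpy(rw.table_uid, table_uid, sizeof(rw.table_uid));
	rw.key.key_len = strlen(admin1_pw);		/* Admin1 PIN */
	memcpy(rw.key.key, admin1_pw, rw.key.key_len);

	return ioctl(fd, IOC_OPAL_GENERIC_TABLE_RW, &rw);
}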

@@ -235,16 +235,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
}

/**
* Type 3 does not have a reference tag so no remapping is required.
*/
/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_prepare(struct request *rq)
{
}

/**
* Type 3 does not have a reference tag so no remapping is required.
*/
/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
{
}

@@ -1312,9 +1312,9 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zmd->dev;

ret = blkdev_reset_zones(dev->bdev,
dmz_start_sect(zmd, zone),
dev->zone_nr_sectors, GFP_NOIO);
ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
dmz_start_sect(zmd, zone),
dev->zone_nr_sectors, GFP_NOIO);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
dmz_id(zmd, zone), ret);

@@ -1403,11 +1403,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
"resized disk %s\n",
bdev->bd_disk ? bdev->bd_disk->disk_name : "");
}

if (!bdev->bd_disk)
return;
if (disk_part_scan_enabled(bdev->bd_disk))
bdev->bd_invalidated = 1;
bdev->bd_invalidated = 1;
}

/**
@@ -1512,6 +1508,19 @@ EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
if (disk_part_scan_enabled(bdev->bd_disk)) {
if (invalidate)
invalidate_partitions(bdev->bd_disk, bdev);
else
rescan_partitions(bdev->bd_disk, bdev);
} else {
check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
bdev->bd_invalidated = 0;
}
}

/*
* bd_mutex locking:
*
@@ -1594,12 +1603,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
if (bdev->bd_invalidated) {
if (!ret)
rescan_partitions(disk, bdev);
else if (ret == -ENOMEDIUM)
invalidate_partitions(disk, bdev);
}
if (bdev->bd_invalidated &&
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);

if (ret)
goto out_clear;
@@ -1632,12 +1638,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
if (bdev->bd_invalidated) {
if (!ret)
rescan_partitions(bdev->bd_disk, bdev);
else if (ret == -ENOMEDIUM)
invalidate_partitions(bdev->bd_disk, bdev);
}
if (bdev->bd_invalidated &&
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_unlock_bdev;
}

@@ -1771,7 +1771,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
sector, nr_sects, GFP_NOFS);
}

/* For conventional zones, use regular discard if supported */

@@ -261,7 +261,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
static bool rw_hint_valid(enum rw_hint hint)
{
switch (hint) {
case RWF_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE:
case RWH_WRITE_LIFE_SHORT:
case RWH_WRITE_LIFE_MEDIUM:
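Editor's note: the fcntl.c hunk above only corrects the misspelled RWF_WRITE_LIFE_NOT_SET case; the user-visible interface remains fcntl(F_SET_RW_HINT) with the RWH_WRITE_LIFE_* values. A minimal userspace sketch, assuming glibc headers new enough to expose these names (otherwise the constants live in linux/fcntl.h), with the file name invented for illustration:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* expected write lifetime */
	int fd = open("testfile", O_CREAT | O_WRONLY, 0644);

	if (fd < 0)
		return 1;

	/* F_SET_RW_HINT takes a pointer to a 64-bit hint value */
	if (fcntl(fd, F_SET_RW_HINT, &hint) < 0)
		perror("F_SET_RW_HINT");

	return 0;
}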
@@ -15,7 +15,9 @@
*/

#include <linux/cgroup.h>
#include <linux/percpu.h>
#include <linux/percpu_counter.h>
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
@@ -31,15 +33,12 @@

#ifdef CONFIG_BLK_CGROUP

enum blkg_rwstat_type {
BLKG_RWSTAT_READ,
BLKG_RWSTAT_WRITE,
BLKG_RWSTAT_SYNC,
BLKG_RWSTAT_ASYNC,
BLKG_RWSTAT_DISCARD,
enum blkg_iostat_type {
BLKG_IOSTAT_READ,
BLKG_IOSTAT_WRITE,
BLKG_IOSTAT_DISCARD,

BLKG_RWSTAT_NR,
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
BLKG_IOSTAT_NR,
};

struct blkcg_gq;
@@ -61,17 +60,15 @@ struct blkcg {
#endif
};

/*
* blkg_[rw]stat->aux_cnt is excluded for local stats but included for
* recursive. Used to carry stats of dead children.
*/
struct blkg_rwstat {
struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
atomic64_t aux_cnt[BLKG_RWSTAT_NR];
struct blkg_iostat {
u64 bytes[BLKG_IOSTAT_NR];
u64 ios[BLKG_IOSTAT_NR];
};

struct blkg_rwstat_sample {
u64 cnt[BLKG_RWSTAT_NR];
struct blkg_iostat_set {
struct u64_stats_sync sync;
struct blkg_iostat cur;
struct blkg_iostat last;
};

/*
@@ -127,8 +124,8 @@ struct blkcg_gq {
/* is this blkg online? protected by both blkcg and q locks */
bool online;

struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
struct blkg_iostat_set __percpu *iostat_cpu;
struct blkg_iostat_set iostat;

struct blkg_policy_data *pd[BLKCG_MAX_POLS];

@@ -202,13 +199,6 @@ int blkcg_activate_policy(struct request_queue *q,
void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol);

static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
unsigned int idx)
{
return atomic64_read(&rwstat->aux_cnt[idx]) +
percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
}

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 (*prfill)(struct seq_file *,
@@ -216,17 +206,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
const struct blkcg_policy *pol, int data,
bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
const struct blkg_rwstat_sample *rwstat);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
int off);
int blkg_print_stat_bytes(struct seq_file *sf, void *v);
int blkg_print_stat_ios(struct seq_file *sf, void *v);
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
int off, struct blkg_rwstat_sample *sum);

struct blkg_conf_ctx {
struct gendisk *disk;
@@ -578,128 +557,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))

static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
int i, ret;

for (i = 0; i < BLKG_RWSTAT_NR; i++) {
ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
if (ret) {
while (--i >= 0)
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
return ret;
}
atomic64_set(&rwstat->aux_cnt[i], 0);
}
return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
int i;

for (i = 0; i < BLKG_RWSTAT_NR; i++)
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
* blkg_rwstat_add - add a value to a blkg_rwstat
* @rwstat: target blkg_rwstat
* @op: REQ_OP and flags
* @val: value to add
*
* Add @val to @rwstat. The counters are chosen according to @rw. The
* caller is responsible for synchronizing calls to this function.
*/
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
unsigned int op, uint64_t val)
{
struct percpu_counter *cnt;

if (op_is_discard(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
else if (op_is_write(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

if (op_is_sync(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
* blkg_rwstat_read - read the current values of a blkg_rwstat
* @rwstat: blkg_rwstat to read
*
* Read the current snapshot of @rwstat and return it in the aux counts.
*/
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
struct blkg_rwstat_sample *result)
{
int i;

for (i = 0; i < BLKG_RWSTAT_NR; i++)
result->cnt[i] =
percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
}

/**
* blkg_rwstat_total - read the total count of a blkg_rwstat
* @rwstat: blkg_rwstat to read
*
* Return the total count of @rwstat regardless of the IO direction. This
* function can be called without synchronization and takes care of u64
* atomicity.
*/
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
struct blkg_rwstat_sample tmp = { };

blkg_rwstat_read(rwstat, &tmp);
return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
}

/**
* blkg_rwstat_reset - reset a blkg_rwstat
* @rwstat: blkg_rwstat to reset
*/
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
int i;

for (i = 0; i < BLKG_RWSTAT_NR; i++) {
percpu_counter_set(&rwstat->cpu_cnt[i], 0);
atomic64_set(&rwstat->aux_cnt[i], 0);
}
}

/**
* blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
* @to: the destination blkg_rwstat
* @from: the source
*
* Add @from's count including the aux one to @to's aux count.
*/
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
struct blkg_rwstat *from)
{
u64 sum[BLKG_RWSTAT_NR];
int i;

for (i = 0; i < BLKG_RWSTAT_NR; i++)
sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

for (i = 0; i < BLKG_RWSTAT_NR; i++)
atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
&to->aux_cnt[i]);
}

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio);
@@ -745,15 +602,33 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
throtl = blk_throtl_bio(q, blkg, bio);

if (!throtl) {
struct blkg_iostat_set *bis;
int rwd, cpu;

if (op_is_discard(bio->bi_opf))
rwd = BLKG_IOSTAT_DISCARD;
else if (op_is_write(bio->bi_opf))
rwd = BLKG_IOSTAT_WRITE;
else
rwd = BLKG_IOSTAT_READ;

cpu = get_cpu();
bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
u64_stats_update_begin(&bis->sync);

/*
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
* is a split bio and we would have already accounted for the
* size of the bio.
*/
if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
bio->bi_iter.bi_size);
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
bis->cur.ios[rwd]++;

u64_stats_update_end(&bis->sync);
if (cgroup_subsys_on_dfl(io_cgrp_subsys))
cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
put_cpu();
}

blkcg_bio_issue_init(bio);
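Editor's note: blkcg_bio_issue_check() now bumps a per-CPU blkg_iostat_set inside u64_stats_update_begin()/end(). Readers of such a structure use the matching fetch/retry loop so a torn 64-bit value is never observed on 32-bit builds. The sketch below illustrates that pattern with an invented structure; it is not the kernel's actual consumer code.

/* Sketch only: consistent read of a u64_stats_sync protected pair. */
#include <linux/types.h>
#include <linux/u64_stats_sync.h>

struct demo_iostat {
	struct u64_stats_sync sync;
	u64 bytes;
	u64 ios;
};

static void demo_read(struct demo_iostat *s, u64 *bytes, u64 *ios)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->sync);	/* snapshot begins */
		*bytes = s->bytes;
		*ios = s->ios;
	} while (u64_stats_fetch_retry(&s->sync, start));	/* retry if a writer raced */
}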
@ -10,103 +10,239 @@ struct blk_mq_tags;
|
||||
struct blk_flush_queue;
|
||||
|
||||
/**
|
||||
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
|
||||
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
|
||||
* block device
|
||||
*/
|
||||
struct blk_mq_hw_ctx {
|
||||
struct {
|
||||
/** @lock: Protects the dispatch list. */
|
||||
spinlock_t lock;
|
||||
/**
|
||||
* @dispatch: Used for requests that are ready to be
|
||||
* dispatched to the hardware but for some reason (e.g. lack of
|
||||
* resources) could not be sent to the hardware. As soon as the
|
||||
* driver can send new requests, requests at this list will
|
||||
* be sent first for a fairer dispatch.
|
||||
*/
|
||||
struct list_head dispatch;
|
||||
unsigned long state; /* BLK_MQ_S_* flags */
|
||||
/**
|
||||
* @state: BLK_MQ_S_* flags. Defines the state of the hw
|
||||
* queue (active, scheduled to restart, stopped).
|
||||
*/
|
||||
unsigned long state;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/**
|
||||
* @run_work: Used for scheduling a hardware queue run at a later time.
|
||||
*/
|
||||
struct delayed_work run_work;
|
||||
/** @cpumask: Map of available CPUs where this hctx can run. */
|
||||
cpumask_var_t cpumask;
|
||||
/**
|
||||
* @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
|
||||
* selection from @cpumask.
|
||||
*/
|
||||
int next_cpu;
|
||||
/**
|
||||
* @next_cpu_batch: Counter of how many works left in the batch before
|
||||
* changing to the next CPU.
|
||||
*/
|
||||
int next_cpu_batch;
|
||||
|
||||
unsigned long flags; /* BLK_MQ_F_* flags */
|
||||
/** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
|
||||
unsigned long flags;
|
||||
|
||||
/**
|
||||
* @sched_data: Pointer owned by the IO scheduler attached to a request
|
||||
* queue. It's up to the IO scheduler how to use this pointer.
|
||||
*/
|
||||
void *sched_data;
|
||||
/**
|
||||
* @queue: Pointer to the request queue that owns this hardware context.
|
||||
*/
|
||||
struct request_queue *queue;
|
||||
/** @fq: Queue of requests that need to perform a flush operation. */
|
||||
struct blk_flush_queue *fq;
|
||||
|
||||
/**
|
||||
* @driver_data: Pointer to data owned by the block driver that created
|
||||
* this hctx
|
||||
*/
|
||||
void *driver_data;
|
||||
|
||||
/**
|
||||
* @ctx_map: Bitmap for each software queue. If bit is on, there is a
|
||||
* pending request in that software queue.
|
||||
*/
|
||||
struct sbitmap ctx_map;
|
||||
|
||||
/**
|
||||
* @dispatch_from: Software queue to be used when no scheduler was
|
||||
* selected.
|
||||
*/
|
||||
struct blk_mq_ctx *dispatch_from;
|
||||
/**
|
||||
* @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
|
||||
* decide if the hw_queue is busy using Exponential Weighted Moving
|
||||
* Average algorithm.
|
||||
*/
|
||||
unsigned int dispatch_busy;
|
||||
|
||||
/** @type: HCTX_TYPE_* flags. Type of hardware queue. */
|
||||
unsigned short type;
|
||||
/** @nr_ctx: Number of software queues. */
|
||||
unsigned short nr_ctx;
|
||||
/** @ctxs: Array of software queues. */
|
||||
struct blk_mq_ctx **ctxs;
|
||||
|
||||
/** @dispatch_wait_lock: Lock for dispatch_wait queue. */
|
||||
spinlock_t dispatch_wait_lock;
|
||||
/**
|
||||
* @dispatch_wait: Waitqueue to put requests when there is no tag
|
||||
* available at the moment, to wait for another try in the future.
|
||||
*/
|
||||
wait_queue_entry_t dispatch_wait;
|
||||
|
||||
/**
|
||||
* @wait_index: Index of next available dispatch_wait queue to insert
|
||||
* requests.
|
||||
*/
|
||||
atomic_t wait_index;
|
||||
|
||||
/**
|
||||
* @tags: Tags owned by the block driver. A tag at this set is only
|
||||
* assigned when a request is dispatched from a hardware queue.
|
||||
*/
|
||||
struct blk_mq_tags *tags;
|
||||
/**
|
||||
* @sched_tags: Tags owned by I/O scheduler. If there is an I/O
|
||||
* scheduler associated with a request queue, a tag is assigned when
|
||||
* that request is allocated. Else, this member is not used.
|
||||
*/
|
||||
struct blk_mq_tags *sched_tags;
|
||||
|
||||
/** @queued: Number of queued requests. */
|
||||
unsigned long queued;
|
||||
/** @run: Number of dispatched requests. */
|
||||
unsigned long run;
|
||||
#define BLK_MQ_MAX_DISPATCH_ORDER 7
|
||||
/** @dispatched: Number of dispatch requests by queue. */
|
||||
unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
|
||||
|
||||
/** @numa_node: NUMA node the storage adapter has been connected to. */
|
||||
unsigned int numa_node;
|
||||
/** @queue_num: Index of this hardware queue. */
|
||||
unsigned int queue_num;
|
||||
|
||||
/**
|
||||
* @nr_active: Number of active requests. Only used when a tag set is
|
||||
* shared across request queues.
|
||||
*/
|
||||
atomic_t nr_active;
|
||||
|
||||
/** @cpuhp_dead: List to store request if some CPU die. */
|
||||
struct hlist_node cpuhp_dead;
|
||||
/** @kobj: Kernel object for sysfs. */
|
||||
struct kobject kobj;
|
||||
|
||||
/** @poll_considered: Count times blk_poll() was called. */
|
||||
unsigned long poll_considered;
|
||||
/** @poll_invoked: Count how many requests blk_poll() polled. */
|
||||
unsigned long poll_invoked;
|
||||
/** @poll_success: Count how many polled requests were completed. */
|
||||
unsigned long poll_success;
|
||||
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
/**
|
||||
* @debugfs_dir: debugfs directory for this hardware queue. Named
|
||||
* as cpu<cpu_number>.
|
||||
*/
|
||||
struct dentry *debugfs_dir;
|
||||
/** @sched_debugfs_dir: debugfs directory for the scheduler. */
|
||||
struct dentry *sched_debugfs_dir;
|
||||
#endif
|
||||
|
||||
/** @hctx_list: List of all hardware queues. */
|
||||
struct list_head hctx_list;
|
||||
|
||||
/* Must be the last member - see also blk_mq_hw_ctx_size(). */
|
||||
/**
|
||||
* @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
|
||||
* blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
|
||||
* blk_mq_hw_ctx_size().
|
||||
*/
|
||||
struct srcu_struct srcu[0];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_queue_map - Map software queues to hardware queues
|
||||
* @mq_map: CPU ID to hardware queue index map. This is an array
|
||||
* with nr_cpu_ids elements. Each element has a value in the range
|
||||
* [@queue_offset, @queue_offset + @nr_queues).
|
||||
* @nr_queues: Number of hardware queues to map CPU IDs onto.
|
||||
* @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
|
||||
* driver to map each hardware queue type (enum hctx_type) onto a distinct
|
||||
* set of hardware queues.
|
||||
*/
|
||||
struct blk_mq_queue_map {
|
||||
unsigned int *mq_map;
|
||||
unsigned int nr_queues;
|
||||
unsigned int queue_offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum hctx_type - Type of hardware queue
|
||||
* @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for.
|
||||
* @HCTX_TYPE_READ: Just for READ I/O.
|
||||
* @HCTX_TYPE_POLL: Polled I/O of any kind.
|
||||
* @HCTX_MAX_TYPES: Number of types of hctx.
|
||||
*/
|
||||
enum hctx_type {
|
||||
HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
|
||||
HCTX_TYPE_READ, /* just for READ I/O */
|
||||
HCTX_TYPE_POLL, /* polled I/O of any kind */
|
||||
HCTX_TYPE_DEFAULT,
|
||||
HCTX_TYPE_READ,
|
||||
HCTX_TYPE_POLL,
|
||||
|
||||
HCTX_MAX_TYPES,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_tag_set - tag set that can be shared between request queues
|
||||
* @map: One or more ctx -> hctx mappings. One map exists for each
|
||||
* hardware queue type (enum hctx_type) that the driver wishes
|
||||
* to support. There are no restrictions on maps being of the
|
||||
* same size, and it's perfectly legal to share maps between
|
||||
* types.
|
||||
* @nr_maps: Number of elements in the @map array. A number in the range
|
||||
* [1, HCTX_MAX_TYPES].
|
||||
* @ops: Pointers to functions that implement block driver behavior.
|
||||
* @nr_hw_queues: Number of hardware queues supported by the block driver that
|
||||
* owns this data structure.
|
||||
* @queue_depth: Number of tags per hardware queue, reserved tags included.
|
||||
* @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
|
||||
* allocations.
|
||||
* @cmd_size: Number of additional bytes to allocate per request. The block
|
||||
* driver owns these additional bytes.
|
||||
* @numa_node: NUMA node the storage adapter has been connected to.
|
||||
* @timeout: Request processing timeout in jiffies.
|
||||
* @flags: Zero or more BLK_MQ_F_* flags.
|
||||
* @driver_data: Pointer to data owned by the block driver that created this
|
||||
* tag set.
|
||||
* @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues
|
||||
* elements.
|
||||
* @tag_list_lock: Serializes tag_list accesses.
|
||||
* @tag_list: List of the request queues that use this tag set. See also
|
||||
* request_queue.tag_set_list.
|
||||
*/
|
||||
struct blk_mq_tag_set {
|
||||
/*
|
||||
* map[] holds ctx -> hctx mappings, one map exists for each type
|
||||
* that the driver wishes to support. There are no restrictions
|
||||
* on maps being of the same size, and it's perfectly legal to
|
||||
* share maps between types.
|
||||
*/
|
||||
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
|
||||
unsigned int nr_maps; /* nr entries in map[] */
|
||||
unsigned int nr_maps;
|
||||
const struct blk_mq_ops *ops;
|
||||
unsigned int nr_hw_queues; /* nr hw queues across maps */
|
||||
unsigned int queue_depth; /* max hw supported */
|
||||
unsigned int nr_hw_queues;
|
||||
unsigned int queue_depth;
|
||||
unsigned int reserved_tags;
|
||||
unsigned int cmd_size; /* per-request extra data */
|
||||
unsigned int cmd_size;
|
||||
int numa_node;
|
||||
unsigned int timeout;
|
||||
unsigned int flags; /* BLK_MQ_F_* */
|
||||
unsigned int flags;
|
||||
void *driver_data;
|
||||
|
||||
struct blk_mq_tags **tags;
|
||||
@ -115,6 +251,12 @@ struct blk_mq_tag_set {
|
||||
struct list_head tag_list;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct blk_mq_queue_data - Data about a request inserted in a queue
|
||||
*
|
||||
* @rq: Request pointer.
|
||||
* @last: If it is the last request in the queue.
|
||||
*/
|
||||
struct blk_mq_queue_data {
|
||||
struct request *rq;
|
||||
bool last;
|
||||
@ -142,81 +284,101 @@ typedef bool (busy_fn)(struct request_queue *);
|
||||
typedef void (complete_fn)(struct request *);
|
||||
typedef void (cleanup_rq_fn)(struct request *);
|
||||
|
||||
|
||||
/**
 * struct blk_mq_ops - Callback functions that implements block driver
 * behaviour.
 */
struct blk_mq_ops {
-	/*
-	 * Queue request
+	/**
+	 * @queue_rq: Queue a new request from block IO.
	 */
	queue_rq_fn		*queue_rq;

-	/*
-	 * If a driver uses bd->last to judge when to submit requests to
-	 * hardware, it must define this function. In case of errors that
-	 * make us stop issuing further requests, this hook serves the
+	/**
+	 * @commit_rqs: If a driver uses bd->last to judge when to submit
+	 * requests to hardware, it must define this function. In case of errors
+	 * that make us stop issuing further requests, this hook serves the
	 * purpose of kicking the hardware (which the last request otherwise
	 * would have done).
	 */
	commit_rqs_fn		*commit_rqs;

-	/*
-	 * Reserve budget before queue request, once .queue_rq is
+	/**
+	 * @get_budget: Reserve budget before queue request, once .queue_rq is
	 * run, it is driver's responsibility to release the
	 * reserved budget. Also we have to handle failure case
	 * of .get_budget for avoiding I/O deadlock.
	 */
	get_budget_fn		*get_budget;
	/**
	 * @put_budget: Release the reserved budget.
	 */
	put_budget_fn		*put_budget;

-	/*
-	 * Called on request timeout
+	/**
+	 * @timeout: Called on request timeout.
	 */
	timeout_fn		*timeout;

-	/*
-	 * Called to poll for completion of a specific tag.
+	/**
+	 * @poll: Called to poll for completion of a specific tag.
	 */
	poll_fn			*poll;

	/**
	 * @complete: Mark the request as complete.
	 */
	complete_fn		*complete;

-	/*
-	 * Called when the block layer side of a hardware queue has been
-	 * set up, allowing the driver to allocate/init matching structures.
-	 * Ditto for exit/teardown.
+	/**
+	 * @init_hctx: Called when the block layer side of a hardware queue has
+	 * been set up, allowing the driver to allocate/init matching
	 * structures.
	 */
	init_hctx_fn		*init_hctx;
	/**
	 * @exit_hctx: Ditto for exit/teardown.
	 */
	exit_hctx_fn		*exit_hctx;

-	/*
-	 * Called for every command allocated by the block layer to allow
-	 * the driver to set up driver specific data.
+	/**
+	 * @init_request: Called for every command allocated by the block layer
+	 * to allow the driver to set up driver specific data.
	 *
	 * Tag greater than or equal to queue_depth is for setting up
	 * flush request.
	 *
-	 * Ditto for exit/teardown.
	 */
	init_request_fn		*init_request;
	/**
	 * @exit_request: Ditto for exit/teardown.
	 */
	exit_request_fn		*exit_request;
-	/* Called from inside blk_get_request() */

+	/**
+	 * @initialize_rq_fn: Called from inside blk_get_request().
+	 */
	void (*initialize_rq_fn)(struct request *rq);

-	/*
-	 * Called before freeing one request which isn't completed yet,
-	 * and usually for freeing the driver private data
+	/**
+	 * @cleanup_rq: Called before freeing one request which isn't completed
+	 * yet, and usually for freeing the driver private data.
	 */
	cleanup_rq_fn		*cleanup_rq;

-	/*
-	 * If set, returns whether or not this queue currently is busy
+	/**
+	 * @busy: If set, returns whether or not this queue currently is busy.
	 */
	busy_fn			*busy;

	/**
	 * @map_queues: This allows drivers specify their own queue mapping by
	 * overriding the setup-time function that builds the mq_map.
	 */
	map_queues_fn		*map_queues;

#ifdef CONFIG_BLK_DEBUG_FS
-	/*
-	 * Used by the debugfs implementation to show driver-specific
+	/**
+	 * @show_rq: Used by the debugfs implementation to show driver-specific
	 * information about a request.
	 */
	void (*show_rq)(struct seq_file *m, struct request *rq);
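
To make the callback documentation above concrete, here is a minimal, hypothetical .queue_rq implementation; my_queue_rq and struct my_cmd are invented names, while the helpers used (blk_mq_start_request(), blk_mq_rq_to_pdu(), blk_mq_end_request(), blk_rq_pos()) are the real ones declared in this header and in blkdev.h. A real driver would hand the request to hardware instead of completing it inline.

struct my_cmd {					/* hypothetical per-request PDU */
	sector_t pos;
};

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);	/* cmd_size bytes after rq */

	blk_mq_start_request(rq);

	/*
	 * Record where the I/O starts; a real driver would build and issue
	 * a hardware command here, using bd->last to decide when to ring
	 * the doorbell (with .commit_rqs covering the error/stop cases).
	 */
	cmd->pos = blk_rq_pos(rq);

	/* A trivial (null) device can complete the request immediately. */
	blk_mq_end_request(rq, BLK_STS_OK);
	return BLK_STS_OK;
}

static const struct blk_mq_ops my_mq_ops = {
	.queue_rq	= my_queue_rq,
};
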
@ -262,7 +424,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_free_request(struct request *rq);
-bool blk_mq_can_queue(struct blk_mq_hw_ctx *);

bool blk_mq_queue_inflight(struct request_queue *q);

@ -301,9 +462,25 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}

+/**
+ * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
+ * @rq: target request.
+ */
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
+{
+	return READ_ONCE(rq->state);
+}
+
+static inline int blk_mq_request_started(struct request *rq)
+{
+	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
+}
+
+static inline int blk_mq_request_completed(struct request *rq)
+{
+	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
+}
+
-int blk_mq_request_started(struct request *rq);
-int blk_mq_request_completed(struct request *rq);
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
@ -324,7 +501,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv);
@ -343,14 +520,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);

unsigned int blk_mq_rq_cpu(struct request *rq);

-/*
+/**
+ * blk_mq_rq_from_pdu - cast a PDU to a request
+ * @pdu: the PDU (Protocol Data Unit) to be casted
+ *
+ * Return: request
+ *
 * Driver command data is immediately after the request. So subtract request
- * size to get back to the original request, add request size to get the PDU.
+ * size to get back to the original request.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
	return pdu - sizeof(struct request);
}

+/**
+ * blk_mq_rq_to_pdu - cast a request to a PDU
+ * @rq: the request to be casted
+ *
+ * Return: pointer to the PDU
+ *
+ * Driver command data is immediately after the request. So add request to get
+ * the PDU.
+ */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
	return rq + 1;
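
The opposite direction is mostly used on completion paths: given the driver's per-request data (for instance looked up from a hardware completion entry), blk_mq_rq_from_pdu() recovers the request so it can be finished. A hypothetical helper, reusing the struct my_cmd PDU from the earlier sketches:

static void my_complete_cmd(struct my_cmd *cmd, blk_status_t status)
{
	/* The PDU sits immediately after its request, so step back to it. */
	struct request *rq = blk_mq_rq_from_pdu(cmd);

	blk_mq_end_request(rq, status);
}
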
@ -153,10 +153,10 @@ struct bio {
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	u8			bi_partno;
+	atomic_t		__bi_remaining;

	struct bvec_iter	bi_iter;

-	atomic_t		__bi_remaining;
	bio_end_io_t		*bi_end_io;

	void			*bi_private;
@ -290,6 +290,12 @@ enum req_opf {
	REQ_OP_ZONE_RESET_ALL	= 8,
	/* write the zero filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,
+	/* Open a zone */
+	REQ_OP_ZONE_OPEN	= 10,
+	/* Close a zone */
+	REQ_OP_ZONE_CLOSE	= 11,
+	/* Transition a zone to full */
+	REQ_OP_ZONE_FINISH	= 12,

	/* SCSI passthrough using struct scsi_request */
	REQ_OP_SCSI_IN		= 32,

@ -371,6 +377,7 @@ enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,
+	STAT_FLUSH,

	NR_STAT_GROUPS
};

@ -417,6 +424,25 @@ static inline bool op_is_discard(unsigned int op)
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

+/*
+ * Check if a bio or request operation is a zone management operation, with
+ * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
+ * due to its different handling in the block layer and device response in
+ * case of command failure.
+ */
+static inline bool op_is_zone_mgmt(enum req_opf op)
+{
+	switch (op & REQ_OP_MASK) {
+	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
+		return true;
+	default:
+		return false;
+	}
+}
+
static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
@ -360,14 +360,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
			       sector_t sector, struct blk_zone *zones,
			       unsigned int *nr_zones);
-extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
-			      sector_t nr_sectors, gfp_t gfp_mask);
+extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+			    sector_t sectors, sector_t nr_sectors,
+			    gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);

extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
				     unsigned int cmd, unsigned long arg);
-extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
-				    unsigned int cmd, unsigned long arg);
+extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
				  unsigned int cmd, unsigned long arg);

#else /* CONFIG_BLK_DEV_ZONED */

@ -388,9 +389,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
	return -ENOTTY;
}

-static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
-					   fmode_t mode, unsigned int cmd,
-					   unsigned long arg)
+static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
+					 fmode_t mode, unsigned int cmd,
+					 unsigned long arg)
{
	return -ENOTTY;
}

@ -411,7 +412,6 @@ struct request_queue {

	/* sw queues */
	struct blk_mq_ctx __percpu *queue_ctx;
	unsigned int nr_queues;

	unsigned int queue_depth;
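
For in-kernel callers, the effect of this change is that reset, open, close and finish all go through blkdev_zone_mgmt() with one of the REQ_OP_ZONE_* codes. A hedged sketch using the prototype added above; my_finish_zone is a hypothetical helper and the caller is assumed to already know the zone geometry:

#include <linux/blkdev.h>

/* Hypothetical helper: transition one zone to full (finish it). */
static int my_finish_zone(struct block_device *bdev, sector_t zone_start,
			  sector_t zone_sectors)
{
	return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_FINISH, zone_start,
				zone_sectors, GFP_KERNEL);
}
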
@ -216,15 +216,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
 */
bool sbitmap_any_bit_set(const struct sbitmap *sb);

-/**
- * sbitmap_any_bit_clear() - Check for an unset bit in a &struct
- * sbitmap.
- * @sb: Bitmap to check.
- *
- * Return: true if any bit in the bitmap is clear, false otherwise.
- */
-bool sbitmap_any_bit_clear(const struct sbitmap *sb);
-
#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))

@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
	case IOC_OPAL_PSID_REVERT_TPR:
	case IOC_OPAL_MBR_DONE:
	case IOC_OPAL_WRITE_SHADOW_MBR:
+	case IOC_OPAL_GENERIC_TABLE_RW:
		return true;
	}
	return false;
@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat,
	),

	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strlcpy(__entry->name, dev_name(bdi->dev),
+			ARRAY_SIZE(__entry->name));
		__entry->rmean	= stat[0].mean;
		__entry->rmin	= stat[0].min;
		__entry->rmax	= stat[0].max;

@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat,
	),

	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strlcpy(__entry->name, dev_name(bdi->dev),
+			ARRAY_SIZE(__entry->name));
		__entry->lat = div_u64(lat, 1000);
	),

@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step,
	),

	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strlcpy(__entry->name, dev_name(bdi->dev),
+			ARRAY_SIZE(__entry->name));
		__entry->msg	= msg;
		__entry->step	= step;
		__entry->window	= div_u64(window, 1000);

@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer,
	),

	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strlcpy(__entry->name, dev_name(bdi->dev),
+			ARRAY_SIZE(__entry->name));
		__entry->status		= status;
		__entry->step		= step;
		__entry->inflight	= inflight;

@ -120,9 +120,11 @@ struct blk_zone_report {
};

/**
- * struct blk_zone_range - BLKRESETZONE ioctl request
- * @sector: starting sector of the first zone to issue reset write pointer
- * @nr_sectors: Total number of sectors of 1 or more zones to reset
+ * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
+ *                         BLKCLOSEZONE/BLKFINISHZONE ioctl
+ *                         requests
+ * @sector: Starting sector of the first zone to operate on.
+ * @nr_sectors: Total number of sectors of all zones to operate on.
 */
struct blk_zone_range {
	__u64		sector;

@ -139,10 +141,19 @@ struct blk_zone_range {
 *             sector range. The sector range must be zone aligned.
 * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
 * @BLKGETNRZONES: Get the total number of zones of the device.
+ * @BLKOPENZONE: Open the zones in the specified sector range.
+ *               The 512 B sector range must be zone aligned.
+ * @BLKCLOSEZONE: Close the zones in the specified sector range.
+ *                The 512 B sector range must be zone aligned.
+ * @BLKFINISHZONE: Mark the zones as full in the specified sector range.
+ *                 The 512 B sector range must be zone aligned.
 */
#define BLKREPORTZONE	_IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE	_IOW(0x12, 131, struct blk_zone_range)
#define BLKGETZONESZ	_IOR(0x12, 132, __u32)
#define BLKGETNRZONES	_IOR(0x12, 133, __u32)
+#define BLKOPENZONE	_IOW(0x12, 134, struct blk_zone_range)
+#define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
+#define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)

#endif /* _UAPI_BLKZONED_H */
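
From user space, the new ioctls reuse struct blk_zone_range exactly like BLKRESETZONE. A small sketch with minimal error handling, assuming /dev/nvme0n1 is a zoned block device and that operating on its first zone is acceptable:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(void)
{
	struct blk_zone_range range;
	uint32_t zone_size;			/* zone size in 512 B sectors */
	int fd = open("/dev/nvme0n1", O_RDWR);	/* assumed zoned device */

	if (fd < 0 || ioctl(fd, BLKGETZONESZ, &zone_size))
		return 1;

	range.sector = 0;			/* first zone */
	range.nr_sectors = zone_size;		/* exactly one zone, zone aligned */

	if (ioctl(fd, BLKOPENZONE, &range))	/* explicitly open the zone */
		perror("BLKOPENZONE");
	if (ioctl(fd, BLKFINISHZONE, &range))	/* then transition it to full */
		perror("BLKFINISHZONE");
	return 0;
}
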
@ -58,13 +58,20 @@
 * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
 * used to clear any hints previously set.
 */
-#define RWF_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NOT_SET	0
#define RWH_WRITE_LIFE_NONE	1
#define RWH_WRITE_LIFE_SHORT	2
#define RWH_WRITE_LIFE_MEDIUM	3
#define RWH_WRITE_LIFE_LONG	4
#define RWH_WRITE_LIFE_EXTREME	5

+/*
+ * The originally introduced spelling is remained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	RWH_WRITE_LIFE_NOT_SET

/*
 * Types of directory notifications that may be requested.
 */
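
These hints are set from user space with fcntl(F_SET_RW_HINT), which takes a pointer to a 64-bit value. A minimal sketch; the fallback #defines mirror the values above and in linux/fcntl.h for toolchains whose libc headers do not expose them yet, and scratch.dat is just a throwaway file name:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

#ifndef F_SET_RW_HINT
#define F_SET_RW_HINT		(1024 + 12)	/* F_LINUX_SPECIFIC_BASE + 12 */
#endif
#ifndef RWH_WRITE_LIFE_SHORT
#define RWH_WRITE_LIFE_SHORT	2		/* matches the define above */
#endif

int main(void)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* data expected to be short-lived */
	int fd = open("scratch.dat", O_CREAT | O_WRONLY, 0644);

	if (fd < 0 || fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
		perror("F_SET_RW_HINT");
		return 1;
	}
	return 0;
}
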
@ -113,6 +113,25 @@ struct opal_shadow_mbr {
	__u64 size;
};

+/* Opal table operations */
+enum opal_table_ops {
+	OPAL_READ_TABLE,
+	OPAL_WRITE_TABLE,
+};
+
+#define OPAL_UID_LENGTH 8
+struct opal_read_write_table {
+	struct opal_key key;
+	const __u64 data;
+	const __u8 table_uid[OPAL_UID_LENGTH];
+	__u64 offset;
+	__u64 size;
+#define OPAL_TABLE_READ (1 << OPAL_READ_TABLE)
+#define OPAL_TABLE_WRITE (1 << OPAL_WRITE_TABLE)
+	__u64 flags;
+	__u64 priv;
+};
+
#define IOC_OPAL_SAVE		    _IOW('p', 220, struct opal_lock_unlock)
#define IOC_OPAL_LOCK_UNLOCK	    _IOW('p', 221, struct opal_lock_unlock)
#define IOC_OPAL_TAKE_OWNERSHIP	    _IOW('p', 222, struct opal_key)

@ -128,5 +147,6 @@ struct opal_shadow_mbr {
#define IOC_OPAL_PSID_REVERT_TPR    _IOW('p', 232, struct opal_key)
#define IOC_OPAL_MBR_DONE           _IOW('p', 233, struct opal_mbr_done)
#define IOC_OPAL_WRITE_SHADOW_MBR   _IOW('p', 234, struct opal_shadow_mbr)
+#define IOC_OPAL_GENERIC_TABLE_RW   _IOW('p', 235, struct opal_read_write_table)

#endif /* _UAPI_SED_OPAL_H */
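
User space reaches the new table read/write path through IOC_OPAL_GENERIC_TABLE_RW. The sketch below is only a rough illustration: the 8-byte table UID is a placeholder, the admin credential in .key is left unset, and /dev/nvme0n1 is an assumed self-encrypting drive. Only the struct members and flag bits defined above are relied on.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

int main(void)
{
	unsigned char buf[512];

	/*
	 * data and table_uid are const in the uapi struct, so the request is
	 * built with an initializer. The UID is a placeholder for whatever
	 * datastore table the drive exposes, and .key (the credential) is
	 * deliberately left out of this sketch.
	 */
	struct opal_read_write_table rw = {
		.table_uid = { 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 },
		.data      = (__u64)(uintptr_t)buf,
		.offset    = 0,
		.size      = sizeof(buf),
		.flags     = OPAL_TABLE_READ,
	};
	int fd = open("/dev/nvme0n1", O_RDWR);

	if (fd < 0 || ioctl(fd, IOC_OPAL_GENERIC_TABLE_RW, &rw))
		perror("IOC_OPAL_GENERIC_TABLE_RW");
	return 0;
}
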
@ -236,23 +236,6 @@ bool sbitmap_any_bit_set(const struct sbitmap *sb)
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);

-bool sbitmap_any_bit_clear(const struct sbitmap *sb)
-{
-	unsigned int i;
-
-	for (i = 0; i < sb->map_nr; i++) {
-		const struct sbitmap_word *word = &sb->map[i];
-		unsigned long mask = word->word & ~word->cleared;
-		unsigned long ret;
-
-		ret = find_first_zero_bit(&mask, word->depth);
-		if (ret < word->depth)
-			return true;
-	}
-	return false;
-}
-EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
-
static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
{
	unsigned int i, weight = 0;
@ -58,13 +58,20 @@
 * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
 * used to clear any hints previously set.
 */
-#define RWF_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NOT_SET	0
#define RWH_WRITE_LIFE_NONE	1
#define RWH_WRITE_LIFE_SHORT	2
#define RWH_WRITE_LIFE_MEDIUM	3
#define RWH_WRITE_LIFE_LONG	4
#define RWH_WRITE_LIFE_EXTREME	5

+/*
+ * The originally introduced spelling is remained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	RWH_WRITE_LIFE_NOT_SET

/*
 * Types of directory notifications that may be requested.
 */